1use std::mem;
11
12use anyhow::{Context as _, Result};
13use base64::Engine as _;
14use mailparse::ParsedContentType;
15use mime::Mime;
16
17use crate::context::Context;
18use crate::headerdef::{HeaderDef, HeaderDefMap};
19use crate::log::warn;
20use crate::message::{self, Message, MsgId};
21use crate::mimeparser::parse_message_id;
22use crate::param::Param::SendHtml;
23use crate::plaintext::PlainText;
24
25impl Message {
26 pub fn has_html(&self) -> bool {
33 self.mime_modified
34 }
35
36 pub fn set_html(&mut self, html: Option<String>) {
45 if let Some(html) = html {
46 self.param.set(SendHtml, html);
47 self.mime_modified = true;
48 } else {
49 self.param.remove(SendHtml);
50 self.mime_modified = false;
51 }
52 }
53}
54
/// Coarse classification of a MIME part, used to decide how the part is
/// traversed while looking for HTML and plain-text content.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MimeMultipartType {
    /// A `multipart/*` container whose sub-parts are visited one by one.
    Multiple,
    /// A leaf part that is inspected directly.
    Single,
    /// An embedded `message/rfc822` mail that is parsed on its own.
    Message,
}
63
64fn get_mime_multipart_type(ctype: &ParsedContentType) -> MimeMultipartType {
67 let mimetype = ctype.mimetype.to_lowercase();
68 if mimetype.starts_with("multipart") && ctype.params.contains_key("boundary") {
69 MimeMultipartType::Multiple
70 } else if mimetype == "message/rfc822" {
71 MimeMultipartType::Message
72 } else {
73 MimeMultipartType::Single
74 }
75}
76
/// Collects the HTML representation of a MIME message.
///
/// Instances are created by `HtmlMsgParser::from_bytes()`.
#[derive(Debug)]
struct HtmlMsgParser {
    /// Resulting HTML: the body of the first `text/html` part or, if there is
    /// none, the first `text/plain` part converted to HTML. The HTML of
    /// embedded messages is appended to it once parsing is finished.
    pub html: String,
    /// The first `text/plain` part found, if any.
    pub plain: Option<PlainText>,
    /// HTML gathered from embedded `message/rfc822` parts; it is moved to the
    /// end of `html` when parsing is finished.
    pub(crate) msg_html: String,
}
84
85impl HtmlMsgParser {
86 pub async fn from_bytes<'a>(
90 context: &Context,
91 rawmime: &'a [u8],
92 ) -> Result<(Self, mailparse::ParsedMail<'a>)> {
93 let mut parser = HtmlMsgParser {
94 html: "".to_string(),
95 plain: None,
96 msg_html: "".to_string(),
97 };
98
99 let parsedmail = mailparse::parse_mail(rawmime).context("Failed to parse mail")?;
100
101 parser.collect_texts_recursive(context, &parsedmail).await?;
102
103 if parser.html.is_empty() {
104 if let Some(plain) = &parser.plain {
105 parser.html = plain.to_html();
106 }
107 } else {
108 parser.cid_to_data_recursive(context, &parsedmail).await?;
109 }
110 parser.html += &mem::take(&mut parser.msg_html);
111 Ok((parser, parsedmail))
112 }
113
114 async fn collect_texts_recursive<'a>(
123 &'a mut self,
124 context: &'a Context,
125 mail: &'a mailparse::ParsedMail<'a>,
126 ) -> Result<()> {
127 match get_mime_multipart_type(&mail.ctype) {
128 MimeMultipartType::Multiple => {
129 for cur_data in &mail.subparts {
130 Box::pin(self.collect_texts_recursive(context, cur_data)).await?
131 }
132 Ok(())
133 }
134 MimeMultipartType::Message => {
135 let raw = mail.get_body_raw()?;
136 if raw.is_empty() {
137 return Ok(());
138 }
139 let (parser, mail) = Box::pin(HtmlMsgParser::from_bytes(context, &raw)).await?;
140 if !parser.html.is_empty() {
141 let mut text = "\r\n\r\n".to_string();
142 for h in mail.headers {
143 let key = h.get_key();
144 if matches!(
145 key.to_lowercase().as_str(),
146 "date"
147 | "from"
148 | "sender"
149 | "reply-to"
150 | "to"
151 | "cc"
152 | "bcc"
153 | "subject"
154 ) {
155 text += &format!("{key}: {}\r\n", h.get_value());
156 }
157 }
158 text += "\r\n";
159 self.msg_html += &PlainText {
160 text,
161 flowed: false,
162 delsp: false,
163 }
164 .to_html();
165 self.msg_html += &parser.html;
166 }
167 Ok(())
168 }
169 MimeMultipartType::Single => {
170 let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
171 if mimetype == mime::TEXT_HTML {
172 if self.html.is_empty() {
173 if let Ok(decoded_data) = mail.get_body() {
174 self.html = decoded_data;
175 }
176 }
177 } else if mimetype == mime::TEXT_PLAIN && self.plain.is_none() {
178 if let Ok(decoded_data) = mail.get_body() {
179 self.plain = Some(PlainText {
180 text: decoded_data,
181 flowed: if let Some(format) = mail.ctype.params.get("format") {
182 format.as_str().eq_ignore_ascii_case("flowed")
183 } else {
184 false
185 },
186 delsp: if let Some(delsp) = mail.ctype.params.get("delsp") {
187 delsp.as_str().eq_ignore_ascii_case("yes")
188 } else {
189 false
190 },
191 });
192 }
193 }
194 Ok(())
195 }
196 }
197 }
198
199 async fn cid_to_data_recursive<'a>(
202 &'a mut self,
203 context: &'a Context,
204 mail: &'a mailparse::ParsedMail<'a>,
205 ) -> Result<()> {
206 match get_mime_multipart_type(&mail.ctype) {
207 MimeMultipartType::Multiple => {
208 for cur_data in &mail.subparts {
209 Box::pin(self.cid_to_data_recursive(context, cur_data)).await?;
210 }
211 Ok(())
212 }
213 MimeMultipartType::Message => Ok(()),
214 MimeMultipartType::Single => {
215 let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
216 if mimetype.type_() == mime::IMAGE {
217 if let Some(cid) = mail.headers.get_header_value(HeaderDef::ContentId) {
218 if let Ok(cid) = parse_message_id(&cid) {
219 if let Ok(replacement) = mimepart_to_data_url(mail) {
220 let re_string = format!(
221 "(<img[^>]*src[^>]*=[^>]*)(cid:{})([^>]*>)",
222 regex::escape(&cid)
223 );
224 match regex::Regex::new(&re_string) {
225 Ok(re) => {
226 self.html = re
227 .replace_all(
228 &self.html,
229 format!("${{1}}{replacement}${{3}}").as_str(),
230 )
231 .as_ref()
232 .to_string()
233 }
234 Err(e) => warn!(
235 context,
236 "Cannot create regex for cid: {} throws {}", re_string, e
237 ),
238 }
239 }
240 }
241 }
242 }
243 Ok(())
244 }
245 }
246 }
247}
248
249fn mimepart_to_data_url(mail: &mailparse::ParsedMail<'_>) -> Result<String> {
251 let data = mail.get_body_raw()?;
252 let data = base64::engine::general_purpose::STANDARD.encode(data);
253 Ok(format!("data:{};base64,{}", mail.ctype.mimetype, data))
254}
255
256impl MsgId {
257 pub async fn get_html(self, context: &Context) -> Result<Option<String>> {
263 let rawmime = message::get_mime_headers(context, self).await?;
264
265 if !rawmime.is_empty() {
266 match HtmlMsgParser::from_bytes(context, &rawmime).await {
267 Err(err) => {
268 warn!(context, "get_html: parser error: {:#}", err);
269 Ok(None)
270 }
271 Ok((parser, _)) => Ok(Some(parser.html)),
272 }
273 } else {
274 warn!(context, "get_html: no mime for {}", self);
275 Ok(None)
276 }
277 }
278}
279
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chat;
    use crate::chat::{forward_msgs, save_msgs};
    use crate::config::Config;
    use crate::contact::ContactId;
    use crate::message::{MessengerMessage, Viewtype};
    use crate::receive_imf::receive_imf;
    use crate::test_utils::{TestContext, TestContextManager};

    /// A mail without `Content-Type` is rendered from its plain text.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_plain_unspecified() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_plain_unspecified.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        assert_eq!(
            parser.html,
            r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body>
This message does not have Content-Type nor Subject.<br/>
</body></html>
"#
        );
    }

    /// Plain text in a non-UTF-8 charset is decoded before conversion to HTML.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_plain_iso88591() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_plain_iso88591.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        assert_eq!(
            parser.html,
            r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body>
message with a non-UTF-8 encoding: äöüßÄÖÜ<br/>
</body></html>
"#
        );
    }

    /// `format=flowed` is detected and applied when converting to HTML.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_plain_flowed() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_plain_flowed.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        assert!(parser.plain.unwrap().flowed);
        assert_eq!(
            parser.html,
            r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body>
This line ends with a space and will be merged with the next one due to format=flowed.<br/>
<br/>
This line does not end with a space<br/>
and will be wrapped as usual.<br/>
</body></html>
"#
        );
    }

    /// A `multipart/alternative` mail with only a plain part is rendered from
    /// that part; the expectation pins HTML-escaping of special characters.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_alt_plain() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_alt_plain.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        assert_eq!(
            parser.html,
            r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body>
mime-modified should not be set set as there is no html and no special stuff;<br/>
although not being a delta-message.<br/>
test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x27; :)<br/>
</body></html>
"#
        );
    }

    /// A pure `text/html` mail is passed through as is.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_html() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_html.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();

        // Carriage returns are stripped so that the comparison does not depend
        // on the line endings of the parsed body.
        // NOTE(review): indentation inside the literal must match the fixture.
        assert_eq!(
            parser.html.replace('\r', ""),
            r##"
<html>
  <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p>
</html>"##
        );
    }

    /// A `multipart/alternative` mail with only an HTML part uses that part.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_alt_html() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_alt_html.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        // Carriage returns are stripped, see `test_htmlparse_html()`.
        // NOTE(review): indentation inside the literal must match the fixture.
        assert_eq!(
            parser.html.replace('\r', ""),
            r##"<html>
  <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p>
</html>
"##
        );
    }

    /// With both a plain and an HTML alternative, the HTML part wins.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_alt_plain_html() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        // Carriage returns are stripped, see `test_htmlparse_html()`.
        // NOTE(review): indentation inside the literal must match the fixture.
        assert_eq!(
            parser.html.replace('\r', ""),
            r##"<html>
  <p>
    this is <b>html</b>
  </p>
</html>
"##
        );
    }

    /// `cid:` image references are replaced by inline `data:` URLs.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_apple_cid_jpg() {
        let t = TestContext::new().await;

        // Make sure the fixture contains a cid reference and no data URL yet.
        let raw = include_bytes!("../test-data/message/apple_cid_jpg.eml");
        let test = String::from_utf8_lossy(raw);
        assert!(test.contains("Content-Id: <8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box>"));
        assert!(test.contains("cid:8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box"));
        assert!(!test.contains("data:"));

        // After parsing, the cid reference must be gone and the image must be
        // embedded as a base64 data URL instead.
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        assert!(parser.html.contains("<html>"));
        assert!(!parser.html.contains("Content-Id:"));
        assert!(parser.html.contains("data:image/jpeg;base64,"));
        assert!(!parser.html.contains("cid:"));
    }

    /// Requesting HTML for a message id that does not exist is an error.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_get_html_invalid_msgid() {
        let t = TestContext::new().await;
        let msg_id = MsgId::new(100);
        assert!(msg_id.get_html(&t).await.is_err());
    }

    /// The HTML part survives forwarding to another contact.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_html_forwarding() {
        let mut tcm = TestContextManager::new();

        // Alice receives a non-delta HTML message.
        let alice = &tcm.alice().await;
        let chat = alice
            .create_chat_with_contact("", "sender@testrun.org")
            .await;
        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
        receive_imf(alice, raw, false).await.unwrap();
        let msg = alice.get_last_msg_in(chat.get_id()).await;
        assert_ne!(msg.get_from_id(), ContactId::SELF);
        assert_eq!(msg.is_dc_message, MessengerMessage::No);
        assert!(!msg.is_forwarded());
        assert!(msg.get_text().contains("this is plain"));
        assert!(msg.has_html());
        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
        assert!(html.contains("this is <b>html</b>"));

        // Alice forwards the message to Bob.
        let chat = alice.create_chat_with_contact("", "bob@example.net").await;
        forward_msgs(alice, &[msg.get_id()], chat.get_id())
            .await
            .unwrap();
        let msg = alice.get_last_msg_in(chat.get_id()).await;
        assert_eq!(msg.get_from_id(), ContactId::SELF);
        assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
        assert!(msg.is_forwarded());
        assert!(msg.get_text().contains("this is plain"));
        assert!(msg.has_html());
        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
        assert!(html.contains("this is <b>html</b>"));

        // Bob receives the forwarded message including the HTML part.
        let bob = &tcm.bob().await;
        let chat = bob.create_chat_with_contact("", "alice@example.org").await;
        let msg = bob.recv_msg(&alice.pop_sent_msg().await).await;
        assert_eq!(chat.id, msg.chat_id);
        assert_ne!(msg.get_from_id(), ContactId::SELF);
        assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
        assert!(msg.is_forwarded());
        assert!(msg.get_text().contains("this is plain"));
        assert!(msg.has_html());
        let html = msg.get_id().get_html(bob).await.unwrap().unwrap();
        assert!(html.contains("this is <b>html</b>"));
    }

    /// The HTML part is kept when a message is saved to the self chat.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_html_save_msg() -> Result<()> {
        // Alice receives a non-delta HTML message.
        let alice = TestContext::new_alice().await;
        let chat = alice
            .create_chat_with_contact("", "sender@testrun.org")
            .await;
        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
        receive_imf(&alice, raw, false).await?;
        let msg = alice.get_last_msg_in(chat.get_id()).await;

        // Alice saves the message.
        let self_chat = alice.get_self_chat().await;
        save_msgs(&alice, &[msg.id]).await?;
        let saved_msg = alice.get_last_msg_in(self_chat.get_id()).await;
        assert_ne!(saved_msg.id, msg.id);
        assert_eq!(
            saved_msg.get_original_msg_id(&alice).await?.unwrap(),
            msg.id
        );
        // The saved copy is not marked as forwarded and keeps the original sender.
        assert!(!saved_msg.is_forwarded());
        assert_ne!(saved_msg.get_from_id(), ContactId::SELF);
        assert_eq!(saved_msg.get_from_id(), msg.get_from_id());
        assert_eq!(saved_msg.is_dc_message, MessengerMessage::No);
        assert!(saved_msg.get_text().contains("this is plain"));
        assert!(saved_msg.has_html());
        let html = saved_msg.get_id().get_html(&alice).await?.unwrap();
        assert!(html.contains("this is <b>html</b>"));

        Ok(())
    }

    /// The HTML part survives forwarding in an encrypted message.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_html_forwarding_encrypted() {
        let mut tcm = TestContextManager::new();

        // Alice receives a non-delta HTML message.
        let alice = &tcm.alice().await;
        alice
            .set_config(Config::ShowEmails, Some("1"))
            .await
            .unwrap();
        let chat = alice
            .create_chat_with_contact("", "sender@testrun.org")
            .await;
        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
        receive_imf(alice, raw, false).await.unwrap();
        let msg = alice.get_last_msg_in(chat.get_id()).await;

        // Alice forwards the message to her self chat.
        let chat = alice.get_self_chat().await;
        forward_msgs(alice, &[msg.get_id()], chat.get_id())
            .await
            .unwrap();
        let msg = alice.pop_sent_msg().await;

        // A second Alice context receives the forwarded message.
        let alice = &tcm.alice().await;
        alice
            .set_config(Config::ShowEmails, Some("0"))
            .await
            .unwrap();
        let msg = alice.recv_msg(&msg).await;
        assert_eq!(msg.chat_id, alice.get_self_chat().await.id);
        assert_eq!(msg.get_from_id(), ContactId::SELF);
        assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
        assert!(msg.get_showpadlock());
        assert!(msg.is_forwarded());
        assert!(msg.get_text().contains("this is plain"));
        assert!(msg.has_html());
        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
        assert!(html.contains("this is <b>html</b>"));
    }

    /// HTML set via `Message::set_html()` is sent and can be read back on
    /// both the sending and the receiving side.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_set_html() {
        let mut tcm = TestContextManager::new();
        let alice = &tcm.alice().await;
        let bob = &tcm.bob().await;

        // Alice sends a message with HTML to Bob.
        let chat_id = alice.create_chat(bob).await.id;
        let mut msg = Message::new_text("plain text".to_string());
        msg.set_html(Some("<b>html</b> text".to_string()));
        assert!(msg.mime_modified);
        chat::send_msg(alice, chat_id, &mut msg).await.unwrap();

        // Check the message as stored on Alice's side.
        let msg = alice.get_last_msg_in(chat_id).await;
        assert_eq!(msg.get_text(), "plain text");
        assert!(!msg.is_forwarded());
        assert!(msg.mime_modified);
        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
        assert!(html.contains("<b>html</b> text"));

        // Bob receives the message including the HTML part.
        let chat_id = bob.create_chat(alice).await.id;
        let msg = bob.recv_msg(&alice.pop_sent_msg().await).await;
        assert_eq!(msg.chat_id, chat_id);
        assert_eq!(msg.get_text(), "plain text");
        assert!(!msg.is_forwarded());
        assert!(msg.mime_modified);
        let html = msg.get_id().get_html(bob).await.unwrap().unwrap();
        assert!(html.contains("<b>html</b> text"));
    }

    /// HTML in the cp1252 charset is decoded for both the text and the HTML view.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_cp1252_html() -> Result<()> {
        let t = TestContext::new_alice().await;
        receive_imf(
            &t,
            include_bytes!("../test-data/message/cp1252-html.eml"),
            false,
        )
        .await?;
        let msg = t.get_last_msg().await;
        assert_eq!(msg.viewtype, Viewtype::Text);
        assert!(msg.text.contains("foo bar ä ö ü ß"));
        assert!(msg.has_html());
        let html = msg.get_id().get_html(&t).await?.unwrap();
        println!("{html}");
        assert!(html.contains("foo bar ä ö ü ß"));
        Ok(())
    }
}