1use std::mem;
11
12use anyhow::{Context as _, Result};
13use base64::Engine as _;
14use mailparse::ParsedContentType;
15use mime::Mime;
16
17use crate::context::Context;
18use crate::headerdef::{HeaderDef, HeaderDefMap};
19use crate::log::warn;
20use crate::message::{self, Message, MsgId};
21use crate::mimeparser::parse_message_id;
22use crate::param::Param::SendHtml;
23use crate::plaintext::PlainText;
24
25impl Message {
26 pub fn has_html(&self) -> bool {
33 self.mime_modified
34 }
35
36 pub fn set_html(&mut self, html: Option<String>) {
45 if let Some(html) = html {
46 self.param.set(SendHtml, html);
47 self.mime_modified = true;
48 } else {
49 self.param.remove(SendHtml);
50 self.mime_modified = false;
51 }
52 }
53}
54
/// Coarse classification of a MIME part, used to decide how the part is traversed.
///
/// The derives make the classification cheap to copy, comparable and printable,
/// which is convenient for logging and assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MimeMultipartType {
    /// A `multipart*` part that declares a `boundary` parameter.
    Multiple,
    /// Any part that is neither `Multiple` nor `Message`.
    Single,
    /// A `message/rfc822` part, i.e. an attached message.
    Message,
}
63
64fn get_mime_multipart_type(ctype: &ParsedContentType) -> MimeMultipartType {
67 let mimetype = ctype.mimetype.to_lowercase();
68 if mimetype.starts_with("multipart") && ctype.params.contains_key("boundary") {
69 MimeMultipartType::Multiple
70 } else if mimetype == "message/rfc822" {
71 MimeMultipartType::Message
72 } else {
73 MimeMultipartType::Single
74 }
75}
76
77#[derive(Debug)]
79struct HtmlMsgParser {
80 pub html: String,
81 pub plain: Option<PlainText>,
82 pub(crate) msg_html: String,
83}
84
85impl HtmlMsgParser {
86 #[expect(clippy::arithmetic_side_effects)]
90 pub async fn from_bytes<'a>(
91 context: &Context,
92 rawmime: &'a [u8],
93 ) -> Result<(Self, mailparse::ParsedMail<'a>)> {
94 let mut parser = HtmlMsgParser {
95 html: "".to_string(),
96 plain: None,
97 msg_html: "".to_string(),
98 };
99
100 let parsedmail = mailparse::parse_mail(rawmime).context("Failed to parse mail")?;
101
102 parser.collect_texts_recursive(context, &parsedmail).await?;
103
104 if parser.html.is_empty() {
105 if let Some(plain) = &parser.plain {
106 parser.html = plain.to_html();
107 }
108 } else {
109 parser.cid_to_data_recursive(context, &parsedmail).await?;
110 }
111 parser.html += &mem::take(&mut parser.msg_html);
112 Ok((parser, parsedmail))
113 }
114
115 #[expect(clippy::arithmetic_side_effects)]
124 async fn collect_texts_recursive<'a>(
125 &'a mut self,
126 context: &'a Context,
127 mail: &'a mailparse::ParsedMail<'a>,
128 ) -> Result<()> {
129 match get_mime_multipart_type(&mail.ctype) {
130 MimeMultipartType::Multiple => {
131 for cur_data in &mail.subparts {
132 Box::pin(self.collect_texts_recursive(context, cur_data)).await?
133 }
134 Ok(())
135 }
136 MimeMultipartType::Message => {
137 let raw = mail.get_body_raw()?;
138 if raw.is_empty() {
139 return Ok(());
140 }
141 let (parser, mail) = Box::pin(HtmlMsgParser::from_bytes(context, &raw)).await?;
142 if !parser.html.is_empty() {
143 let mut text = "\r\n\r\n".to_string();
144 for h in mail.headers {
145 let key = h.get_key();
146 if matches!(
147 key.to_lowercase().as_str(),
148 "date"
149 | "from"
150 | "sender"
151 | "reply-to"
152 | "to"
153 | "cc"
154 | "bcc"
155 | "subject"
156 ) {
157 text += &format!("{key}: {}\r\n", h.get_value());
158 }
159 }
160 text += "\r\n";
161 self.msg_html += &PlainText {
162 text,
163 flowed: false,
164 delsp: false,
165 }
166 .to_html();
167 self.msg_html += &parser.html;
168 }
169 Ok(())
170 }
171 MimeMultipartType::Single => {
172 let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
173 if mimetype == mime::TEXT_HTML {
174 if self.html.is_empty()
175 && let Ok(decoded_data) = mail.get_body()
176 {
177 self.html = decoded_data;
178 }
179 } else if mimetype == mime::TEXT_PLAIN
180 && self.plain.is_none()
181 && let Ok(decoded_data) = mail.get_body()
182 {
183 self.plain = Some(PlainText {
184 text: decoded_data,
185 flowed: if let Some(format) = mail.ctype.params.get("format") {
186 format.as_str().eq_ignore_ascii_case("flowed")
187 } else {
188 false
189 },
190 delsp: if let Some(delsp) = mail.ctype.params.get("delsp") {
191 delsp.as_str().eq_ignore_ascii_case("yes")
192 } else {
193 false
194 },
195 });
196 }
197 Ok(())
198 }
199 }
200 }
201
202 async fn cid_to_data_recursive<'a>(
205 &'a mut self,
206 context: &'a Context,
207 mail: &'a mailparse::ParsedMail<'a>,
208 ) -> Result<()> {
209 match get_mime_multipart_type(&mail.ctype) {
210 MimeMultipartType::Multiple => {
211 for cur_data in &mail.subparts {
212 Box::pin(self.cid_to_data_recursive(context, cur_data)).await?;
213 }
214 Ok(())
215 }
216 MimeMultipartType::Message => Ok(()),
217 MimeMultipartType::Single => {
218 let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
219 if mimetype.type_() == mime::IMAGE
220 && let Some(cid) = mail.headers.get_header_value(HeaderDef::ContentId)
221 && let Ok(cid) = parse_message_id(&cid)
222 && let Ok(replacement) = mimepart_to_data_url(mail)
223 {
224 let re_string = format!(
225 "(<img[^>]*src[^>]*=[^>]*)(cid:{})([^>]*>)",
226 regex::escape(&cid)
227 );
228 match regex::Regex::new(&re_string) {
229 Ok(re) => {
230 self.html = re
231 .replace_all(
232 &self.html,
233 format!("${{1}}{replacement}${{3}}").as_str(),
234 )
235 .as_ref()
236 .to_string()
237 }
238 Err(e) => warn!(
239 context,
240 "Cannot create regex for cid: {} throws {}", re_string, e
241 ),
242 }
243 }
244 Ok(())
245 }
246 }
247 }
248}
249
250fn mimepart_to_data_url(mail: &mailparse::ParsedMail<'_>) -> Result<String> {
252 let data = mail.get_body_raw()?;
253 let data = base64::engine::general_purpose::STANDARD.encode(data);
254 Ok(format!("data:{};base64,{}", mail.ctype.mimetype, data))
255}
256
257impl MsgId {
258 pub async fn get_html(self, context: &Context) -> Result<Option<String>> {
263 let (param, rawmime) = tokio::join!(
265 self.get_param(context),
266 message::get_mime_headers(context, self)
267 );
268 if let Some(html) = param?.get(SendHtml) {
269 return Ok(Some(html.to_string()));
270 }
271
272 let rawmime = rawmime?;
273 if !rawmime.is_empty() {
274 match HtmlMsgParser::from_bytes(context, &rawmime).await {
275 Err(err) => {
276 warn!(context, "get_html: parser error: {:#}", err);
277 Ok(None)
278 }
279 Ok((parser, _)) => Ok(Some(parser.html)),
280 }
281 } else {
282 warn!(context, "get_html: no mime for {}", self);
283 Ok(None)
284 }
285 }
286}
287
// Tests for the HTML extraction: `HtmlMsgParser::from_bytes` is run on `.eml` fixtures,
// and `Message::has_html` / `MsgId::get_html` are checked after receiving, sending,
// forwarding and saving messages.
288#[cfg(test)]
289mod tests {
290 use super::*;
291 use crate::chat::{self, Chat, forward_msgs, save_msgs};
292 use crate::config::Config;
293 use crate::constants;
294 use crate::contact::ContactId;
295 use crate::message::{MessengerMessage, Viewtype};
296 use crate::receive_imf::receive_imf;
297 use crate::test_utils::{TestContext, TestContextManager};
298
 // Parses `text_plain_unspecified.eml` and compares the generated HTML with the exact
 // expected document.
299 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
300 async fn test_htmlparse_plain_unspecified() {
301 let t = TestContext::new().await;
302 let raw = include_bytes!("../test-data/message/text_plain_unspecified.eml");
303 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
304 assert_eq!(
305 parser.html,
306 r#"<!DOCTYPE html>
307<html><head>
308<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
309<meta name="color-scheme" content="light dark" />
310</head><body>
311This message does not have Content-Type nor Subject.<br/>
312</body></html>
313"#
314 );
315 }
316
 // Parses `text_plain_iso88591.eml`; the expected HTML contains the umlauts as UTF-8.
317 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
318 async fn test_htmlparse_plain_iso88591() {
319 let t = TestContext::new().await;
320 let raw = include_bytes!("../test-data/message/text_plain_iso88591.eml");
321 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
322 assert_eq!(
323 parser.html,
324 r#"<!DOCTYPE html>
325<html><head>
326<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
327<meta name="color-scheme" content="light dark" />
328</head><body>
329message with a non-UTF-8 encoding: äöüßÄÖÜ<br/>
330</body></html>
331"#
332 );
333 }
334
 // Parses `text_plain_flowed.eml`: the plain part must be flagged as flowed and the
 // generated HTML must match the expected document.
335 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
336 async fn test_htmlparse_plain_flowed() {
337 let t = TestContext::new().await;
338 let raw = include_bytes!("../test-data/message/text_plain_flowed.eml");
339 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
340 assert!(parser.plain.unwrap().flowed);
341 assert_eq!(
342 parser.html,
343 r#"<!DOCTYPE html>
344<html><head>
345<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
346<meta name="color-scheme" content="light dark" />
347</head><body>
348This line ends with a space and will be merged with the next one due to format=flowed.<br/>
349<br/>
350This line does not end with a space<br/>
351and will be wrapped as usual.<br/>
352</body></html>
353"#
354 );
355 }
356
 // Parses `text_alt_plain.eml` and compares the HTML generated from its plain text.
357 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
358 async fn test_htmlparse_alt_plain() {
359 let t = TestContext::new().await;
360 let raw = include_bytes!("../test-data/message/text_alt_plain.eml");
361 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
362 assert_eq!(
363 parser.html,
364 r#"<!DOCTYPE html>
365<html><head>
366<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
367<meta name="color-scheme" content="light dark" />
368</head><body>
369mime-modified should not be set set as there is no html and no special stuff;<br/>
370although not being a delta-message.<br/>
371test some special html-characters as < > and & but also " and ' :)<br/>
372</body></html>
373"#
374 );
375 }
376
 // Parses `text_html.eml`; carriage returns are removed from the result before it is
 // compared with the expected HTML.
377 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
378 async fn test_htmlparse_html() {
379 let t = TestContext::new().await;
380 let raw = include_bytes!("../test-data/message/text_html.eml");
381 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
382
383 assert_eq!(
387 parser.html.replace('\r', ""),
388 r##"
389<html>
390 <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p>
391</html>"##
392 );
393 }
394
 // Parses `text_alt_html.eml`; carriage returns are removed before comparing.
395 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
396 async fn test_htmlparse_alt_html() {
397 let t = TestContext::new().await;
398 let raw = include_bytes!("../test-data/message/text_alt_html.eml");
399 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
400 assert_eq!(
401 parser.html.replace('\r', ""), r##"<html>
403 <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p>
404</html>
405"##
406 );
407 }
408
 // Parses `text_alt_plain_html.eml`; the expected result is the HTML markup of the mail.
409 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
410 async fn test_htmlparse_alt_plain_html() {
411 let t = TestContext::new().await;
412 let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
413 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
414 assert_eq!(
415 parser.html.replace('\r', ""), r##"<html>
417 <p>
418 this is <b>html</b>
419 </p>
420</html>
421"##
422 );
423 }
424
 // Checks the fixture first (it has a Content-Id and a `cid:` reference but no `data:`),
 // then verifies that parsing replaces the `cid:` reference by a base64 JPEG data URL.
425 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
426 async fn test_htmlparse_apple_cid_jpg() {
427 let t = TestContext::new().await;
430 let raw = include_bytes!("../test-data/message/apple_cid_jpg.eml");
431 let test = String::from_utf8_lossy(raw);
432 assert!(test.contains("Content-Id: <8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box>"));
433 assert!(test.contains("cid:8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box"));
434 assert!(test.find("data:").is_none());
435
436 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
438 assert!(parser.html.contains("<html>"));
439 assert!(!parser.html.contains("Content-Id:"));
440 assert!(parser.html.contains("data:image/jpeg;base64,/9j/4AAQ"));
441 assert!(!parser.html.contains("cid:"));
442 }
443
 // `get_html()` for message id 100 in a fresh context must return an error.
444 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
445 async fn test_get_html_invalid_msgid() {
446 let t = TestContext::new().await;
447 let msg_id = MsgId::new(100);
448 assert!(msg_id.get_html(&t).await.is_err())
449 }
450
 // Alice receives an HTML mail and forwards it. The forwarded message is checked on the
 // sending and on the receiving side, both for forwarding within one context
 // (`forward_msgs`) and across contexts (`forward_msgs_2ctx`). Finally a text longer
 // than `DC_DESIRED_TEXT_LEN` is sent and its HTML must survive forwarding unchanged.
451 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
452 async fn test_html_forwarding() -> Result<()> {
453 let mut tcm = TestContextManager::new();
455 let alice = &tcm.alice().await;
456 let chat = alice
457 .create_chat_with_contact("", "sender@testrun.org")
458 .await;
459 let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
460 receive_imf(alice, raw, false).await.unwrap();
461 let msg = alice.get_last_msg_in(chat.get_id()).await;
462 assert_ne!(msg.get_from_id(), ContactId::SELF);
463 assert_eq!(msg.is_dc_message, MessengerMessage::No);
464 assert!(!msg.is_forwarded());
465 assert!(msg.get_text().contains("this is plain"));
466 assert!(msg.has_html());
467 let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
468 assert!(html.contains("this is <b>html</b>"));
469
470 let chat_alice = alice.create_chat_with_contact("", "bob@example.net").await;
472 forward_msgs(alice, &[msg.get_id()], chat_alice.get_id())
473 .await
474 .unwrap();
475 async fn check_sender(ctx: &TestContext, chat: &Chat) {
476 let msg = ctx.get_last_msg_in(chat.get_id()).await;
477 assert_eq!(msg.get_from_id(), ContactId::SELF);
478 assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
479 assert!(msg.is_forwarded());
480 assert!(msg.get_text().contains("this is plain"));
481 assert!(msg.has_html());
482 let html = msg.get_id().get_html(ctx).await.unwrap().unwrap();
483 assert!(html.contains("this is <b>html</b>"));
484 }
485 check_sender(alice, &chat_alice).await;
486
487 let bob = &tcm.bob().await;
489 let chat_bob = bob.create_chat_with_contact("", "alice@example.org").await;
490 async fn check_receiver(ctx: &TestContext, chat: &Chat, sender: &TestContext) {
491 let msg = ctx.recv_msg(&sender.pop_sent_msg().await).await;
492 assert_eq!(chat.id, msg.chat_id);
493 assert_ne!(msg.get_from_id(), ContactId::SELF);
494 assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
495 assert!(msg.is_forwarded());
496 assert!(msg.get_text().contains("this is plain"));
497 assert!(msg.has_html());
498 let html = msg.get_id().get_html(ctx).await.unwrap().unwrap();
499 assert!(html.contains("this is <b>html</b>"));
500 }
501 check_receiver(bob, &chat_bob, alice).await;
502
503 chat::forward_msgs_2ctx(alice, &[msg.get_id()], bob, chat_bob.get_id()).await?;
506 check_sender(bob, &chat_bob).await;
507 check_receiver(alice, &chat_alice, bob).await;
508
509 let line = "this text with 42 chars is just repeated.\n";
511 let long_txt = line.repeat(constants::DC_DESIRED_TEXT_LEN / line.len() + 2);
512 let mut msg = Message::new_text(long_txt);
513 alice.send_msg(chat_alice.id, &mut msg).await;
514 let msg = alice.get_last_msg_in(chat_alice.id).await;
515 assert!(msg.has_html());
516 let html = msg.id.get_html(alice).await?.unwrap();
517 chat::forward_msgs_2ctx(alice, &[msg.get_id()], bob, chat_bob.get_id()).await?;
518 let msg = bob.get_last_msg_in(chat_bob.id).await;
519 assert!(msg.has_html());
520 assert_eq!(msg.id.get_html(bob).await?.unwrap(), html);
521 Ok(())
522 }
523
 // Saving a received HTML mail with `save_msgs` creates a new message in the self chat
 // that is not marked as forwarded, keeps the original sender and still offers the HTML.
524 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
525 async fn test_html_save_msg() -> Result<()> {
526 let alice = TestContext::new_alice().await;
528 let chat = alice
529 .create_chat_with_contact("", "sender@testrun.org")
530 .await;
531 let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
532 receive_imf(&alice, raw, false).await?;
533 let msg = alice.get_last_msg_in(chat.get_id()).await;
534
535 let self_chat = alice.get_self_chat().await;
537 save_msgs(&alice, &[msg.id]).await?;
538 let saved_msg = alice.get_last_msg_in(self_chat.get_id()).await;
539 assert_ne!(saved_msg.id, msg.id);
540 assert_eq!(
541 saved_msg.get_original_msg_id(&alice).await?.unwrap(),
542 msg.id
543 );
544 assert!(!saved_msg.is_forwarded()); assert_ne!(saved_msg.get_from_id(), ContactId::SELF);
546 assert_eq!(saved_msg.get_from_id(), msg.get_from_id());
547 assert_eq!(saved_msg.is_dc_message, MessengerMessage::No);
548 assert!(saved_msg.get_text().contains("this is plain"));
549 assert!(saved_msg.has_html());
550 let html = saved_msg.get_id().get_html(&alice).await?.unwrap();
551 assert!(html.contains("this is <b>html</b>"));
552
553 Ok(())
554 }
555
 // Alice (ShowEmails=1) receives an HTML mail and forwards it to her self chat. The sent
 // message is then received by a second `tcm.alice()` context with ShowEmails=0
 // (presumably a second device of the same account), where it must show the padlock,
 // be marked as forwarded and still provide the HTML.
556 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
557 async fn test_html_forwarding_encrypted() {
558 let mut tcm = TestContextManager::new();
559 let alice = &tcm.alice().await;
563 alice
564 .set_config(Config::ShowEmails, Some("1"))
565 .await
566 .unwrap();
567 let chat = alice
568 .create_chat_with_contact("", "sender@testrun.org")
569 .await;
570 let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
571 receive_imf(alice, raw, false).await.unwrap();
572 let msg = alice.get_last_msg_in(chat.get_id()).await;
573
574 let chat = alice.get_self_chat().await;
577 forward_msgs(alice, &[msg.get_id()], chat.get_id())
578 .await
579 .unwrap();
580 let msg = alice.pop_sent_msg().await;
581
582 let alice = &tcm.alice().await;
584 alice
585 .set_config(Config::ShowEmails, Some("0"))
586 .await
587 .unwrap();
588 let msg = alice.recv_msg(&msg).await;
589 assert_eq!(msg.chat_id, alice.get_self_chat().await.id);
590 assert_eq!(msg.get_from_id(), ContactId::SELF);
591 assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
592 assert!(msg.get_showpadlock());
593 assert!(msg.is_forwarded());
594 assert!(msg.get_text().contains("this is plain"));
595 assert!(msg.has_html());
596 let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
597 assert!(html.contains("this is <b>html</b>"));
598 }
599
 // A text message with HTML attached via `set_html` keeps its plain text, has
 // `mime_modified` set and returns the HTML on both the sender and the receiver side.
600 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
601 async fn test_set_html() {
602 let mut tcm = TestContextManager::new();
603 let alice = &tcm.alice().await;
604 let bob = &tcm.bob().await;
605
606 let chat_id = alice.create_chat(bob).await.id;
608 let mut msg = Message::new_text("plain text".to_string());
609 msg.set_html(Some("<b>html</b> text".to_string()));
610 assert!(msg.mime_modified);
611 chat::send_msg(alice, chat_id, &mut msg).await.unwrap();
612
613 let msg = alice.get_last_msg_in(chat_id).await;
615 assert_eq!(msg.get_text(), "plain text");
616 assert!(!msg.is_forwarded());
617 assert!(msg.mime_modified);
618 let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
619 assert!(html.contains("<b>html</b> text"));
620
621 let chat_id = bob.create_chat(alice).await.id;
623 let msg = bob.recv_msg(&alice.pop_sent_msg().await).await;
624 assert_eq!(msg.chat_id, chat_id);
625 assert_eq!(msg.get_text(), "plain text");
626 assert!(!msg.is_forwarded());
627 assert!(msg.mime_modified);
628 let html = msg.get_id().get_html(bob).await.unwrap().unwrap();
629 assert!(html.contains("<b>html</b> text"));
630 }
631
 // Receives `cp1252-html.eml` and checks that both the message text and the HTML
 // contain the umlauts correctly.
632 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
633 async fn test_cp1252_html() -> Result<()> {
634 let t = TestContext::new_alice().await;
635 receive_imf(
636 &t,
637 include_bytes!("../test-data/message/cp1252-html.eml"),
638 false,
639 )
640 .await?;
641 let msg = t.get_last_msg().await;
642 assert_eq!(msg.viewtype, Viewtype::Text);
643 assert!(msg.text.contains("foo bar ä ö ü ß"));
644 assert!(msg.has_html());
645 let html = msg.get_id().get_html(&t).await?.unwrap();
646 println!("{html}");
647 assert!(html.contains("foo bar ä ö ü ß"));
648 Ok(())
649 }
650}