1use std::mem;
11
12use anyhow::{Context as _, Result};
13use base64::Engine as _;
14use mailparse::ParsedContentType;
15use mime::Mime;
16
17use crate::context::Context;
18use crate::headerdef::{HeaderDef, HeaderDefMap};
19use crate::log::warn;
20use crate::message::{self, Message, MsgId};
21use crate::mimeparser::parse_message_id;
22use crate::param::Param::SendHtml;
23use crate::plaintext::PlainText;
24
25impl Message {
26 pub fn has_html(&self) -> bool {
33 self.mime_modified
34 }
35
36 pub fn set_html(&mut self, html: Option<String>) {
45 if let Some(html) = html {
46 self.param.set(SendHtml, html);
47 self.mime_modified = true;
48 } else {
49 self.param.remove(SendHtml);
50 self.mime_modified = false;
51 }
52 }
53}
54
/// Coarse classification of a MIME part, as computed by
/// `get_mime_multipart_type()` from the part's content type.
///
/// The enum carries no data, so it derives the cheap standard traits; this
/// lets values be compared, copied and printed in logs and assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MimeMultipartType {
    /// A `multipart*` content type that carries a `boundary` parameter.
    Multiple,
    /// Any part that is neither `Multiple` nor `Message`.
    Single,
    /// A `message/rfc822` part.
    Message,
}
63
64fn get_mime_multipart_type(ctype: &ParsedContentType) -> MimeMultipartType {
67 let mimetype = ctype.mimetype.to_lowercase();
68 if mimetype.starts_with("multipart") && ctype.params.contains_key("boundary") {
69 MimeMultipartType::Multiple
70 } else if mimetype == "message/rfc822" {
71 MimeMultipartType::Message
72 } else {
73 MimeMultipartType::Single
74 }
75}
76
/// State collected while converting a raw MIME message into HTML.
///
/// Instances are built by `HtmlMsgParser::from_bytes()`.
77#[derive(Debug)]
79struct HtmlMsgParser {
/// Resulting HTML: the first `text/html` part found, or the HTML rendering of
/// `plain` if there is none; `from_bytes()` appends `msg_html` to it at the end.
80 pub html: String,
/// First `text/plain` part found, together with its `format=flowed` and
/// `delsp=yes` settings.
81 pub plain: Option<PlainText>,
/// HTML generated for nested `message/rfc822` parts; `from_bytes()` moves it
/// into `html`, leaving this field empty.
82 pub(crate) msg_html: String,
83}
84
85impl HtmlMsgParser {
86 pub fn from_bytes<'a>(
90 context: &Context,
91 rawmime: &'a [u8],
92 ) -> Result<(Self, mailparse::ParsedMail<'a>)> {
93 let mut parser = HtmlMsgParser {
94 html: "".to_string(),
95 plain: None,
96 msg_html: "".to_string(),
97 };
98
99 let parsedmail = mailparse::parse_mail(rawmime).context("Failed to parse mail")?;
100
101 parser.collect_texts_recursive(context, &parsedmail)?;
102
103 if parser.html.is_empty() {
104 if let Some(plain) = &parser.plain {
105 parser.html = plain.to_html();
106 }
107 } else {
108 parser.cid_to_data_recursive(context, &parsedmail)?;
109 }
110 parser.html += &mem::take(&mut parser.msg_html);
111 Ok((parser, parsedmail))
112 }
113
114 fn collect_texts_recursive<'a>(
123 &'a mut self,
124 context: &'a Context,
125 mail: &'a mailparse::ParsedMail<'a>,
126 ) -> Result<()> {
127 match get_mime_multipart_type(&mail.ctype) {
128 MimeMultipartType::Multiple => {
129 for cur_data in &mail.subparts {
130 self.collect_texts_recursive(context, cur_data)?
131 }
132 Ok(())
133 }
134 MimeMultipartType::Message => {
135 let raw = mail.get_body_raw()?;
136 if raw.is_empty() {
137 return Ok(());
138 }
139 let (parser, mail) = HtmlMsgParser::from_bytes(context, &raw)?;
140 if !parser.html.is_empty() {
141 let mut text = "\r\n\r\n".to_string();
142 for h in mail.headers {
143 let key = h.get_key();
144 if matches!(
145 key.to_lowercase().as_str(),
146 "date"
147 | "from"
148 | "sender"
149 | "reply-to"
150 | "to"
151 | "cc"
152 | "bcc"
153 | "subject"
154 ) {
155 text += &format!("{key}: {}\r\n", h.get_value());
156 }
157 }
158 text += "\r\n";
159 self.msg_html += &PlainText {
160 text,
161 flowed: false,
162 delsp: false,
163 }
164 .to_html();
165 self.msg_html += &parser.html;
166 }
167 Ok(())
168 }
169 MimeMultipartType::Single => {
170 let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
171 if mimetype == mime::TEXT_HTML {
172 if self.html.is_empty()
173 && let Ok(decoded_data) = mail.get_body()
174 {
175 self.html = decoded_data;
176 }
177 } else if mimetype == mime::TEXT_PLAIN
178 && self.plain.is_none()
179 && let Ok(decoded_data) = mail.get_body()
180 {
181 self.plain = Some(PlainText {
182 text: decoded_data,
183 flowed: if let Some(format) = mail.ctype.params.get("format") {
184 format.as_str().eq_ignore_ascii_case("flowed")
185 } else {
186 false
187 },
188 delsp: if let Some(delsp) = mail.ctype.params.get("delsp") {
189 delsp.as_str().eq_ignore_ascii_case("yes")
190 } else {
191 false
192 },
193 });
194 }
195 Ok(())
196 }
197 }
198 }
199
200 fn cid_to_data_recursive<'a>(
203 &'a mut self,
204 context: &'a Context,
205 mail: &'a mailparse::ParsedMail<'a>,
206 ) -> Result<()> {
207 match get_mime_multipart_type(&mail.ctype) {
208 MimeMultipartType::Multiple => {
209 for cur_data in &mail.subparts {
210 self.cid_to_data_recursive(context, cur_data)?;
211 }
212 Ok(())
213 }
214 MimeMultipartType::Message => Ok(()),
215 MimeMultipartType::Single => {
216 let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
217 if mimetype.type_() == mime::IMAGE
218 && let Some(cid) = mail.headers.get_header_value(HeaderDef::ContentId)
219 && let Ok(cid) = parse_message_id(&cid)
220 && let Ok(replacement) = mimepart_to_data_url(mail)
221 {
222 let re_string = format!(
223 "(<img[^>]*src[^>]*=[^>]*)(cid:{})([^>]*>)",
224 regex::escape(&cid)
225 );
226 match regex::Regex::new(&re_string) {
227 Ok(re) => {
228 self.html = re
229 .replace_all(
230 &self.html,
231 format!("${{1}}{replacement}${{3}}").as_str(),
232 )
233 .as_ref()
234 .to_string()
235 }
236 Err(e) => warn!(
237 context,
238 "Cannot create regex for cid: {} throws {}", re_string, e
239 ),
240 }
241 }
242 Ok(())
243 }
244 }
245 }
246}
247
248fn mimepart_to_data_url(mail: &mailparse::ParsedMail<'_>) -> Result<String> {
250 let data = mail.get_body_raw()?;
251 let data = base64::engine::general_purpose::STANDARD.encode(data);
252 Ok(format!("data:{};base64,{}", mail.ctype.mimetype, data))
253}
254
255impl MsgId {
256 pub async fn get_html(self, context: &Context) -> Result<Option<String>> {
261 let (param, rawmime) = tokio::join!(
263 self.get_param(context),
264 message::get_mime_headers(context, self)
265 );
266 if let Some(html) = param?.get(SendHtml) {
267 return Ok(Some(html.to_string()));
268 }
269
270 let rawmime = rawmime?;
271 if !rawmime.is_empty() {
272 match HtmlMsgParser::from_bytes(context, &rawmime) {
273 Err(err) => {
274 warn!(context, "get_html: parser error: {:#}", err);
275 Ok(None)
276 }
277 Ok((parser, _)) => Ok(Some(parser.html)),
278 }
279 } else {
280 warn!(context, "get_html: no mime for {}", self);
281 Ok(None)
282 }
283 }
284}
285
// Tests for `HtmlMsgParser`, `MsgId::get_html()` and `Message::set_html()`.
// Most of them feed `.eml` fixtures from `../test-data/message/` into the code.
286#[cfg(test)]
287mod tests {
288 use super::*;
289 use crate::chat::{self, Chat, forward_msgs, save_msgs};
290
291 use crate::constants;
292 use crate::contact::ContactId;
293 use crate::message::{MessengerMessage, Viewtype};
294 use crate::receive_imf::receive_imf;
295 use crate::test_utils::{TestContext, TestContextManager};
296
// `text_plain_unspecified.eml` must come out as the complete HTML page below.
297 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
298 async fn test_htmlparse_plain_unspecified() {
299 let t = TestContext::new().await;
300 let raw = include_bytes!("../test-data/message/text_plain_unspecified.eml");
301 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
302 assert_eq!(
303 parser.html,
304 r#"<!DOCTYPE html>
305<html><head>
306<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
307<meta name="color-scheme" content="light dark" />
308</head><body dir="auto" style="unicode-bidi: plaintext">
309This message does not have Content-Type nor Subject.<br/>
310</body></html>
311"#
312 );
313 }
314
// The umlauts of `text_plain_iso88591.eml` must appear correctly in the
// generated HTML.
315 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
316 async fn test_htmlparse_plain_iso88591() {
317 let t = TestContext::new().await;
318 let raw = include_bytes!("../test-data/message/text_plain_iso88591.eml");
319 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
320 assert_eq!(
321 parser.html,
322 r#"<!DOCTYPE html>
323<html><head>
324<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
325<meta name="color-scheme" content="light dark" />
326</head><body dir="auto" style="unicode-bidi: plaintext">
327message with a non-UTF-8 encoding: äöüßÄÖÜ<br/>
328</body></html>
329"#
330 );
331 }
332
// `text_plain_flowed.eml` must be recognised as flowed (`plain.flowed`) and
// rendered with the line breaks shown below.
333 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
334 async fn test_htmlparse_plain_flowed() {
335 let t = TestContext::new().await;
336 let raw = include_bytes!("../test-data/message/text_plain_flowed.eml");
337 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
338 assert!(parser.plain.unwrap().flowed);
339 assert_eq!(
340 parser.html,
341 r#"<!DOCTYPE html>
342<html><head>
343<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
344<meta name="color-scheme" content="light dark" />
345</head><body dir="auto" style="unicode-bidi: plaintext">
346This line ends with a space and will be merged with the next one due to format=flowed.<br/>
347<br/>
348This line does not end with a space<br/>
349and will be wrapped as usual.<br/>
350</body></html>
351"#
352 );
353 }
354
// Expected HTML page for `text_alt_plain.eml`.
355 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
356 async fn test_htmlparse_alt_plain() {
357 let t = TestContext::new().await;
358 let raw = include_bytes!("../test-data/message/text_alt_plain.eml");
359 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
360 assert_eq!(
361 parser.html,
362 r#"<!DOCTYPE html>
363<html><head>
364<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
365<meta name="color-scheme" content="light dark" />
366</head><body dir="auto" style="unicode-bidi: plaintext">
367mime-modified should not be set set as there is no html and no special stuff;<br/>
368although not being a delta-message.<br/>
369test some special html-characters as < > and & but also " and ' :)<br/>
370</body></html>
371"#
372 );
373 }
374
// The HTML of `text_html.eml` must match the literal below; `\r` characters
// are removed from the parser output before comparing.
375 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
376 async fn test_htmlparse_html() {
377 let t = TestContext::new().await;
378 let raw = include_bytes!("../test-data/message/text_html.eml");
379 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
380
381 assert_eq!(
385 parser.html.replace('\r', ""),
386 r##"
387<html>
388 <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p>
389</html>"##
390 );
391 }
392
// Expected HTML for `text_alt_html.eml`; `\r` is removed before comparing.
393 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
394 async fn test_htmlparse_alt_html() {
395 let t = TestContext::new().await;
396 let raw = include_bytes!("../test-data/message/text_alt_html.eml");
397 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
398 assert_eq!(
399 parser.html.replace('\r', ""), r##"<html>
401 <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p>
402</html>
403"##
404 );
405 }
406
// Expected HTML for `text_alt_plain_html.eml`; `\r` is removed before comparing.
407 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
408 async fn test_htmlparse_alt_plain_html() {
409 let t = TestContext::new().await;
410 let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
411 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
412 assert_eq!(
413 parser.html.replace('\r', ""), r##"<html>
415 <p>
416 this is <b>html</b>
417 </p>
418</html>
419"##
420 );
421 }
422
// The raw mail references its image by Content-Id / `cid:` and contains no
// `data:` URL; the parsed HTML must contain a base64 `data:image/jpeg` URL and
// no `cid:` reference.
423 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
424 async fn test_htmlparse_apple_cid_jpg() {
425 let t = TestContext::new().await;
428 let raw = include_bytes!("../test-data/message/apple_cid_jpg.eml");
429 let test = String::from_utf8_lossy(raw);
430 assert!(test.contains("Content-Id: <8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box>"));
431 assert!(test.contains("cid:8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box"));
432 assert!(test.find("data:").is_none());
433
434 let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
436 assert!(parser.html.contains("<html>"));
437 assert!(!parser.html.contains("Content-Id:"));
438 assert!(parser.html.contains("data:image/jpeg;base64,/9j/4AAQ"));
439 assert!(!parser.html.contains("cid:"));
440 }
441
// `get_html()` for message id 100 in a fresh context must return an error.
442 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
443 async fn test_get_html_invalid_msgid() {
444 let t = TestContext::new().await;
445 let msg_id = MsgId::new(100);
446 assert!(msg_id.get_html(&t).await.is_err())
447 }
448
// A received HTML mail must keep its HTML when it is forwarded, on the
// forwarding side as well as on the receiving side.
449 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
450 async fn test_html_forwarding() -> Result<()> {
451 let mut tcm = TestContextManager::new();
453 let alice = &tcm.alice().await;
454 alice.allow_unencrypted().await?;
455 let chat = alice
456 .create_chat_with_contact("", "sender@testrun.org")
457 .await;
458 let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
459 receive_imf(alice, raw, false).await.unwrap();
460 let msg = alice.get_last_msg_in(chat.get_id()).await;
461 assert_ne!(msg.get_from_id(), ContactId::SELF);
462 assert_eq!(msg.is_dc_message, MessengerMessage::No);
463 assert!(!msg.is_forwarded());
464 assert!(msg.get_text().contains("this is plain"));
465 assert!(msg.has_html());
466 let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
467 assert!(html.contains("this is <b>html</b>"));
468
469 let chat_alice = alice.create_chat_with_contact("", "bob@example.net").await;
471 forward_msgs(alice, &[msg.get_id()], chat_alice.get_id())
472 .await
473 .unwrap();
// Checks the forwarded copy as seen by the forwarding account.
474 async fn check_sender(ctx: &TestContext, chat: &Chat) {
475 let msg = ctx.get_last_msg_in(chat.get_id()).await;
476 assert_eq!(msg.get_from_id(), ContactId::SELF);
477 assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
478 assert!(msg.is_forwarded());
479 assert!(msg.get_text().contains("this is plain"));
480 assert!(msg.has_html());
481 let html = msg.get_id().get_html(ctx).await.unwrap().unwrap();
482 assert!(html.contains("this is <b>html</b>"));
483 }
484 check_sender(alice, &chat_alice).await;
485
486 let bob = &tcm.bob().await;
488 bob.allow_unencrypted().await?;
489 let chat_bob = bob.create_chat_with_contact("", "alice@example.org").await;
// Delivers the message last sent by `sender` to `ctx` and checks what arrives.
490 async fn check_receiver(ctx: &TestContext, chat: &Chat, sender: &TestContext) {
491 let msg = ctx.recv_msg(&sender.pop_sent_msg().await).await;
492 assert_eq!(chat.id, msg.chat_id);
493 assert_ne!(msg.get_from_id(), ContactId::SELF);
494 assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
495 assert!(msg.is_forwarded());
496 assert!(msg.get_text().contains("this is plain"));
497 assert!(msg.has_html());
498 let html = msg.get_id().get_html(ctx).await.unwrap().unwrap();
499 assert!(html.contains("this is <b>html</b>"));
500 }
501 check_receiver(bob, &chat_bob, alice).await;
502
// Same checks when forwarding from alice's context into bob's.
503 chat::forward_msgs_2ctx(alice, &[msg.get_id()], bob, chat_bob.get_id()).await?;
506 check_sender(bob, &chat_bob).await;
507 check_receiver(alice, &chat_alice, bob).await;
508
// A text longer than `DC_DESIRED_TEXT_LEN` must report HTML after sending,
// and the forwarded copy must return identical HTML.
509 let line = "this text with 42 chars is just repeated.\n";
511 let long_txt = line.repeat(constants::DC_DESIRED_TEXT_LEN / line.len() + 2);
512 let mut msg = Message::new_text(long_txt);
513 alice.send_msg(chat_alice.id, &mut msg).await;
514 let msg = alice.get_last_msg_in(chat_alice.id).await;
515 assert!(msg.has_html());
516 let html = msg.id.get_html(alice).await?.unwrap();
517 chat::forward_msgs_2ctx(alice, &[msg.get_id()], bob, chat_bob.get_id()).await?;
518 let msg = bob.get_last_msg_in(chat_bob.id).await;
519 assert!(msg.has_html());
520 assert_eq!(msg.id.get_html(bob).await?.unwrap(), html);
521 Ok(())
522 }
523
// Saving an HTML mail to the self chat must create a new, non-forwarded
// message that keeps the original sender and still has the HTML.
524 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
525 async fn test_html_save_msg() -> Result<()> {
526 let mut tcm = TestContextManager::new();
527 let alice = &tcm.alice().await;
528 alice.allow_unencrypted().await?;
529 let chat = alice
531 .create_chat_with_contact("", "sender@testrun.org")
532 .await;
533 let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
534 receive_imf(alice, raw, false).await?;
535 let msg = alice.get_last_msg_in(chat.get_id()).await;
536
537 let self_chat = alice.get_self_chat().await;
539 save_msgs(alice, &[msg.id]).await?;
540 let saved_msg = alice.get_last_msg_in(self_chat.get_id()).await;
541 assert_ne!(saved_msg.id, msg.id);
542 assert_eq!(saved_msg.get_original_msg_id(alice).await?.unwrap(), msg.id);
543 assert!(!saved_msg.is_forwarded()); assert_ne!(saved_msg.get_from_id(), ContactId::SELF);
545 assert_eq!(saved_msg.get_from_id(), msg.get_from_id());
546 assert_eq!(saved_msg.is_dc_message, MessengerMessage::No);
547 assert!(saved_msg.get_text().contains("this is plain"));
548 assert!(saved_msg.has_html());
549 let html = saved_msg.get_id().get_html(alice).await?.unwrap();
550 assert!(html.contains("this is <b>html</b>"));
551
552 Ok(())
553 }
554
// Forwards an HTML mail to the self chat and receives the sent message in a
// second alice context: it must show the padlock and keep the HTML.
555 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
556 async fn test_html_forwarding_encrypted() {
557 let mut tcm = TestContextManager::new();
558 let alice = &tcm.alice().await;
560 alice.allow_unencrypted().await.unwrap();
561 let chat = alice
562 .create_chat_with_contact("", "sender@testrun.org")
563 .await;
564 let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
565 receive_imf(alice, raw, false).await.unwrap();
566 let msg = alice.get_last_msg_in(chat.get_id()).await;
567
568 let chat = alice.get_self_chat().await;
571 forward_msgs(alice, &[msg.get_id()], chat.get_id())
572 .await
573 .unwrap();
574 let msg = alice.pop_sent_msg().await;
575
576 let alice = &tcm.alice().await;
578 let msg = alice.recv_msg(&msg).await;
579 assert_eq!(msg.chat_id, alice.get_self_chat().await.id);
580 assert_eq!(msg.get_from_id(), ContactId::SELF);
581 assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
582 assert!(msg.get_showpadlock());
583 assert!(msg.is_forwarded());
584 assert!(msg.get_text().contains("this is plain"));
585 assert!(msg.has_html());
586 let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
587 assert!(html.contains("<b>html</b> text") || html.contains("this is <b>html</b>"));
588 }
589
// HTML set with `set_html()` must be retrievable through `get_html()` on the
// sending side and on the receiving side, with the plain text unchanged.
590 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
591 async fn test_set_html() {
592 let mut tcm = TestContextManager::new();
593 let alice = &tcm.alice().await;
594 let bob = &tcm.bob().await;
595
596 let chat_id = alice.create_chat(bob).await.id;
598 let mut msg = Message::new_text("plain text".to_string());
599 msg.set_html(Some("<b>html</b> text".to_string()));
600 assert!(msg.mime_modified);
601 chat::send_msg(alice, chat_id, &mut msg).await.unwrap();
602
603 let msg = alice.get_last_msg_in(chat_id).await;
605 assert_eq!(msg.get_text(), "plain text");
606 assert!(!msg.is_forwarded());
607 assert!(msg.mime_modified);
608 let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
609 assert!(html.contains("<b>html</b> text"));
610
611 let chat_id = bob.create_chat(alice).await.id;
613 let msg = bob.recv_msg(&alice.pop_sent_msg().await).await;
614 assert_eq!(msg.chat_id, chat_id);
615 assert_eq!(msg.get_text(), "plain text");
616 assert!(!msg.is_forwarded());
617 assert!(msg.mime_modified);
618 let html = msg.get_id().get_html(bob).await.unwrap().unwrap();
619 assert!(html.contains("<b>html</b> text"));
620 }
621
// The umlauts of `cp1252-html.eml` must appear correctly both in the message
// text and in the HTML returned by `get_html()`.
622 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
623 async fn test_cp1252_html() -> Result<()> {
624 let mut tcm = TestContextManager::new();
625 let alice = &tcm.alice().await;
626 alice.allow_unencrypted().await?;
627 receive_imf(
628 alice,
629 include_bytes!("../test-data/message/cp1252-html.eml"),
630 false,
631 )
632 .await?;
633 let msg = alice.get_last_msg().await;
634 assert_eq!(msg.viewtype, Viewtype::Text);
635 assert!(msg.text.contains("foo bar ä ö ü ß"));
636 assert!(msg.has_html());
637 let html = msg.get_id().get_html(alice).await?.unwrap();
638 println!("{html}");
639 assert!(html.contains("foo bar ä ö ü ß"));
640 Ok(())
641 }
642}