1use std::mem;
11
12use anyhow::{Context as _, Result, ensure};
13use base64::Engine as _;
14use mailparse::ParsedContentType;
15use mime::Mime;
16
17use crate::context::Context;
18use crate::headerdef::{HeaderDef, HeaderDefMap};
19use crate::log::warn;
20use crate::message::{Message, MsgId};
21use crate::mimeparser::parse_message_id;
22use crate::param::{Param::SendHtml, Params};
23use crate::plaintext::PlainText;
24use crate::sql;
25use crate::tools::{buf_compress, buf_decompress};
26
27impl Message {
28 pub fn has_html(&self) -> bool {
35 self.mime_modified
36 }
37
38 pub fn set_html(&mut self, html: Option<String>) {
47 if let Some(html) = html {
48 self.param.set(SendHtml, html);
49 self.mime_modified = true;
50 } else {
51 self.param.remove(SendHtml);
52 self.mime_modified = false;
53 }
54 }
55}
56
/// Coarse classification of a MIME part, as produced by
/// `get_mime_multipart_type`.
///
/// The derives make the value cheap to copy, comparable and printable, which
/// is convenient for logging and assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MimeMultipartType {
    /// A `multipart*` part that carries a `boundary` parameter; its subparts
    /// are visited recursively.
    Multiple,
    /// Any other part; treated as a leaf.
    Single,
    /// A `message/rfc822` part, i.e. an embedded e-mail.
    Message,
}
65
66fn get_mime_multipart_type(ctype: &ParsedContentType) -> MimeMultipartType {
69 let mimetype = ctype.mimetype.to_lowercase();
70 if mimetype.starts_with("multipart") && ctype.params.contains_key("boundary") {
71 MimeMultipartType::Multiple
72 } else if mimetype == "message/rfc822" {
73 MimeMultipartType::Message
74 } else {
75 MimeMultipartType::Single
76 }
77}
78
79#[derive(Debug)]
81struct HtmlMsgParser {
82 pub html: String,
83 pub plain: Option<PlainText>,
84 pub(crate) msg_html: String,
85}
86
87impl HtmlMsgParser {
88 pub fn from_bytes<'a>(
92 context: &Context,
93 rawmime: &'a [u8],
94 ) -> Result<(Self, mailparse::ParsedMail<'a>)> {
95 let mut parser = HtmlMsgParser {
96 html: "".to_string(),
97 plain: None,
98 msg_html: "".to_string(),
99 };
100
101 let parsedmail = mailparse::parse_mail(rawmime).context("Failed to parse mail")?;
102
103 parser.collect_texts_recursive(context, &parsedmail)?;
104
105 if parser.html.is_empty() {
106 if let Some(plain) = &parser.plain {
107 parser.html = plain.to_html();
108 }
109 } else {
110 parser.cid_to_data_recursive(context, &parsedmail)?;
111 }
112 parser.html += &mem::take(&mut parser.msg_html);
113 Ok((parser, parsedmail))
114 }
115
116 fn collect_texts_recursive<'a>(
125 &'a mut self,
126 context: &'a Context,
127 mail: &'a mailparse::ParsedMail<'a>,
128 ) -> Result<()> {
129 match get_mime_multipart_type(&mail.ctype) {
130 MimeMultipartType::Multiple => {
131 for cur_data in &mail.subparts {
132 self.collect_texts_recursive(context, cur_data)?
133 }
134 Ok(())
135 }
136 MimeMultipartType::Message => {
137 let raw = mail.get_body_raw()?;
138 if raw.is_empty() {
139 return Ok(());
140 }
141 let (parser, mail) = HtmlMsgParser::from_bytes(context, &raw)?;
142 if !parser.html.is_empty() {
143 let mut text = "\r\n\r\n".to_string();
144 for h in mail.headers {
145 let key = h.get_key();
146 if matches!(
147 key.to_lowercase().as_str(),
148 "date"
149 | "from"
150 | "sender"
151 | "reply-to"
152 | "to"
153 | "cc"
154 | "bcc"
155 | "subject"
156 ) {
157 text += &format!("{key}: {}\r\n", h.get_value());
158 }
159 }
160 text += "\r\n";
161 self.msg_html += &PlainText {
162 text,
163 flowed: false,
164 delsp: false,
165 }
166 .to_html();
167 self.msg_html += &parser.html;
168 }
169 Ok(())
170 }
171 MimeMultipartType::Single => {
172 let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
173 if mimetype == mime::TEXT_HTML {
174 if self.html.is_empty()
175 && let Ok(decoded_data) = mail.get_body()
176 {
177 self.html = decoded_data;
178 }
179 } else if mimetype == mime::TEXT_PLAIN
180 && self.plain.is_none()
181 && let Ok(decoded_data) = mail.get_body()
182 {
183 self.plain = Some(PlainText {
184 text: decoded_data,
185 flowed: if let Some(format) = mail.ctype.params.get("format") {
186 format.as_str().eq_ignore_ascii_case("flowed")
187 } else {
188 false
189 },
190 delsp: if let Some(delsp) = mail.ctype.params.get("delsp") {
191 delsp.as_str().eq_ignore_ascii_case("yes")
192 } else {
193 false
194 },
195 });
196 }
197 Ok(())
198 }
199 }
200 }
201
202 fn cid_to_data_recursive<'a>(
205 &'a mut self,
206 context: &'a Context,
207 mail: &'a mailparse::ParsedMail<'a>,
208 ) -> Result<()> {
209 match get_mime_multipart_type(&mail.ctype) {
210 MimeMultipartType::Multiple => {
211 for cur_data in &mail.subparts {
212 self.cid_to_data_recursive(context, cur_data)?;
213 }
214 Ok(())
215 }
216 MimeMultipartType::Message => Ok(()),
217 MimeMultipartType::Single => {
218 let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
219 if mimetype.type_() == mime::IMAGE
220 && let Some(cid) = mail.headers.get_header_value(HeaderDef::ContentId)
221 && let Ok(cid) = parse_message_id(&cid)
222 && let Ok(replacement) = mimepart_to_data_url(mail)
223 {
224 let re_string = format!(
225 "(<img[^>]*src[^>]*=[^>]*)(cid:{})([^>]*>)",
226 regex::escape(&cid)
227 );
228 match regex::Regex::new(&re_string) {
229 Ok(re) => {
230 self.html = re
231 .replace_all(
232 &self.html,
233 format!("${{1}}{replacement}${{3}}").as_str(),
234 )
235 .as_ref()
236 .to_string()
237 }
238 Err(e) => warn!(
239 context,
240 "Cannot create regex for cid: {} throws {}", re_string, e
241 ),
242 }
243 }
244 Ok(())
245 }
246 }
247 }
248}
249
250fn mimepart_to_data_url(mail: &mailparse::ParsedMail<'_>) -> Result<String> {
252 let data = mail.get_body_raw()?;
253 let data = base64::engine::general_purpose::STANDARD.encode(data);
254 Ok(format!("data:{};base64,{}", mail.ctype.mimetype, data))
255}
256
257impl MsgId {
258 pub async fn get_html(self, context: &Context) -> Result<Option<String>> {
263 let (param, headers, compressed) = context
264 .sql
265 .query_row(
266 "SELECT param, mime_headers, mime_compressed FROM msgs WHERE id=?",
267 (self,),
268 |row| {
269 let param: String = row.get(0)?;
270 let param: Params = param.parse().unwrap_or_default();
271 let headers = sql::row_get_vec(row, 1)?;
272 let compressed: bool = row.get(2)?;
273 Ok((param, headers, compressed))
274 },
275 )
276 .await?;
277 if let Some(html) = param.get(SendHtml) {
278 return Ok(Some(html.to_string()));
279 }
280 let from_rawmime = |rawmime: Vec<u8>| {
281 if !rawmime.is_empty() {
282 match HtmlMsgParser::from_bytes(context, &rawmime) {
283 Err(err) => {
284 warn!(context, "get_html: parser error: {:#}", err);
285 Ok(None)
286 }
287 Ok((parser, _)) => Ok(Some(parser.html)),
288 }
289 } else {
290 warn!(context, "get_html: no mime for {}", self);
291 Ok(None)
292 }
293 };
294
295 if compressed {
296 return from_rawmime(buf_decompress(&headers)?);
297 }
298 let headers2 = headers.clone();
299 let compressed = match tokio::task::block_in_place(move || buf_compress(&headers2)) {
300 Err(e) => {
301 warn!(context, "get_mime_headers: buf_compress() failed: {}", e);
302 return from_rawmime(headers);
303 }
304 Ok(o) => o,
305 };
306 let update = |conn: &mut rusqlite::Connection| {
307 match conn.execute(
308 "
309UPDATE msgs SET mime_headers=?, mime_compressed=1
310WHERE id=? AND mime_headers!='' AND mime_compressed=0",
311 (compressed, self),
312 ) {
313 Ok(rows_updated) => ensure!(rows_updated <= 1),
314 Err(e) => {
315 warn!(context, "get_mime_headers: UPDATE failed: {}", e);
316 return Err(e.into());
317 }
318 }
319 Ok(())
320 };
321 if let Err(e) = context.sql.call_write(update).await {
322 warn!(
323 context,
324 "get_mime_headers: failed to update mime_headers: {}", e
325 );
326 }
327 from_rawmime(headers)
328 }
329}
330
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chat::{self, Chat, forward_msgs, save_msgs};

    use crate::constants;
    use crate::contact::ContactId;
    use crate::message::{MessengerMessage, Viewtype};
    use crate::receive_imf::receive_imf;
    use crate::test_utils::{TestContext, TestContextManager};

    /// A plain-text mail without `Content-Type` is rendered as generated HTML.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_plain_unspecified() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_plain_unspecified.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
        assert_eq!(
            parser.html,
            r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body dir="auto" style="unicode-bidi: plaintext">
This message does not have Content-Type nor Subject.<br/>
</body></html>
"#
        );
    }

    /// Non-UTF-8 plain text is decoded before being rendered as HTML.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_plain_iso88591() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_plain_iso88591.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
        assert_eq!(
            parser.html,
            r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body dir="auto" style="unicode-bidi: plaintext">
message with a non-UTF-8 encoding: äöüßÄÖÜ<br/>
</body></html>
"#
        );
    }

    /// `format=flowed` is detected and taken into account when rendering.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_plain_flowed() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_plain_flowed.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
        assert!(parser.plain.unwrap().flowed);
        assert_eq!(
            parser.html,
            r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body dir="auto" style="unicode-bidi: plaintext">
This line ends with a space and will be merged with the next one due to format=flowed.<br/>
<br/>
This line does not end with a space<br/>
and will be wrapped as usual.<br/>
</body></html>
"#
        );
    }

    /// A multipart mail that only offers plain text yields generated HTML.
    ///
    /// NOTE(review): the special characters appear unescaped in the expected
    /// string below; confirm this matches what `PlainText::to_html` emits.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_alt_plain() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_alt_plain.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
        assert_eq!(
            parser.html,
            r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body dir="auto" style="unicode-bidi: plaintext">
mime-modified should not be set set as there is no html and no special stuff;<br/>
although not being a delta-message.<br/>
test some special html-characters as < > and & but also " and ' :)<br/>
</body></html>
"#
        );
    }

    /// An HTML-only mail is passed through as is.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_html() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_html.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();

        // Carriage returns are removed before comparing.
        assert_eq!(
            parser.html.replace('\r', ""),
            r##"
<html>
 <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p>
</html>"##
        );
    }

    /// The HTML part of a multipart mail is used.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_alt_html() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_alt_html.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
        // Carriage returns are removed before comparing.
        assert_eq!(
            parser.html.replace('\r', ""),
            r##"<html>
 <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p>
</html>
"##
        );
    }

    /// When both plain text and HTML are present, the HTML part wins.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_alt_plain_html() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
        // Carriage returns are removed before comparing.
        assert_eq!(
            parser.html.replace('\r', ""),
            r##"<html>
 <p>
 this is <b>html</b>
 </p>
</html>
"##
        );
    }

    /// `cid:` image references are replaced by inline `data:` URLs.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_apple_cid_jpg() {
        let t = TestContext::new().await;

        // Sanity-check the fixture: it references the image by Content-ID
        // and does not contain a data URL yet.
        let raw = include_bytes!("../test-data/message/apple_cid_jpg.eml");
        let test = String::from_utf8_lossy(raw);
        assert!(test.contains("Content-Id: <8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box>"));
        assert!(test.contains("cid:8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box"));
        assert!(!test.contains("data:"));

        // After parsing, the reference has been inlined.
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).unwrap();
        assert!(parser.html.contains("<html>"));
        assert!(!parser.html.contains("Content-Id:"));
        assert!(parser.html.contains("data:image/jpeg;base64,/9j/4AAQ"));
        assert!(!parser.html.contains("cid:"));
    }

    /// Asking for the HTML of a non-existing message is an error.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_get_html_invalid_msgid() {
        let t = TestContext::new().await;
        let msg_id = MsgId::new(100);
        assert!(msg_id.get_html(&t).await.is_err());
    }

    /// HTML survives forwarding, on the sending and on the receiving side.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_html_forwarding() -> Result<()> {
        let mut tcm = TestContextManager::new();

        // Alice receives a non-messenger mail that has an HTML part.
        let alice = &tcm.alice().await;
        alice.allow_unencrypted().await?;
        let chat = alice
            .create_chat_with_contact("", "sender@testrun.org")
            .await;
        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
        receive_imf(alice, raw, false).await.unwrap();
        let msg = alice.get_last_msg_in(chat.get_id()).await;
        assert_ne!(msg.get_from_id(), ContactId::SELF);
        assert_eq!(msg.is_dc_message, MessengerMessage::No);
        assert!(!msg.is_forwarded());
        assert!(msg.get_text().contains("this is plain"));
        assert!(msg.has_html());
        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
        assert!(html.contains("this is <b>html</b>"));

        // Alice forwards the message to Bob.
        let chat_alice = alice.create_chat_with_contact("", "bob@example.net").await;
        forward_msgs(alice, &[msg.get_id()], chat_alice.get_id())
            .await
            .unwrap();

        /// Checks the forwarded copy in the forwarder's own chat.
        async fn check_sender(ctx: &TestContext, chat: &Chat) {
            let msg = ctx.get_last_msg_in(chat.get_id()).await;
            assert_eq!(msg.get_from_id(), ContactId::SELF);
            assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
            assert!(msg.is_forwarded());
            assert!(msg.get_text().contains("this is plain"));
            assert!(msg.has_html());
            let html = msg.get_id().get_html(ctx).await.unwrap().unwrap();
            assert!(html.contains("this is <b>html</b>"));
        }
        check_sender(alice, &chat_alice).await;

        // Bob receives the forwarded message.
        let bob = &tcm.bob().await;
        bob.allow_unencrypted().await?;
        let chat_bob = bob.create_chat_with_contact("", "alice@example.org").await;

        /// Delivers the sender's last sent message to `ctx` and checks it.
        async fn check_receiver(ctx: &TestContext, chat: &Chat, sender: &TestContext) {
            let msg = ctx.recv_msg(&sender.pop_sent_msg().await).await;
            assert_eq!(chat.id, msg.chat_id);
            assert_ne!(msg.get_from_id(), ContactId::SELF);
            assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
            assert!(msg.is_forwarded());
            assert!(msg.get_text().contains("this is plain"));
            assert!(msg.has_html());
            let html = msg.get_id().get_html(ctx).await.unwrap().unwrap();
            assert!(html.contains("this is <b>html</b>"));
        }
        check_receiver(bob, &chat_bob, alice).await;

        // Forward across contexts: afterwards Bob's chat must look like the
        // sending side and Alice must be able to receive the result.
        chat::forward_msgs_2ctx(alice, &[msg.get_id()], bob, chat_bob.get_id()).await?;
        check_sender(bob, &chat_bob).await;
        check_receiver(alice, &chat_alice, bob).await;

        // A text longer than `DC_DESIRED_TEXT_LEN` gets an HTML version,
        // which must be identical after a cross-context forward.
        let line = "this text with 42 chars is just repeated.\n";
        let long_txt = line.repeat(constants::DC_DESIRED_TEXT_LEN / line.len() + 2);
        let mut msg = Message::new_text(long_txt);
        alice.send_msg(chat_alice.id, &mut msg).await;
        let msg = alice.get_last_msg_in(chat_alice.id).await;
        assert!(msg.has_html());
        let html = msg.id.get_html(alice).await?.unwrap();
        chat::forward_msgs_2ctx(alice, &[msg.get_id()], bob, chat_bob.get_id()).await?;
        let msg = bob.get_last_msg_in(chat_bob.id).await;
        assert!(msg.has_html());
        assert_eq!(msg.id.get_html(bob).await?.unwrap(), html);
        Ok(())
    }

    /// Saving a message keeps its HTML and its original sender.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_html_save_msg() -> Result<()> {
        let mut tcm = TestContextManager::new();
        let alice = &tcm.alice().await;
        alice.allow_unencrypted().await?;
        let chat = alice
            .create_chat_with_contact("", "sender@testrun.org")
            .await;
        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
        receive_imf(alice, raw, false).await?;
        let msg = alice.get_last_msg_in(chat.get_id()).await;

        // Save the message; the copy shows up in the self chat.
        let self_chat = alice.get_self_chat().await;
        save_msgs(alice, &[msg.id]).await?;
        let saved_msg = alice.get_last_msg_in(self_chat.get_id()).await;
        assert_ne!(saved_msg.id, msg.id);
        assert_eq!(saved_msg.get_original_msg_id(alice).await?.unwrap(), msg.id);
        assert!(!saved_msg.is_forwarded());
        assert_ne!(saved_msg.get_from_id(), ContactId::SELF);
        assert_eq!(saved_msg.get_from_id(), msg.get_from_id());
        assert_eq!(saved_msg.is_dc_message, MessengerMessage::No);
        assert!(saved_msg.get_text().contains("this is plain"));
        assert!(saved_msg.has_html());
        let html = saved_msg.get_id().get_html(alice).await?.unwrap();
        assert!(html.contains("this is <b>html</b>"));

        Ok(())
    }

    /// HTML survives forwarding when the forwarded message shows the padlock.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_html_forwarding_encrypted() {
        let mut tcm = TestContextManager::new();
        let alice = &tcm.alice().await;
        alice.allow_unencrypted().await.unwrap();
        let chat = alice
            .create_chat_with_contact("", "sender@testrun.org")
            .await;
        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
        receive_imf(alice, raw, false).await.unwrap();
        let msg = alice.get_last_msg_in(chat.get_id()).await;

        // Forward the message to the self chat and grab what was sent.
        let chat = alice.get_self_chat().await;
        forward_msgs(alice, &[msg.get_id()], chat.get_id())
            .await
            .unwrap();
        let msg = alice.pop_sent_msg().await;

        // Receive it in a fresh context obtained from `tcm.alice()`.
        let alice = &tcm.alice().await;
        let msg = alice.recv_msg(&msg).await;
        assert_eq!(msg.chat_id, alice.get_self_chat().await.id);
        assert_eq!(msg.get_from_id(), ContactId::SELF);
        assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
        assert!(msg.get_showpadlock());
        assert!(msg.is_forwarded());
        assert!(msg.get_text().contains("this is plain"));
        assert!(msg.has_html());
        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
        assert!(html.contains("this is <b>html</b>"));
    }

    /// HTML set through `Message::set_html` reaches sender and receiver.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_set_html() {
        let mut tcm = TestContextManager::new();
        let alice = &tcm.alice().await;
        let bob = &tcm.bob().await;

        // Alice sends a message with explicitly set HTML.
        let chat_id = alice.create_chat(bob).await.id;
        let mut msg = Message::new_text("plain text".to_string());
        msg.set_html(Some("<b>html</b> text".to_string()));
        assert!(msg.mime_modified);
        chat::send_msg(alice, chat_id, &mut msg).await.unwrap();

        // Check the message as stored on Alice's side.
        let msg = alice.get_last_msg_in(chat_id).await;
        assert_eq!(msg.get_text(), "plain text");
        assert!(!msg.is_forwarded());
        assert!(msg.mime_modified);
        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
        assert!(html.contains("<b>html</b> text"));

        // Check the message as received by Bob.
        let chat_id = bob.create_chat(alice).await.id;
        let msg = bob.recv_msg(&alice.pop_sent_msg().await).await;
        assert_eq!(msg.chat_id, chat_id);
        assert_eq!(msg.get_text(), "plain text");
        assert!(!msg.is_forwarded());
        assert!(msg.mime_modified);
        let html = msg.get_id().get_html(bob).await.unwrap().unwrap();
        assert!(html.contains("<b>html</b> text"));
    }

    /// Non-ASCII characters of the `cp1252-html.eml` fixture end up intact in
    /// both the message text and the HTML.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_cp1252_html() -> Result<()> {
        let mut tcm = TestContextManager::new();
        let alice = &tcm.alice().await;
        alice.allow_unencrypted().await?;
        receive_imf(
            alice,
            include_bytes!("../test-data/message/cp1252-html.eml"),
            false,
        )
        .await?;
        let msg = alice.get_last_msg().await;
        assert_eq!(msg.viewtype, Viewtype::Text);
        assert!(msg.text.contains("foo bar ä ö ü ß"));
        assert!(msg.has_html());
        let html = msg.get_id().get_html(alice).await?.unwrap();
        println!("{html}");
        assert!(html.contains("foo bar ä ö ü ß"));
        Ok(())
    }
}