deltachat/
html.rs

1//! # Get message as HTML.
2//!
3//! Use `Message.has_html()` to check if the UI shall render a
4//! corresponding button and `MsgId.get_html()` to get the full message.
5//!
6//! Even when the original mime-message is not HTML,
7//! `MsgId.get_html()` will return HTML -
8//! this allows nice quoting, handling linebreaks properly etc.
9
10use std::mem;
11
12use anyhow::{Context as _, Result};
13use base64::Engine as _;
14use mailparse::ParsedContentType;
15use mime::Mime;
16
17use crate::context::Context;
18use crate::headerdef::{HeaderDef, HeaderDefMap};
19use crate::message::{self, Message, MsgId};
20use crate::mimeparser::parse_message_id;
21use crate::param::Param::SendHtml;
22use crate::plaintext::PlainText;
23
24impl Message {
25    /// Check if the message can be retrieved as HTML.
26    /// Typically, this is the case, when the mime structure of a Message is modified,
27    /// meaning that some text is cut or the original message
28    /// is in HTML and `simplify()` may hide some maybe important information.
29    /// The corresponding ffi-function is `dc_msg_has_html()`.
30    /// To get the HTML-code of the message, use `MsgId.get_html()`.
31    pub fn has_html(&self) -> bool {
32        self.mime_modified
33    }
34
35    /// Set HTML-part part of a message that is about to be sent.
36    /// The HTML-part is written to the database before sending and
37    /// used as the `text/html` part in the MIME-structure.
38    ///
39    /// Received HTML parts are handled differently,
40    /// they are saved together with the whole MIME-structure
41    /// in `mime_headers` and the HTML-part is extracted using `MsgId::get_html()`.
42    /// (To underline this asynchronicity, we are using the wording "SendHtml")
43    pub fn set_html(&mut self, html: Option<String>) {
44        if let Some(html) = html {
45            self.param.set(SendHtml, html);
46            self.mime_modified = true;
47        } else {
48            self.param.remove(SendHtml);
49            self.mime_modified = false;
50        }
51    }
52}
53
/// Coarse classification of a MIME part.
///
/// Used while walking a parsed mail to decide
/// whether a part has to be descended into or can be handled directly.
enum MimeMultipartType {
    /// A `multipart*` container that carries a `boundary` parameter.
    Multiple,
    /// Any part that is neither a multipart container nor an attached message.
    Single,
    /// An attached message, i.e. a part of type `message/rfc822`.
    Message,
}
62
63/// Function takes a content type from a ParsedMail structure
64/// and checks and returns the rough mime-type.
65fn get_mime_multipart_type(ctype: &ParsedContentType) -> MimeMultipartType {
66    let mimetype = ctype.mimetype.to_lowercase();
67    if mimetype.starts_with("multipart") && ctype.params.contains_key("boundary") {
68        MimeMultipartType::Multiple
69    } else if mimetype == "message/rfc822" {
70        MimeMultipartType::Message
71    } else {
72        MimeMultipartType::Single
73    }
74}
75
76/// HtmlMsgParser converts a mime-message to HTML.
77#[derive(Debug)]
78struct HtmlMsgParser {
79    pub html: String,
80    pub plain: Option<PlainText>,
81    pub(crate) msg_html: String,
82}
83
84impl HtmlMsgParser {
85    /// Function takes a raw mime-message string,
86    /// searches for the main-text part
87    /// and returns that as parser.html
88    pub async fn from_bytes<'a>(
89        context: &Context,
90        rawmime: &'a [u8],
91    ) -> Result<(Self, mailparse::ParsedMail<'a>)> {
92        let mut parser = HtmlMsgParser {
93            html: "".to_string(),
94            plain: None,
95            msg_html: "".to_string(),
96        };
97
98        let parsedmail = mailparse::parse_mail(rawmime).context("Failed to parse mail")?;
99
100        parser.collect_texts_recursive(context, &parsedmail).await?;
101
102        if parser.html.is_empty() {
103            if let Some(plain) = &parser.plain {
104                parser.html = plain.to_html();
105            }
106        } else {
107            parser.cid_to_data_recursive(context, &parsedmail).await?;
108        }
109        parser.html += &mem::take(&mut parser.msg_html);
110        Ok((parser, parsedmail))
111    }
112
113    /// Function iterates over all mime-parts
114    /// and searches for text/plain and text/html parts and saves the
115    /// first one found.
116    /// in the corresponding structure fields.
117    ///
118    /// Usually, there is at most one plain-text and one HTML-text part,
119    /// multiple plain-text parts might be used for mailinglist-footers,
120    /// therefore we use the first one.
121    async fn collect_texts_recursive<'a>(
122        &'a mut self,
123        context: &'a Context,
124        mail: &'a mailparse::ParsedMail<'a>,
125    ) -> Result<()> {
126        match get_mime_multipart_type(&mail.ctype) {
127            MimeMultipartType::Multiple => {
128                for cur_data in &mail.subparts {
129                    Box::pin(self.collect_texts_recursive(context, cur_data)).await?
130                }
131                Ok(())
132            }
133            MimeMultipartType::Message => {
134                let raw = mail.get_body_raw()?;
135                if raw.is_empty() {
136                    return Ok(());
137                }
138                let (parser, mail) = Box::pin(HtmlMsgParser::from_bytes(context, &raw)).await?;
139                if !parser.html.is_empty() {
140                    let mut text = "\r\n\r\n".to_string();
141                    for h in mail.headers {
142                        let key = h.get_key();
143                        if matches!(
144                            key.to_lowercase().as_str(),
145                            "date"
146                                | "from"
147                                | "sender"
148                                | "reply-to"
149                                | "to"
150                                | "cc"
151                                | "bcc"
152                                | "subject"
153                        ) {
154                            text += &format!("{key}: {}\r\n", h.get_value());
155                        }
156                    }
157                    text += "\r\n";
158                    self.msg_html += &PlainText {
159                        text,
160                        flowed: false,
161                        delsp: false,
162                    }
163                    .to_html();
164                    self.msg_html += &parser.html;
165                }
166                Ok(())
167            }
168            MimeMultipartType::Single => {
169                let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
170                if mimetype == mime::TEXT_HTML {
171                    if self.html.is_empty() {
172                        if let Ok(decoded_data) = mail.get_body() {
173                            self.html = decoded_data;
174                        }
175                    }
176                } else if mimetype == mime::TEXT_PLAIN && self.plain.is_none() {
177                    if let Ok(decoded_data) = mail.get_body() {
178                        self.plain = Some(PlainText {
179                            text: decoded_data,
180                            flowed: if let Some(format) = mail.ctype.params.get("format") {
181                                format.as_str().eq_ignore_ascii_case("flowed")
182                            } else {
183                                false
184                            },
185                            delsp: if let Some(delsp) = mail.ctype.params.get("delsp") {
186                                delsp.as_str().eq_ignore_ascii_case("yes")
187                            } else {
188                                false
189                            },
190                        });
191                    }
192                }
193                Ok(())
194            }
195        }
196    }
197
198    /// Replace cid:-protocol by the data:-protocol where appropriate.
199    /// This allows the final html-file to be self-contained.
200    async fn cid_to_data_recursive<'a>(
201        &'a mut self,
202        context: &'a Context,
203        mail: &'a mailparse::ParsedMail<'a>,
204    ) -> Result<()> {
205        match get_mime_multipart_type(&mail.ctype) {
206            MimeMultipartType::Multiple => {
207                for cur_data in &mail.subparts {
208                    Box::pin(self.cid_to_data_recursive(context, cur_data)).await?;
209                }
210                Ok(())
211            }
212            MimeMultipartType::Message => Ok(()),
213            MimeMultipartType::Single => {
214                let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
215                if mimetype.type_() == mime::IMAGE {
216                    if let Some(cid) = mail.headers.get_header_value(HeaderDef::ContentId) {
217                        if let Ok(cid) = parse_message_id(&cid) {
218                            if let Ok(replacement) = mimepart_to_data_url(mail) {
219                                let re_string = format!(
220                                    "(<img[^>]*src[^>]*=[^>]*)(cid:{})([^>]*>)",
221                                    regex::escape(&cid)
222                                );
223                                match regex::Regex::new(&re_string) {
224                                    Ok(re) => {
225                                        self.html = re
226                                            .replace_all(
227                                                &self.html,
228                                                format!("${{1}}{replacement}${{3}}").as_str(),
229                                            )
230                                            .as_ref()
231                                            .to_string()
232                                    }
233                                    Err(e) => warn!(
234                                        context,
235                                        "Cannot create regex for cid: {} throws {}", re_string, e
236                                    ),
237                                }
238                            }
239                        }
240                    }
241                }
242                Ok(())
243            }
244        }
245    }
246}
247
248/// Convert a mime part to a data: url as defined in [RFC 2397](https://tools.ietf.org/html/rfc2397).
249fn mimepart_to_data_url(mail: &mailparse::ParsedMail<'_>) -> Result<String> {
250    let data = mail.get_body_raw()?;
251    let data = base64::engine::general_purpose::STANDARD.encode(data);
252    Ok(format!("data:{};base64,{}", mail.ctype.mimetype, data))
253}
254
255impl MsgId {
256    /// Get HTML by database message id.
257    /// This requires `mime_headers` field to be set for the message;
258    /// this is the case at least when `Message.has_html()` returns true
259    /// (we do not save raw mime unconditionally in the database to save space).
260    /// The corresponding ffi-function is `dc_get_msg_html()`.
261    pub async fn get_html(self, context: &Context) -> Result<Option<String>> {
262        let rawmime = message::get_mime_headers(context, self).await?;
263
264        if !rawmime.is_empty() {
265            match HtmlMsgParser::from_bytes(context, &rawmime).await {
266                Err(err) => {
267                    warn!(context, "get_html: parser error: {:#}", err);
268                    Ok(None)
269                }
270                Ok((parser, _)) => Ok(Some(parser.html)),
271            }
272        } else {
273            warn!(context, "get_html: no mime for {}", self);
274            Ok(None)
275        }
276    }
277}
278
279#[cfg(test)]
280mod tests {
281    use super::*;
282    use crate::chat;
283    use crate::chat::{forward_msgs, save_msgs};
284    use crate::config::Config;
285    use crate::contact::ContactId;
286    use crate::message::{MessengerMessage, Viewtype};
287    use crate::receive_imf::receive_imf;
288    use crate::test_utils::{TestContext, TestContextManager};
289
290    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
291    async fn test_htmlparse_plain_unspecified() {
292        let t = TestContext::new().await;
293        let raw = include_bytes!("../test-data/message/text_plain_unspecified.eml");
294        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
295        assert_eq!(
296            parser.html,
297            r#"<!DOCTYPE html>
298<html><head>
299<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
300<meta name="color-scheme" content="light dark" />
301</head><body>
302This message does not have Content-Type nor Subject.<br/>
303</body></html>
304"#
305        );
306    }
307
308    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
309    async fn test_htmlparse_plain_iso88591() {
310        let t = TestContext::new().await;
311        let raw = include_bytes!("../test-data/message/text_plain_iso88591.eml");
312        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
313        assert_eq!(
314            parser.html,
315            r#"<!DOCTYPE html>
316<html><head>
317<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
318<meta name="color-scheme" content="light dark" />
319</head><body>
320message with a non-UTF-8 encoding: äöüßÄÖÜ<br/>
321</body></html>
322"#
323        );
324    }
325
326    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
327    async fn test_htmlparse_plain_flowed() {
328        let t = TestContext::new().await;
329        let raw = include_bytes!("../test-data/message/text_plain_flowed.eml");
330        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
331        assert!(parser.plain.unwrap().flowed);
332        assert_eq!(
333            parser.html,
334            r#"<!DOCTYPE html>
335<html><head>
336<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
337<meta name="color-scheme" content="light dark" />
338</head><body>
339This line ends with a space and will be merged with the next one due to format=flowed.<br/>
340<br/>
341This line does not end with a space<br/>
342and will be wrapped as usual.<br/>
343</body></html>
344"#
345        );
346    }
347
348    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
349    async fn test_htmlparse_alt_plain() {
350        let t = TestContext::new().await;
351        let raw = include_bytes!("../test-data/message/text_alt_plain.eml");
352        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
353        assert_eq!(
354            parser.html,
355            r#"<!DOCTYPE html>
356<html><head>
357<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
358<meta name="color-scheme" content="light dark" />
359</head><body>
360mime-modified should not be set set as there is no html and no special stuff;<br/>
361although not being a delta-message.<br/>
362test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x27; :)<br/>
363</body></html>
364"#
365        );
366    }
367
368    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
369    async fn test_htmlparse_html() {
370        let t = TestContext::new().await;
371        let raw = include_bytes!("../test-data/message/text_html.eml");
372        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
373
374        // on windows, `\r\n` linends are returned from mimeparser,
375        // however, rust multiline-strings use just `\n`;
376        // therefore, we just remove `\r` before comparison.
377        assert_eq!(
378            parser.html.replace('\r', ""),
379            r##"
380<html>
381  <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p>
382</html>"##
383        );
384    }
385
386    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
387    async fn test_htmlparse_alt_html() {
388        let t = TestContext::new().await;
389        let raw = include_bytes!("../test-data/message/text_alt_html.eml");
390        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
391        assert_eq!(
392            parser.html.replace('\r', ""), // see comment in test_htmlparse_html()
393            r##"<html>
394  <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p>
395</html>
396"##
397        );
398    }
399
400    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
401    async fn test_htmlparse_alt_plain_html() {
402        let t = TestContext::new().await;
403        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
404        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
405        assert_eq!(
406            parser.html.replace('\r', ""), // see comment in test_htmlparse_html()
407            r##"<html>
408  <p>
409    this is <b>html</b>
410  </p>
411</html>
412"##
413        );
414    }
415
416    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
417    async fn test_htmlparse_apple_cid_jpg() {
418        // load raw mime html-data with related image-part (cid:)
419        // and make sure, Content-Id has angle-brackets that are removed correctly.
420        let t = TestContext::new().await;
421        let raw = include_bytes!("../test-data/message/apple_cid_jpg.eml");
422        let test = String::from_utf8_lossy(raw);
423        assert!(test.contains("Content-Id: <8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box>"));
424        assert!(test.contains("cid:8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box"));
425        assert!(test.find("data:").is_none());
426
427        // parsing converts cid: to data:
428        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
429        assert!(parser.html.contains("<html>"));
430        assert!(!parser.html.contains("Content-Id:"));
431        assert!(parser.html.contains("data:image/jpeg;base64,/9j/4AAQ"));
432        assert!(!parser.html.contains("cid:"));
433    }
434
435    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
436    async fn test_get_html_invalid_msgid() {
437        let t = TestContext::new().await;
438        let msg_id = MsgId::new(100);
439        assert!(msg_id.get_html(&t).await.is_err())
440    }
441
442    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
443    async fn test_html_forwarding() {
444        // alice receives a non-delta html-message
445        let mut tcm = TestContextManager::new();
446        let alice = &tcm.alice().await;
447        let chat = alice
448            .create_chat_with_contact("", "sender@testrun.org")
449            .await;
450        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
451        receive_imf(alice, raw, false).await.unwrap();
452        let msg = alice.get_last_msg_in(chat.get_id()).await;
453        assert_ne!(msg.get_from_id(), ContactId::SELF);
454        assert_eq!(msg.is_dc_message, MessengerMessage::No);
455        assert!(!msg.is_forwarded());
456        assert!(msg.get_text().contains("this is plain"));
457        assert!(msg.has_html());
458        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
459        assert!(html.contains("this is <b>html</b>"));
460
461        // alice: create chat with bob and forward received html-message there
462        let chat = alice.create_chat_with_contact("", "bob@example.net").await;
463        forward_msgs(alice, &[msg.get_id()], chat.get_id())
464            .await
465            .unwrap();
466        let msg = alice.get_last_msg_in(chat.get_id()).await;
467        assert_eq!(msg.get_from_id(), ContactId::SELF);
468        assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
469        assert!(msg.is_forwarded());
470        assert!(msg.get_text().contains("this is plain"));
471        assert!(msg.has_html());
472        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
473        assert!(html.contains("this is <b>html</b>"));
474
475        // bob: check that bob also got the html-part of the forwarded message
476        let bob = &tcm.bob().await;
477        let chat = bob.create_chat_with_contact("", "alice@example.org").await;
478        let msg = bob.recv_msg(&alice.pop_sent_msg().await).await;
479        assert_eq!(chat.id, msg.chat_id);
480        assert_ne!(msg.get_from_id(), ContactId::SELF);
481        assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
482        assert!(msg.is_forwarded());
483        assert!(msg.get_text().contains("this is plain"));
484        assert!(msg.has_html());
485        let html = msg.get_id().get_html(bob).await.unwrap().unwrap();
486        assert!(html.contains("this is <b>html</b>"));
487    }
488
489    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
490    async fn test_html_save_msg() -> Result<()> {
491        // Alice receives a non-delta html-message
492        let alice = TestContext::new_alice().await;
493        let chat = alice
494            .create_chat_with_contact("", "sender@testrun.org")
495            .await;
496        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
497        receive_imf(&alice, raw, false).await?;
498        let msg = alice.get_last_msg_in(chat.get_id()).await;
499
500        // Alice saves the message
501        let self_chat = alice.get_self_chat().await;
502        save_msgs(&alice, &[msg.id]).await?;
503        let saved_msg = alice.get_last_msg_in(self_chat.get_id()).await;
504        assert_ne!(saved_msg.id, msg.id);
505        assert_eq!(
506            saved_msg.get_original_msg_id(&alice).await?.unwrap(),
507            msg.id
508        );
509        assert!(!saved_msg.is_forwarded()); // UI should not flag "saved messages" as "forwarded"
510        assert_ne!(saved_msg.get_from_id(), ContactId::SELF);
511        assert_eq!(saved_msg.get_from_id(), msg.get_from_id());
512        assert_eq!(saved_msg.is_dc_message, MessengerMessage::No);
513        assert!(saved_msg.get_text().contains("this is plain"));
514        assert!(saved_msg.has_html());
515        let html = saved_msg.get_id().get_html(&alice).await?.unwrap();
516        assert!(html.contains("this is <b>html</b>"));
517
518        Ok(())
519    }
520
521    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
522    async fn test_html_forwarding_encrypted() {
523        let mut tcm = TestContextManager::new();
524        // Alice receives a non-delta html-message
525        // (`ShowEmails=AcceptedContacts` lets Alice actually receive non-delta messages for known
526        // contacts, the contact is marked as known by creating a chat using `chat_with_contact()`)
527        let alice = &tcm.alice().await;
528        alice
529            .set_config(Config::ShowEmails, Some("1"))
530            .await
531            .unwrap();
532        let chat = alice
533            .create_chat_with_contact("", "sender@testrun.org")
534            .await;
535        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
536        receive_imf(alice, raw, false).await.unwrap();
537        let msg = alice.get_last_msg_in(chat.get_id()).await;
538
539        // forward the message to saved-messages,
540        // this will encrypt the message as new_alice() has set up keys
541        let chat = alice.get_self_chat().await;
542        forward_msgs(alice, &[msg.get_id()], chat.get_id())
543            .await
544            .unwrap();
545        let msg = alice.pop_sent_msg().await;
546
547        // receive the message on another device
548        let alice = &tcm.alice().await;
549        alice
550            .set_config(Config::ShowEmails, Some("0"))
551            .await
552            .unwrap();
553        let msg = alice.recv_msg(&msg).await;
554        assert_eq!(msg.chat_id, alice.get_self_chat().await.id);
555        assert_eq!(msg.get_from_id(), ContactId::SELF);
556        assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
557        assert!(msg.get_showpadlock());
558        assert!(msg.is_forwarded());
559        assert!(msg.get_text().contains("this is plain"));
560        assert!(msg.has_html());
561        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
562        assert!(html.contains("this is <b>html</b>"));
563    }
564
565    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
566    async fn test_set_html() {
567        let mut tcm = TestContextManager::new();
568        let alice = &tcm.alice().await;
569        let bob = &tcm.bob().await;
570
571        // alice sends a message with html-part to bob
572        let chat_id = alice.create_chat(bob).await.id;
573        let mut msg = Message::new_text("plain text".to_string());
574        msg.set_html(Some("<b>html</b> text".to_string()));
575        assert!(msg.mime_modified);
576        chat::send_msg(alice, chat_id, &mut msg).await.unwrap();
577
578        // check the message is written correctly to alice's db
579        let msg = alice.get_last_msg_in(chat_id).await;
580        assert_eq!(msg.get_text(), "plain text");
581        assert!(!msg.is_forwarded());
582        assert!(msg.mime_modified);
583        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
584        assert!(html.contains("<b>html</b> text"));
585
586        // let bob receive the message
587        let chat_id = bob.create_chat(alice).await.id;
588        let msg = bob.recv_msg(&alice.pop_sent_msg().await).await;
589        assert_eq!(msg.chat_id, chat_id);
590        assert_eq!(msg.get_text(), "plain text");
591        assert!(!msg.is_forwarded());
592        assert!(msg.mime_modified);
593        let html = msg.get_id().get_html(bob).await.unwrap().unwrap();
594        assert!(html.contains("<b>html</b> text"));
595    }
596
597    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
598    async fn test_cp1252_html() -> Result<()> {
599        let t = TestContext::new_alice().await;
600        receive_imf(
601            &t,
602            include_bytes!("../test-data/message/cp1252-html.eml"),
603            false,
604        )
605        .await?;
606        let msg = t.get_last_msg().await;
607        assert_eq!(msg.viewtype, Viewtype::Text);
608        assert!(msg.text.contains("foo bar ä ö ü ß"));
609        assert!(msg.has_html());
610        let html = msg.get_id().get_html(&t).await?.unwrap();
611        println!("{html}");
612        assert!(html.contains("foo bar ä ö ü ß"));
613        Ok(())
614    }
615}