deltachat/
html.rs

1//! # Get message as HTML.
2//!
3//! Use `Message.has_html()` to check if the UI shall render a
4//! corresponding button and `MsgId.get_html()` to get the full message.
5//!
6//! Even when the original mime-message is not HTML,
7//! `MsgId.get_html()` will return HTML -
8//! this allows nice quoting, handling linebreaks properly etc.
9
10use std::mem;
11
12use anyhow::{Context as _, Result};
13use base64::Engine as _;
14use mailparse::ParsedContentType;
15use mime::Mime;
16
17use crate::context::Context;
18use crate::headerdef::{HeaderDef, HeaderDefMap};
19use crate::log::warn;
20use crate::message::{self, Message, MsgId};
21use crate::mimeparser::parse_message_id;
22use crate::param::Param::SendHtml;
23use crate::plaintext::PlainText;
24
25impl Message {
26    /// Check if the message can be retrieved as HTML.
27    /// Typically, this is the case, when the mime structure of a Message is modified,
28    /// meaning that some text is cut or the original message
29    /// is in HTML and `simplify()` may hide some maybe important information.
30    /// The corresponding ffi-function is `dc_msg_has_html()`.
31    /// To get the HTML-code of the message, use `MsgId.get_html()`.
32    pub fn has_html(&self) -> bool {
33        self.mime_modified
34    }
35
36    /// Set HTML-part part of a message that is about to be sent.
37    /// The HTML-part is written to the database before sending and
38    /// used as the `text/html` part in the MIME-structure.
39    ///
40    /// Received HTML parts are handled differently,
41    /// they are saved together with the whole MIME-structure
42    /// in `mime_headers` and the HTML-part is extracted using `MsgId::get_html()`.
43    /// (To underline this asynchronicity, we are using the wording "SendHtml")
44    pub fn set_html(&mut self, html: Option<String>) {
45        if let Some(html) = html {
46            self.param.set(SendHtml, html);
47            self.mime_modified = true;
48        } else {
49            self.param.remove(SendHtml);
50            self.mime_modified = false;
51        }
52    }
53}
54
/// Rough classification of a mime-type.
///
/// This is mainly useful while iterating over mime-parts
/// to decide whether a part has subparts that need to be visited.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MimeMultipartType {
    /// A `multipart*` type that carries a `boundary` parameter.
    Multiple,
    /// Any type that is neither a multipart container nor `message/rfc822`.
    Single,
    /// A `message/rfc822` part.
    Message,
}
63
64/// Function takes a content type from a ParsedMail structure
65/// and checks and returns the rough mime-type.
66fn get_mime_multipart_type(ctype: &ParsedContentType) -> MimeMultipartType {
67    let mimetype = ctype.mimetype.to_lowercase();
68    if mimetype.starts_with("multipart") && ctype.params.contains_key("boundary") {
69        MimeMultipartType::Multiple
70    } else if mimetype == "message/rfc822" {
71        MimeMultipartType::Message
72    } else {
73        MimeMultipartType::Single
74    }
75}
76
77/// HtmlMsgParser converts a mime-message to HTML.
78#[derive(Debug)]
79struct HtmlMsgParser {
80    pub html: String,
81    pub plain: Option<PlainText>,
82    pub(crate) msg_html: String,
83}
84
85impl HtmlMsgParser {
86    /// Function takes a raw mime-message string,
87    /// searches for the main-text part
88    /// and returns that as parser.html
89    pub async fn from_bytes<'a>(
90        context: &Context,
91        rawmime: &'a [u8],
92    ) -> Result<(Self, mailparse::ParsedMail<'a>)> {
93        let mut parser = HtmlMsgParser {
94            html: "".to_string(),
95            plain: None,
96            msg_html: "".to_string(),
97        };
98
99        let parsedmail = mailparse::parse_mail(rawmime).context("Failed to parse mail")?;
100
101        parser.collect_texts_recursive(context, &parsedmail).await?;
102
103        if parser.html.is_empty() {
104            if let Some(plain) = &parser.plain {
105                parser.html = plain.to_html();
106            }
107        } else {
108            parser.cid_to_data_recursive(context, &parsedmail).await?;
109        }
110        parser.html += &mem::take(&mut parser.msg_html);
111        Ok((parser, parsedmail))
112    }
113
114    /// Function iterates over all mime-parts
115    /// and searches for text/plain and text/html parts and saves the
116    /// first one found.
117    /// in the corresponding structure fields.
118    ///
119    /// Usually, there is at most one plain-text and one HTML-text part,
120    /// multiple plain-text parts might be used for mailinglist-footers,
121    /// therefore we use the first one.
122    async fn collect_texts_recursive<'a>(
123        &'a mut self,
124        context: &'a Context,
125        mail: &'a mailparse::ParsedMail<'a>,
126    ) -> Result<()> {
127        match get_mime_multipart_type(&mail.ctype) {
128            MimeMultipartType::Multiple => {
129                for cur_data in &mail.subparts {
130                    Box::pin(self.collect_texts_recursive(context, cur_data)).await?
131                }
132                Ok(())
133            }
134            MimeMultipartType::Message => {
135                let raw = mail.get_body_raw()?;
136                if raw.is_empty() {
137                    return Ok(());
138                }
139                let (parser, mail) = Box::pin(HtmlMsgParser::from_bytes(context, &raw)).await?;
140                if !parser.html.is_empty() {
141                    let mut text = "\r\n\r\n".to_string();
142                    for h in mail.headers {
143                        let key = h.get_key();
144                        if matches!(
145                            key.to_lowercase().as_str(),
146                            "date"
147                                | "from"
148                                | "sender"
149                                | "reply-to"
150                                | "to"
151                                | "cc"
152                                | "bcc"
153                                | "subject"
154                        ) {
155                            text += &format!("{key}: {}\r\n", h.get_value());
156                        }
157                    }
158                    text += "\r\n";
159                    self.msg_html += &PlainText {
160                        text,
161                        flowed: false,
162                        delsp: false,
163                    }
164                    .to_html();
165                    self.msg_html += &parser.html;
166                }
167                Ok(())
168            }
169            MimeMultipartType::Single => {
170                let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
171                if mimetype == mime::TEXT_HTML {
172                    if self.html.is_empty() {
173                        if let Ok(decoded_data) = mail.get_body() {
174                            self.html = decoded_data;
175                        }
176                    }
177                } else if mimetype == mime::TEXT_PLAIN && self.plain.is_none() {
178                    if let Ok(decoded_data) = mail.get_body() {
179                        self.plain = Some(PlainText {
180                            text: decoded_data,
181                            flowed: if let Some(format) = mail.ctype.params.get("format") {
182                                format.as_str().eq_ignore_ascii_case("flowed")
183                            } else {
184                                false
185                            },
186                            delsp: if let Some(delsp) = mail.ctype.params.get("delsp") {
187                                delsp.as_str().eq_ignore_ascii_case("yes")
188                            } else {
189                                false
190                            },
191                        });
192                    }
193                }
194                Ok(())
195            }
196        }
197    }
198
199    /// Replace cid:-protocol by the data:-protocol where appropriate.
200    /// This allows the final html-file to be self-contained.
201    async fn cid_to_data_recursive<'a>(
202        &'a mut self,
203        context: &'a Context,
204        mail: &'a mailparse::ParsedMail<'a>,
205    ) -> Result<()> {
206        match get_mime_multipart_type(&mail.ctype) {
207            MimeMultipartType::Multiple => {
208                for cur_data in &mail.subparts {
209                    Box::pin(self.cid_to_data_recursive(context, cur_data)).await?;
210                }
211                Ok(())
212            }
213            MimeMultipartType::Message => Ok(()),
214            MimeMultipartType::Single => {
215                let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
216                if mimetype.type_() == mime::IMAGE {
217                    if let Some(cid) = mail.headers.get_header_value(HeaderDef::ContentId) {
218                        if let Ok(cid) = parse_message_id(&cid) {
219                            if let Ok(replacement) = mimepart_to_data_url(mail) {
220                                let re_string = format!(
221                                    "(<img[^>]*src[^>]*=[^>]*)(cid:{})([^>]*>)",
222                                    regex::escape(&cid)
223                                );
224                                match regex::Regex::new(&re_string) {
225                                    Ok(re) => {
226                                        self.html = re
227                                            .replace_all(
228                                                &self.html,
229                                                format!("${{1}}{replacement}${{3}}").as_str(),
230                                            )
231                                            .as_ref()
232                                            .to_string()
233                                    }
234                                    Err(e) => warn!(
235                                        context,
236                                        "Cannot create regex for cid: {} throws {}", re_string, e
237                                    ),
238                                }
239                            }
240                        }
241                    }
242                }
243                Ok(())
244            }
245        }
246    }
247}
248
249/// Convert a mime part to a data: url as defined in [RFC 2397](https://tools.ietf.org/html/rfc2397).
250fn mimepart_to_data_url(mail: &mailparse::ParsedMail<'_>) -> Result<String> {
251    let data = mail.get_body_raw()?;
252    let data = base64::engine::general_purpose::STANDARD.encode(data);
253    Ok(format!("data:{};base64,{}", mail.ctype.mimetype, data))
254}
255
256impl MsgId {
257    /// Get HTML by database message id.
258    /// This requires `mime_headers` field to be set for the message;
259    /// this is the case at least when `Message.has_html()` returns true
260    /// (we do not save raw mime unconditionally in the database to save space).
261    /// The corresponding ffi-function is `dc_get_msg_html()`.
262    pub async fn get_html(self, context: &Context) -> Result<Option<String>> {
263        let rawmime = message::get_mime_headers(context, self).await?;
264
265        if !rawmime.is_empty() {
266            match HtmlMsgParser::from_bytes(context, &rawmime).await {
267                Err(err) => {
268                    warn!(context, "get_html: parser error: {:#}", err);
269                    Ok(None)
270                }
271                Ok((parser, _)) => Ok(Some(parser.html)),
272            }
273        } else {
274            warn!(context, "get_html: no mime for {}", self);
275            Ok(None)
276        }
277    }
278}
279
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chat;
    use crate::chat::{forward_msgs, save_msgs};
    use crate::config::Config;
    use crate::contact::ContactId;
    use crate::message::{MessengerMessage, Viewtype};
    use crate::receive_imf::receive_imf;
    use crate::test_utils::{TestContext, TestContextManager};

    /// A mail without Content-Type is rendered from its plain text.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_plain_unspecified() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_plain_unspecified.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        assert_eq!(
            parser.html,
            r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body>
This message does not have Content-Type nor Subject.<br/>
</body></html>
"#
        );
    }

    /// Plain text in a non-UTF-8 charset is decoded before rendering.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_plain_iso88591() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_plain_iso88591.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        assert_eq!(
            parser.html,
            r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body>
message with a non-UTF-8 encoding: äöüßÄÖÜ<br/>
</body></html>
"#
        );
    }

    /// `format=flowed` is detected and respected when rendering plain text.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_plain_flowed() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_plain_flowed.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        assert!(parser.plain.unwrap().flowed);
        assert_eq!(
            parser.html,
            r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body>
This line ends with a space and will be merged with the next one due to format=flowed.<br/>
<br/>
This line does not end with a space<br/>
and will be wrapped as usual.<br/>
</body></html>
"#
        );
    }

    /// A multipart/alternative mail with only a plain part is rendered from that part.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_alt_plain() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_alt_plain.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        assert_eq!(
            parser.html,
            r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body>
mime-modified should not be set set as there is no html and no special stuff;<br/>
although not being a delta-message.<br/>
test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x27; :)<br/>
</body></html>
"#
        );
    }

    /// A plain `text/html` mail is returned as is.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_html() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_html.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();

        // on windows, `\r\n` linends are returned from mimeparser,
        // however, rust multiline-strings use just `\n`;
        // therefore, we just remove `\r` before comparison.
        assert_eq!(
            parser.html.replace('\r', ""),
            r##"
<html>
  <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p>
</html>"##
        );
    }

    /// A multipart/alternative mail with only an HTML part returns that part.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_alt_html() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_alt_html.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        assert_eq!(
            parser.html.replace('\r', ""), // see comment in test_htmlparse_html()
            r##"<html>
  <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p>
</html>
"##
        );
    }

    /// With both a plain and an HTML alternative, the HTML part wins.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_alt_plain_html() {
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        assert_eq!(
            parser.html.replace('\r', ""), // see comment in test_htmlparse_html()
            r##"<html>
  <p>
    this is <b>html</b>
  </p>
</html>
"##
        );
    }

    /// `cid:` references to related image parts are replaced by `data:` URLs.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_htmlparse_apple_cid_jpg() {
        // load raw mime html-data with related image-part (cid:)
        // and make sure, Content-Id has angle-brackets that are removed correctly.
        let t = TestContext::new().await;
        let raw = include_bytes!("../test-data/message/apple_cid_jpg.eml");
        let test = String::from_utf8_lossy(raw);
        assert!(test.contains("Content-Id: <8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box>"));
        assert!(test.contains("cid:8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box"));
        assert!(!test.contains("data:"));

        // parsing converts cid: to data:
        let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
        assert!(parser.html.contains("<html>"));
        assert!(!parser.html.contains("Content-Id:"));
        // The raw mail contains no "data:" (checked above),
        // so finding it here proves that the conversion took place.
        assert!(parser.html.contains("data:"));
        assert!(!parser.html.contains("cid:"));
    }

    /// Requesting HTML for an unknown message id is an error.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_get_html_invalid_msgid() {
        let t = TestContext::new().await;
        let msg_id = MsgId::new(100);
        assert!(msg_id.get_html(&t).await.is_err())
    }

    /// The HTML part survives forwarding, both locally and on the receiver side.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_html_forwarding() {
        // alice receives a non-delta html-message
        let mut tcm = TestContextManager::new();
        let alice = &tcm.alice().await;
        let chat = alice
            .create_chat_with_contact("", "sender@testrun.org")
            .await;
        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
        receive_imf(alice, raw, false).await.unwrap();
        let msg = alice.get_last_msg_in(chat.get_id()).await;
        assert_ne!(msg.get_from_id(), ContactId::SELF);
        assert_eq!(msg.is_dc_message, MessengerMessage::No);
        assert!(!msg.is_forwarded());
        assert!(msg.get_text().contains("this is plain"));
        assert!(msg.has_html());
        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
        assert!(html.contains("this is <b>html</b>"));

        // alice: create chat with bob and forward received html-message there
        let chat = alice.create_chat_with_contact("", "bob@example.net").await;
        forward_msgs(alice, &[msg.get_id()], chat.get_id())
            .await
            .unwrap();
        let msg = alice.get_last_msg_in(chat.get_id()).await;
        assert_eq!(msg.get_from_id(), ContactId::SELF);
        assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
        assert!(msg.is_forwarded());
        assert!(msg.get_text().contains("this is plain"));
        assert!(msg.has_html());
        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
        assert!(html.contains("this is <b>html</b>"));

        // bob: check that bob also got the html-part of the forwarded message
        let bob = &tcm.bob().await;
        let chat = bob.create_chat_with_contact("", "alice@example.org").await;
        let msg = bob.recv_msg(&alice.pop_sent_msg().await).await;
        assert_eq!(chat.id, msg.chat_id);
        assert_ne!(msg.get_from_id(), ContactId::SELF);
        assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
        assert!(msg.is_forwarded());
        assert!(msg.get_text().contains("this is plain"));
        assert!(msg.has_html());
        let html = msg.get_id().get_html(bob).await.unwrap().unwrap();
        assert!(html.contains("this is <b>html</b>"));
    }

    /// The HTML part survives saving a message to "Saved Messages".
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_html_save_msg() -> Result<()> {
        // Alice receives a non-delta html-message
        let alice = TestContext::new_alice().await;
        let chat = alice
            .create_chat_with_contact("", "sender@testrun.org")
            .await;
        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
        receive_imf(&alice, raw, false).await?;
        let msg = alice.get_last_msg_in(chat.get_id()).await;

        // Alice saves the message
        let self_chat = alice.get_self_chat().await;
        save_msgs(&alice, &[msg.id]).await?;
        let saved_msg = alice.get_last_msg_in(self_chat.get_id()).await;
        assert_ne!(saved_msg.id, msg.id);
        assert_eq!(
            saved_msg.get_original_msg_id(&alice).await?.unwrap(),
            msg.id
        );
        assert!(!saved_msg.is_forwarded()); // UI should not flag "saved messages" as "forwarded"
        assert_ne!(saved_msg.get_from_id(), ContactId::SELF);
        assert_eq!(saved_msg.get_from_id(), msg.get_from_id());
        assert_eq!(saved_msg.is_dc_message, MessengerMessage::No);
        assert!(saved_msg.get_text().contains("this is plain"));
        assert!(saved_msg.has_html());
        let html = saved_msg.get_id().get_html(&alice).await?.unwrap();
        assert!(html.contains("this is <b>html</b>"));

        Ok(())
    }

    /// The HTML part survives forwarding in an encrypted message.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_html_forwarding_encrypted() {
        let mut tcm = TestContextManager::new();
        // Alice receives a non-delta html-message
        // (`ShowEmails=AcceptedContacts` lets Alice actually receive non-delta messages for known
        // contacts, the contact is marked as known by creating a chat using `chat_with_contact()`)
        let alice = &tcm.alice().await;
        alice
            .set_config(Config::ShowEmails, Some("1"))
            .await
            .unwrap();
        let chat = alice
            .create_chat_with_contact("", "sender@testrun.org")
            .await;
        let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
        receive_imf(alice, raw, false).await.unwrap();
        let msg = alice.get_last_msg_in(chat.get_id()).await;

        // forward the message to saved-messages,
        // this will encrypt the message as new_alice() has set up keys
        let chat = alice.get_self_chat().await;
        forward_msgs(alice, &[msg.get_id()], chat.get_id())
            .await
            .unwrap();
        let msg = alice.pop_sent_msg().await;

        // receive the message on another device
        let alice = &tcm.alice().await;
        alice
            .set_config(Config::ShowEmails, Some("0"))
            .await
            .unwrap();
        let msg = alice.recv_msg(&msg).await;
        assert_eq!(msg.chat_id, alice.get_self_chat().await.id);
        assert_eq!(msg.get_from_id(), ContactId::SELF);
        assert_eq!(msg.is_dc_message, MessengerMessage::Yes);
        assert!(msg.get_showpadlock());
        assert!(msg.is_forwarded());
        assert!(msg.get_text().contains("this is plain"));
        assert!(msg.has_html());
        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
        assert!(html.contains("this is <b>html</b>"));
    }

    /// HTML set via `Message::set_html()` is stored locally and arrives at the receiver.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_set_html() {
        let mut tcm = TestContextManager::new();
        let alice = &tcm.alice().await;
        let bob = &tcm.bob().await;

        // alice sends a message with html-part to bob
        let chat_id = alice.create_chat(bob).await.id;
        let mut msg = Message::new_text("plain text".to_string());
        msg.set_html(Some("<b>html</b> text".to_string()));
        assert!(msg.mime_modified);
        chat::send_msg(alice, chat_id, &mut msg).await.unwrap();

        // check the message is written correctly to alice's db
        let msg = alice.get_last_msg_in(chat_id).await;
        assert_eq!(msg.get_text(), "plain text");
        assert!(!msg.is_forwarded());
        assert!(msg.mime_modified);
        let html = msg.get_id().get_html(alice).await.unwrap().unwrap();
        assert!(html.contains("<b>html</b> text"));

        // let bob receive the message
        let chat_id = bob.create_chat(alice).await.id;
        let msg = bob.recv_msg(&alice.pop_sent_msg().await).await;
        assert_eq!(msg.chat_id, chat_id);
        assert_eq!(msg.get_text(), "plain text");
        assert!(!msg.is_forwarded());
        assert!(msg.mime_modified);
        let html = msg.get_id().get_html(bob).await.unwrap().unwrap();
        assert!(html.contains("<b>html</b> text"));
    }

    /// HTML in the cp1252 charset is decoded correctly.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_cp1252_html() -> Result<()> {
        let t = TestContext::new_alice().await;
        receive_imf(
            &t,
            include_bytes!("../test-data/message/cp1252-html.eml"),
            false,
        )
        .await?;
        let msg = t.get_last_msg().await;
        assert_eq!(msg.viewtype, Viewtype::Text);
        assert!(msg.text.contains("foo bar ä ö ü ß"));
        assert!(msg.has_html());
        let html = msg.get_id().get_html(&t).await?.unwrap();
        println!("{html}");
        assert!(html.contains("foo bar ä ö ü ß"));
        Ok(())
    }
}
614        Ok(())
615    }
616}