deltachat/
plaintext.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
//! Handle plain text together with some attributes.

use once_cell::sync::Lazy;

use crate::simplify::remove_message_footer;

/// Plaintext message body together with format=flowed attributes.
#[derive(Debug)]
pub struct PlainText {
    /// The text itself.
    pub text: String,

    /// Whether the text is "flowed" as defined in
    /// [RFC 2646](https://tools.ietf.org/html/rfc2646)
    /// and its successor [RFC 3676](https://tools.ietf.org/html/rfc3676).
    /// At a glance, that means: if a line ends with a space, it is merged with the next one,
    /// and a single leading space is ignored
    /// (to allow lines starting with `>`, which would otherwise indicate a quote).
    pub flowed: bool,

    /// If set together with `flowed`,
    /// the trailing space that indicates merging two lines is removed.
    pub delsp: bool,
}

impl PlainText {
    /// Convert plain text to HTML.
    ///
    /// The function handles quotes, links, fixed and floating text paragraphs:
    ///
    /// - the message footer is dropped (see `remove_message_footer()`),
    /// - URLs (`http`, `https`, `ftp`, `ftps`) and mail addresses become `<a>` elements,
    /// - everything else is HTML-entity-encoded,
    /// - lines starting with `>` are wrapped in `<em>`,
    /// - leading spaces are turned into `&nbsp;` to preserve indentation,
    /// - if `flowed` is set, a single leading space is dropped and lines ending with a space
    ///   are joined with the following line (with `delsp`, that trailing space is removed).
    ///
    /// Returns a complete HTML document as a string.
    pub fn to_html(&self) -> String {
        static LINKIFY_MAIL_RE: Lazy<regex::Regex> =
            Lazy::new(|| regex::Regex::new(r"\b([\w.\-+]+@[\w.\-]+)\b").unwrap());

        static LINKIFY_URL_RE: Lazy<regex::Regex> = Lazy::new(|| {
            regex::Regex::new(r"\b((http|https|ftp|ftps):[\w.,:;$/@!?&%\-~=#+]+)").unwrap()
        });

        // Replacement pattern for mail addresses; `\rLT`, `\rGT` and `\rQUOT` are placeholders
        // for `<`, `>` and `"` that survive the entity encoding below.
        const MAIL_LINK: &str = "\rLTa href=\rQUOTmailto:$1\rQUOT\rGT$1\rLT/a\rGT";

        let lines: Vec<&str> = self.text.lines().collect();
        let (lines, _footer) = remove_message_footer(&lines);

        let mut ret = r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body>
"#
        .to_string();

        for line in lines {
            let is_quote = line.starts_with('>');

            // we need to do html-entity-encoding after linkify, as otherwise encapsulated links
            // as <http://example.org> cannot be handled correctly
            // (they would become &lt;http://example.org&gt; where the trailing &gt; would become a valid url part).
            // to avoid double encoding, we escape our html-entities by \r that must not be used in the string elsewhere.
            let raw = line.replace('\r', "");

            // Linkify URLs first and mail addresses only in the text between URLs.
            // Running the mail regex over the whole line would turn the `user@host` part of
            // `http://user@host/path` into a separate mailto-link and tear the URL apart.
            let mut linked = String::with_capacity(raw.len());
            let mut gap_start = 0;
            for url in LINKIFY_URL_RE.find_iter(&raw) {
                // match offsets reported by the regex are always valid char boundaries of `raw`
                linked.push_str(
                    &LINKIFY_MAIL_RE.replace_all(&raw[gap_start..url.start()], MAIL_LINK),
                );
                linked.push_str("\rLTa href=\rQUOT");
                linked.push_str(url.as_str());
                linked.push_str("\rQUOT\rGT");
                linked.push_str(url.as_str());
                linked.push_str("\rLT/a\rGT");
                gap_start = url.end();
            }
            linked.push_str(&LINKIFY_MAIL_RE.replace_all(&raw[gap_start..], MAIL_LINK));

            // encode html-entities after linkify the raw string,
            // then make our escaped html-entities real after encoding all others
            let mut line = escaper::encode_minimal(&linked)
                .replace("\rLT", "<")
                .replace("\rGT", ">")
                .replace("\rQUOT", "\"");

            if self.flowed {
                // flowed text as of RFC 3676 -
                // a leading space shall be removed
                // and is only there to allow > at the beginning of a line that is no quote.
                if line.starts_with(' ') {
                    line.remove(0);
                }
                if is_quote {
                    line = format!("<em>{}</em>", line);
                }

                // a trailing space indicates that the line can be merged with the next one;
                // for sake of simplicity, we skip merging for quotes (quotes may be combined with
                // delsp, so `> >` is different from `>>` etc. see RFC 3676 for details)
                if line.ends_with(' ') && !is_quote {
                    if self.delsp {
                        line.pop();
                    }
                } else {
                    line.push_str("<br/>\n");
                }
            } else {
                // normal, fixed text
                if is_quote {
                    line = format!("<em>{}</em>", line);
                }
                line.push_str("<br/>\n");
            }

            // HTML collapses leading spaces, so keep indentation by emitting one `&nbsp;`
            // per leading space.
            let unindented = line.trim_start_matches(' ');
            let indentation = line.len() - unindented.len();
            ret.push_str(&"&nbsp;".repeat(indentation));
            ret.push_str(unindented);
        }
        ret.push_str("</body></html>\n");
        ret
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Everything `to_html()` emits before the first converted line.
    const PROLOGUE: &str = r#"<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="color-scheme" content="light dark" />
</head><body>
"#;

    /// Everything `to_html()` emits after the last converted line.
    const EPILOGUE: &str = "</body></html>\n";

    /// Input shared by the flowed, flowed+delsp and fixed tests.
    const QUOTE_SAMPLE: &str = "line \nstill line\n>quote \n>still quote\n >no quote";

    /// Converts `text` to HTML using the given format=flowed attributes.
    fn render(text: &str, flowed: bool, delsp: bool) -> String {
        let plain = PlainText {
            text: text.to_string(),
            flowed,
            delsp,
        };
        plain.to_html()
    }

    /// Builds the full expected document around the expected `body` lines.
    fn page(body: &str) -> String {
        [PROLOGUE, body, EPILOGUE].concat()
    }

    #[test]
    fn test_plain_to_html() {
        let text = concat!(
            "line 1\n",
            "line 2\n",
            "line with https://link-mid-of-line.org and http://link-end-of-line.com/file?foo=bar%20\n",
            "http://link-at-start-of-line.org\n",
        );
        let expected = concat!(
            "line 1<br/>\n",
            "line 2<br/>\n",
            r#"line with <a href="https://link-mid-of-line.org">https://link-mid-of-line.org</a> and <a href="http://link-end-of-line.com/file?foo=bar%20">http://link-end-of-line.com/file?foo=bar%20</a><br/>"#,
            "\n",
            r#"<a href="http://link-at-start-of-line.org">http://link-at-start-of-line.org</a><br/>"#,
            "\n",
        );
        assert_eq!(render(text, false, false), page(expected));
    }

    #[test]
    fn test_plain_remove_signature() {
        // everything from the `-- ` separator on is treated as footer and dropped
        let html = render("Foo\nbar\n-- \nSignature here", false, false);
        assert_eq!(html, page("Foo<br/>\nbar<br/>\n"));
    }

    #[test]
    fn test_plain_to_html_encapsulated() {
        // the angle brackets around the link are encoded, but must not become part of the url
        let html = render(
            "line with <http://encapsulated.link/?foo=_bar> here!",
            false,
            false,
        );
        let expected = concat!(
            r#"line with &lt;<a href="http://encapsulated.link/?foo=_bar">http://encapsulated.link/?foo=_bar</a>&gt; here!<br/>"#,
            "\n",
        );
        assert_eq!(html, page(expected));
    }

    #[test]
    fn test_plain_to_html_nolink() {
        // a scheme that is only the tail of a longer word is not linkified
        let html = render("line with nohttp://no.link here", false, false);
        assert_eq!(html, page("line with nohttp://no.link here<br/>\n"));
    }

    #[test]
    fn test_plain_to_html_mailto() {
        let html = render("just an address: foo@bar.org another@one.de", false, false);
        let expected = concat!(
            r#"just an address: <a href="mailto:foo@bar.org">foo@bar.org</a> <a href="mailto:another@one.de">another@one.de</a><br/>"#,
            "\n",
        );
        assert_eq!(html, page(expected));
    }

    #[test]
    fn test_plain_to_html_flowed() {
        let html = render(QUOTE_SAMPLE, true, false);
        let expected = concat!(
            "line still line<br/>\n",
            "<em>&gt;quote </em><br/>\n",
            "<em>&gt;still quote</em><br/>\n",
            "&gt;no quote<br/>\n",
        );
        assert_eq!(html, page(expected));
    }

    #[test]
    fn test_plain_to_html_flowed_delsp() {
        let html = render(QUOTE_SAMPLE, true, true);
        let expected = concat!(
            "linestill line<br/>\n",
            "<em>&gt;quote </em><br/>\n",
            "<em>&gt;still quote</em><br/>\n",
            "&gt;no quote<br/>\n",
        );
        assert_eq!(html, page(expected));
    }

    #[test]
    fn test_plain_to_html_fixed() {
        let html = render(QUOTE_SAMPLE, false, false);
        let expected = concat!(
            "line <br/>\n",
            "still line<br/>\n",
            "<em>&gt;quote </em><br/>\n",
            "<em>&gt;still quote</em><br/>\n",
            "&nbsp;&gt;no quote<br/>\n",
        );
        assert_eq!(html, page(expected));
    }

    #[test]
    fn test_plain_to_html_indentation() {
        let html = render(
            "def foo():\n    pass\n\ndef bar(x):\n    return x + 5",
            false,
            false,
        );
        let expected = concat!(
            "def foo():<br/>\n",
            "&nbsp;&nbsp;&nbsp;&nbsp;pass<br/>\n",
            "<br/>\n",
            "def bar(x):<br/>\n",
            "&nbsp;&nbsp;&nbsp;&nbsp;return x + 5<br/>\n",
        );
        assert_eq!(html, page(expected));
    }
}