deltachat/
simplify.rs

1//! # Simplify incoming plaintext.
2use crate::tools::IsNoneOrEmpty;
3
/// Escapes lines beginning with `--` so that they are not taken for a footer separator.
///
/// A ZERO WIDTH SPACE (ZWSP, U+200B) is placed between the two leading dashes.
/// It should be invisible on most systems, so there is no need to unescape it again
/// (non-delta clients would not do that anyway).
///
/// More is escaped than delta itself strictly needs (e.g. also lines like "-- footer"),
/// which seems to be the better choice for compatibility with non-delta clients.
/// (For delta alone, only "[\r\n|\n]-- {0,2}[\r\n|\n]" would have to be replaced.)
#[expect(clippy::arithmetic_side_effects)]
pub fn escape_message_footer_marks(text: &str) -> String {
    // The first line has no preceding `\n`, so its leading dashes are handled separately.
    let (escaped_start, remainder) = match text.strip_prefix("--") {
        Some(remainder) => (String::from("-\u{200B}-"), remainder),
        None => (String::new(), text),
    };
    // Every other line start is located by the `\n` directly in front of it.
    let escaped_remainder = remainder.replace("\n--", "\n-\u{200B}-");
    escaped_start + &escaped_remainder
}
20
21/// Remove standard (RFC 3676, §4.3) footer if it is found.
22/// Returns `(lines, footer_lines)` tuple;
23/// `footer_lines` is set to `Some` if the footer was actually removed from `lines`
24/// (which is equal to the input array otherwise).
25#[expect(clippy::arithmetic_side_effects)]
26pub(crate) fn remove_message_footer<'a>(
27    lines: &'a [&str],
28) -> (&'a [&'a str], Option<&'a [&'a str]>) {
29    let mut nearly_standard_footer = None;
30    for (ix, &line) in lines.iter().enumerate() {
31        match line {
32            // some providers encode `-- ` to `-- =20` which results in `--  `
33            "-- " | "--  " => return (lines.get(..ix).unwrap_or(lines), lines.get(ix + 1..)),
34            // some providers encode `-- ` to `=2D-` which results in only `--`;
35            // use that only when no other footer is found
36            // and if the line before is empty and the line after is not empty
37            "--" => {
38                if (ix == 0 || lines.get(ix.saturating_sub(1)).is_none_or_empty())
39                    && !lines.get(ix + 1).is_none_or_empty()
40                {
41                    nearly_standard_footer = Some(ix);
42                }
43            }
44            _ => (),
45        }
46    }
47    if let Some(ix) = nearly_standard_footer {
48        return (lines.get(..ix).unwrap_or(lines), lines.get(ix + 1..));
49    }
50    (lines, None)
51}
52
/// Cuts `lines` at the first nonstandard footer separator.
///
/// A separator is a line that equals `--` or starts with `---`, `_____`,
/// `=====`, `*****` or `~~~~~`.
///
/// Returns a `(lines, is_footer_removed)` tuple: if a separator is found,
/// `lines` holds everything before it and `is_footer_removed` is `true`;
/// otherwise the input is returned unchanged together with `false`.
fn remove_nonstandard_footer<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
    let is_separator = |line: &str| {
        line == "--"
            || ["---", "_____", "=====", "*****", "~~~~~"]
                .iter()
                .any(|run| line.starts_with(*run))
    };

    // `position` only yields in-bounds indices, so `get` should always succeed.
    let kept = lines
        .iter()
        .position(|&line| is_separator(line))
        .and_then(|ix| lines.get(..ix));

    match kept {
        Some(kept) => (kept, true),
        None => (lines, false),
    }
}
74
75/// Remove footers if any.
76/// This also makes all newlines "\n", but why not.
77pub(crate) fn remove_footers(msg: &str) -> String {
78    let lines = split_lines(msg);
79    let lines = remove_message_footer(&lines).0;
80    let lines = remove_nonstandard_footer(lines).0;
81    lines.join("\n")
82}
83
/// Splits `buf` at every `\n` and returns the pieces in order.
///
/// The result always has at least one element; a trailing `\n` yields a final
/// empty string, and `\r` characters are left in place.
pub(crate) fn split_lines(buf: &str) -> Vec<&str> {
    let mut lines = Vec::new();
    lines.extend(buf.split('\n'));
    lines
}
87
/// Result of simplifying a message: the cleaned-up text plus
/// the pieces of information gathered while simplifying it.
#[derive(Default, Debug, Eq, PartialEq)]
pub(crate) struct SimplifiedText {
    /// The simplified message text.
    pub text: String,

    /// Whether a "forwarded message" header was found and skipped.
    pub is_forwarded: bool,

    /// Whether parts of the message were cut away:
    /// a nonstandard footer was removed,
    /// the message contains a quote other than `top_quote`,
    /// or a footer was removed from the top quote.
    pub is_cut: bool,

    /// The quote extracted from the message, if there is one.
    pub top_quote: Option<String>,

    /// The text of the standard footer, if there is one.
    pub footer: Option<String>,
}
107
108pub(crate) fn simplify_quote(quote: &str) -> (String, bool) {
109    let quote_lines = split_lines(quote);
110    let (quote_lines, quote_footer_lines) = remove_message_footer(&quote_lines);
111    let is_cut = quote_footer_lines.is_some();
112
113    (render_message(quote_lines, false), is_cut)
114}
115
116/// Simplify message text for chat display.
117/// Remove quotes, signatures, trailing empty lines etc.
118pub(crate) fn simplify(mut input: String, is_chat_message: bool) -> SimplifiedText {
119    let mut is_cut = false;
120
121    input.retain(|c| c != '\r');
122    let lines = split_lines(&input);
123    let (lines, is_forwarded) = skip_forward_header(&lines);
124
125    let (lines, mut top_quote) = remove_top_quote(lines, is_chat_message);
126    let original_lines = &lines;
127    let (lines, footer_lines) = remove_message_footer(lines);
128    let footer = footer_lines.map(|footer_lines| render_message(footer_lines, false));
129
130    let text = if is_chat_message {
131        render_message(lines, false)
132    } else {
133        let (lines, has_nonstandard_footer) = remove_nonstandard_footer(lines);
134        let (lines, mut bottom_quote) = remove_bottom_quote(lines);
135
136        if top_quote.is_none() && bottom_quote.is_some() {
137            std::mem::swap(&mut top_quote, &mut bottom_quote);
138        }
139
140        if lines.iter().all(|it| it.trim().is_empty()) {
141            render_message(original_lines, false)
142        } else {
143            is_cut = is_cut || has_nonstandard_footer || bottom_quote.is_some();
144            render_message(lines, has_nonstandard_footer || bottom_quote.is_some())
145        }
146    };
147
148    if !is_chat_message {
149        top_quote = top_quote.map(|quote| {
150            let (quote, quote_cut) = simplify_quote(&quote);
151            is_cut |= quote_cut;
152            quote
153        });
154    }
155
156    SimplifiedText {
157        text,
158        is_forwarded,
159        is_cut,
160        top_quote,
161        footer,
162    }
163}
164
/// Skips the "forwarded message" header at the start of `lines`.
///
/// The header consists of three lines: the forwarded-message marker,
/// a line starting with `From: `, and an empty line.
///
/// Returns the lines after the header and `true` if the header is present,
/// or the unchanged input and `false` otherwise.
fn skip_forward_header<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
    let has_header = matches!(
        lines,
        [marker, sender, blank, ..]
            if *marker == "---------- Forwarded message ----------"
                && sender.starts_with("From: ")
                && blank.is_empty()
    );

    if has_header {
        // The pattern above guarantees at least three lines.
        (lines.get(3..).unwrap_or(lines), true)
    } else {
        (lines, false)
    }
}
179
180#[expect(clippy::arithmetic_side_effects)]
181fn remove_bottom_quote<'a>(lines: &'a [&str]) -> (&'a [&'a str], Option<String>) {
182    let mut first_quoted_line = lines.len();
183    let mut last_quoted_line = None;
184    for (l, line) in lines.iter().enumerate().rev() {
185        if is_plain_quote(line) {
186            if last_quoted_line.is_none() {
187                first_quoted_line = l + 1;
188            }
189            last_quoted_line = Some(l)
190        } else if !is_empty_line(line) {
191            break;
192        }
193    }
194    if let Some(mut l_last) = last_quoted_line {
195        let quoted_text = lines
196            .iter()
197            .take(first_quoted_line)
198            .skip(l_last)
199            .map(|s| {
200                s.strip_prefix('>')
201                    .map_or(*s, |u| u.strip_prefix(' ').unwrap_or(u))
202            })
203            .collect::<Vec<&str>>()
204            .join("\n");
205        if l_last > 1
206            && let Some(line) = lines.get(l_last - 1)
207            && is_empty_line(line)
208        {
209            l_last -= 1
210        }
211        if l_last > 1
212            && let Some(line) = lines.get(l_last - 1)
213            && is_quoted_headline(line)
214        {
215            l_last -= 1
216        }
217        (lines.get(..l_last).unwrap_or(lines), Some(quoted_text))
218    } else {
219        (lines, None)
220    }
221}
222
223#[expect(clippy::arithmetic_side_effects)]
224fn remove_top_quote<'a>(
225    lines: &'a [&str],
226    is_chat_message: bool,
227) -> (&'a [&'a str], Option<String>) {
228    let mut first_quoted_line = 0;
229    let mut last_quoted_line = None;
230    let mut has_quoted_headline = false;
231    for (l, line) in lines.iter().enumerate() {
232        if is_plain_quote(line) {
233            if last_quoted_line.is_none() {
234                first_quoted_line = l;
235            }
236            last_quoted_line = Some(l)
237        } else if !is_chat_message
238            && is_quoted_headline(line)
239            && !has_quoted_headline
240            && last_quoted_line.is_none()
241        {
242            has_quoted_headline = true
243        } else {
244            /* non-quoting line found */
245            break;
246        }
247    }
248    if let Some(last_quoted_line) = last_quoted_line {
249        (
250            lines.get(last_quoted_line + 1..).unwrap_or(lines),
251            Some(
252                lines
253                    .iter()
254                    .take(last_quoted_line + 1)
255                    .skip(first_quoted_line)
256                    .map(|s| {
257                        s.strip_prefix('>')
258                            .map_or(*s, |u| u.strip_prefix(' ').unwrap_or(u))
259                    })
260                    .collect::<Vec<&str>>()
261                    .join("\n"),
262            ),
263        )
264    } else {
265        (lines, None)
266    }
267}
268
269#[expect(clippy::arithmetic_side_effects)]
270fn render_message(lines: &[&str], is_cut_at_end: bool) -> String {
271    let mut ret = String::new();
272    /* we write empty lines only in case and non-empty line follows */
273    let mut pending_linebreaks = 0;
274    for line in lines {
275        if is_empty_line(line) {
276            pending_linebreaks += 1
277        } else {
278            if !ret.is_empty() {
279                if pending_linebreaks > 2 {
280                    pending_linebreaks = 2
281                }
282                while 0 != pending_linebreaks {
283                    ret += "\n";
284                    pending_linebreaks -= 1
285                }
286            }
287            // the incoming message might contain invalid UTF8
288            ret += line;
289            pending_linebreaks = 1
290        }
291    }
292    if is_cut_at_end && !ret.is_empty() {
293        ret += " [...]";
294    }
295    // redo escaping done by escape_message_footer_marks()
296    ret.replace('\u{200B}', "")
297}
298
/// Returns true if `buf` is empty or consists of whitespace only.
///
/// For some time, this checked for `char <= ' '` instead,
/// see the discussion at: <https://github.com/deltachat/deltachat-core-rust/pull/402#discussion_r317062392>
/// and <https://github.com/deltachat/deltachat-core-rust/pull/2104/files#r538973613>
fn is_empty_line(buf: &str) -> bool {
    buf.trim().is_empty()
}
306
/// Returns true if `buf` looks like the headline of a quote.
///
/// This function may be called for the line _directly_ before a quote.
/// It checks whether the line contains something like "On 01.02.2016, xy@z wrote:"
/// in various languages:
/// - Currently, we simply check that the last character is a ':'
///   and that the line is at most 120 characters long.
/// - Checking for the existence of an email address may fail
///   (headlines may show the user's name instead of the address).
fn is_quoted_headline(buf: &str) -> bool {
    /// Longest line, in characters, that is still accepted as a headline.
    const MAX_HEADLINE_CHARS: usize = 120;

    // Count characters rather than UTF-8 bytes: `str::len()` would shrink the
    // limit to 60 or even 40 characters for non-Latin scripts.
    buf.ends_with(':') && buf.chars().count() <= MAX_HEADLINE_CHARS
}
315
/// Returns true if `buf` is a quoted line, i.e. its first character is `>`.
fn is_plain_quote(buf: &str) -> bool {
    // `>` is ASCII, so looking at the first byte is sufficient.
    buf.as_bytes().first() == Some(&b'>')
}
319
#[cfg(test)]
mod tests {
    use proptest::prelude::*;

    use super::*;

    proptest! {
        #[test]
        // proptest does not support [[:graphical:][:space:]] regex.
        fn test_simplify_plain_text_fuzzy(input in "[!-~\t \n]+") {
            // A standard footer separator must never survive in the text.
            let simplified = simplify(input, true);
            assert!(!simplified.text.split('\n').any(|line| line == "-- "));
        }
    }

    #[test]
    fn test_dont_remove_whole_message() {
        let input = "\n------\nFailed\n------\n\nUh-oh, this workflow did not succeed!\n\nlots of other text".to_string();
        let simplified = simplify(input, false);
        assert_eq!(
            simplified.text,
            "------\nFailed\n------\n\nUh-oh, this workflow did not succeed!\n\nlots of other text"
        );
        assert!(!simplified.is_forwarded);
        assert!(!simplified.is_cut);
    }

    #[test]
    fn test_chat_message() {
        let input = "Hi! How are you?\n\n---\n\nI am good.\n-- \nSent with my Delta Chat Messenger: https://delta.chat".to_string();
        let simplified = simplify(input, true);
        assert_eq!(simplified.text, "Hi! How are you?\n\n---\n\nI am good.");
        assert!(!simplified.is_forwarded);
        assert!(!simplified.is_cut);
        assert_eq!(
            simplified.footer.as_deref(),
            Some("Sent with my Delta Chat Messenger: https://delta.chat")
        );
    }

    #[test]
    fn test_simplify_trim() {
        let input = "line1\n\r\r\rline2".to_string();
        let simplified = simplify(input, false);

        assert_eq!(simplified.text, "line1\nline2");
        assert!(!simplified.is_forwarded);
        assert!(!simplified.is_cut);
    }

    #[test]
    fn test_simplify_forwarded_message() {
        let input = "---------- Forwarded message ----------\r\nFrom: test@example.com\r\n\r\nForwarded message\r\n-- \r\nSignature goes here".to_string();
        let simplified = simplify(input, false);

        assert_eq!(simplified.text, "Forwarded message");
        assert!(simplified.is_forwarded);
        assert!(!simplified.is_cut);
        assert_eq!(simplified.footer.as_deref(), Some("Signature goes here"));
    }

    #[test]
    fn test_simplify_utilities() {
        for blank in [" \t", "", " \r"] {
            assert!(is_empty_line(blank));
        }
        assert!(!is_empty_line(" x"));

        for quoted in ["> hello world", ">>"] {
            assert!(is_plain_quote(quoted));
        }
        for unquoted in ["Life is pain", ""] {
            assert!(!is_plain_quote(unquoted));
        }
    }

    #[test]
    fn test_is_quoted_headline() {
        let headlines = [
            "On 2024-08-28, Bob wrote:",
            "Am 11. November 2024 schrieb Alice:",
            "Anonymous Longer Name a écrit:",
            "There is not really a pattern wrote:",
            "On Mon, 3 Jan, 2022 at 8:34 PM \"Anonymous Longer Name\" <anonymous-longer-name@example.com> wrote:",
        ];
        for headline in headlines {
            assert!(is_quoted_headline(headline));
        }

        let no_headlines = [
            "How are you? I just want to say that this line does not belong to the quote!",
            "No quote headline as not ending with a colon",
            "Even though this ends with a colon, \
            this is no quote-headline as just too long for most cases of date+name+address. \
            it's all heuristics only, it is expected to go wrong sometimes. there is always the 'Show full message' button:",
        ];
        for no_headline in no_headlines {
            assert!(!is_quoted_headline(no_headline));
        }
    }

    #[test]
    fn test_remove_top_quote() {
        // The whole input is a quote.
        let (lines, top_quote) = remove_top_quote(&["> first", "> second"], true);
        assert!(lines.is_empty());
        assert_eq!(top_quote.as_deref(), Some("first\nsecond"));

        // Quote followed by text.
        let (lines, top_quote) = remove_top_quote(&["> first", "> second", "not a quote"], true);
        assert_eq!(lines, &["not a quote"]);
        assert_eq!(top_quote.as_deref(), Some("first\nsecond"));

        // A quote that does not start at the top is left alone.
        let (lines, top_quote) = remove_top_quote(&["not a quote", "> first", "> second"], true);
        assert_eq!(lines, &["not a quote", "> first", "> second"]);
        assert!(top_quote.is_none());

        // The headline is skipped for non-chat messages...
        let (lines, top_quote) = remove_top_quote(
            &["On 2024-08-28, Bob wrote:", "> quote", "not a quote"],
            false,
        );
        assert_eq!(lines, &["not a quote"]);
        assert_eq!(top_quote.as_deref(), Some("quote"));

        // ...but not for chat messages.
        let (lines, top_quote) = remove_top_quote(
            &["On 2024-08-28, Bob wrote:", "> quote", "not a quote"],
            true,
        );
        assert_eq!(
            lines,
            &["On 2024-08-28, Bob wrote:", "> quote", "not a quote"]
        );
        assert!(top_quote.is_none());
    }

    #[test]
    fn test_escape_message_footer_marks() {
        assert_eq!(
            escape_message_footer_marks("--\n--text --in line"),
            "-\u{200B}-\n-\u{200B}-text --in line"
        );
        assert_eq!(
            escape_message_footer_marks("--\r\n--text"),
            "-\u{200B}-\r\n-\u{200B}-text"
        );
    }

    #[test]
    fn test_remove_message_footer() {
        let input = "text\n--\nno footer".to_string();
        let simplified = simplify(input, true);
        assert_eq!(simplified.text, "text\n--\nno footer");
        assert!(simplified.footer.is_none());
        assert!(!simplified.is_cut);

        let input = "text\n\n--\n\nno footer".to_string();
        let simplified = simplify(input, true);
        assert_eq!(simplified.text, "text\n\n--\n\nno footer");
        assert!(simplified.footer.is_none());
        assert!(!simplified.is_cut);

        let input = "text\n\n-- no footer\n\n".to_string();
        let simplified = simplify(input, true);
        assert_eq!(simplified.text, "text\n\n-- no footer");
        assert!(simplified.footer.is_none());

        let input = "text\n\n--\nno footer\n-- \nfooter".to_string();
        let simplified = simplify(input, true);
        assert_eq!(simplified.text, "text\n\n--\nno footer");
        assert!(!simplified.is_cut);
        assert_eq!(simplified.footer.as_deref(), Some("footer"));

        let input = "text\n\n--\ntreated as footer when unescaped".to_string();
        let simplified = simplify(input.clone(), true);
        assert_eq!(simplified.text, "text"); // see remove_message_footer() for some explanations
        assert!(!simplified.is_cut);
        assert_eq!(
            simplified.footer.as_deref(),
            Some("treated as footer when unescaped")
        );
        let escaped = escape_message_footer_marks(&input);
        let simplified = simplify(escaped, true);
        assert_eq!(
            simplified.text,
            "text\n\n--\ntreated as footer when unescaped"
        );
        assert!(!simplified.is_cut);
        assert!(simplified.footer.is_none());

        // Nonstandard footer sent by <https://siju.es/>
        let input = "Message text here\n---Desde mi teléfono con SIJÚ\n\nQuote here".to_string();
        let simplified = simplify(input.clone(), false);
        assert_eq!(simplified.text, "Message text here [...]");
        assert!(simplified.is_cut);
        assert!(simplified.footer.is_none());
        let simplified = simplify(input.clone(), true);
        assert_eq!(simplified.text, input);
        assert!(!simplified.is_cut);
        assert!(simplified.footer.is_none());

        let input = "--\ntreated as footer when unescaped".to_string();
        let simplified = simplify(input.clone(), true);
        assert_eq!(simplified.text, ""); // see remove_message_footer() for some explanations
        assert!(!simplified.is_cut);
        assert_eq!(
            simplified.footer.as_deref(),
            Some("treated as footer when unescaped")
        );

        let escaped = escape_message_footer_marks(&input);
        let simplified = simplify(escaped, true);
        assert_eq!(simplified.text, "--\ntreated as footer when unescaped");
        assert!(!simplified.is_cut);
        assert!(simplified.footer.is_none());
    }
}