deltachat/
simplify.rs

1//! # Simplify incoming plaintext.
2use crate::tools::IsNoneOrEmpty;
3
/// Escapes every line beginning with `--` so that it is not taken for a footer separator.
///
/// A ZERO WIDTH SPACE (ZWSP, U+200B) is inserted between the two leading dashes.
/// It should be invisible on most systems and does not need to be unescaped again
/// (non-Delta clients would not do that anyway).
///
/// This escapes more than Delta Chat itself requires (e.g. also lines such as `-- footer`),
/// which seems to be the better choice for compatibility with other clients.
/// (To be compatible with Delta Chat only, just `[\r\n|\n]-- {0,2}[\r\n|\n]`
/// would need to be replaced.)
pub fn escape_message_footer_marks(text: &str) -> String {
    const ESCAPED_MARK: &str = "-\u{200B}-";

    // A `--` at the very beginning has no preceding newline, so it is handled separately.
    let (head, tail) = match text.strip_prefix("--") {
        Some(rest) => (ESCAPED_MARK, rest),
        None => ("", text),
    };

    let mut escaped = String::from(head);
    escaped.push_str(&tail.replace("\n--", "\n-\u{200B}-"));
    escaped
}
20
21/// Remove standard (RFC 3676, §4.3) footer if it is found.
22/// Returns `(lines, footer_lines)` tuple;
23/// `footer_lines` is set to `Some` if the footer was actually removed from `lines`
24/// (which is equal to the input array otherwise).
25#[expect(clippy::arithmetic_side_effects)]
26pub(crate) fn remove_message_footer<'a>(
27    lines: &'a [&str],
28) -> (&'a [&'a str], Option<&'a [&'a str]>) {
29    let mut nearly_standard_footer = None;
30    for (ix, &line) in lines.iter().enumerate() {
31        match line {
32            // some providers encode `-- ` to `-- =20` which results in `--  `
33            "-- " | "--  " => return (lines.get(..ix).unwrap_or(lines), lines.get(ix + 1..)),
34            // some providers encode `-- ` to `=2D-` which results in only `--`;
35            // use that only when no other footer is found
36            // and if the line before is empty and the line after is not empty
37            "--" if (ix == 0 || lines.get(ix.saturating_sub(1)).is_none_or_empty())
38                && !lines.get(ix + 1).is_none_or_empty() =>
39            {
40                nearly_standard_footer = Some(ix);
41            }
42            _ => (),
43        }
44    }
45    if let Some(ix) = nearly_standard_footer {
46        return (lines.get(..ix).unwrap_or(lines), lines.get(ix + 1..));
47    }
48    (lines, None)
49}
50
/// Removes a nonstandard footer if one is found.
///
/// A nonstandard footer starts at the first line that is exactly `--` or that begins
/// with `---`, `_____`, `=====`, `*****` or `~~~~~`; that line and everything after it
/// is dropped.
///
/// Returns a `(lines, is_footer_removed)` tuple.
/// `is_footer_removed` is `true` if a footer was cut off `lines`;
/// otherwise `lines` is the unchanged input.
fn remove_nonstandard_footer<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
    const FOOTER_PREFIXES: [&str; 5] = ["---", "_____", "=====", "*****", "~~~~~"];

    let footer_start = lines.iter().position(|&line| {
        line == "--"
            || FOOTER_PREFIXES
                .iter()
                .any(|&prefix| line.starts_with(prefix))
    });

    // `get` should always return `Some` here as the index comes from `position`.
    match footer_start.and_then(|ix| lines.get(..ix)) {
        Some(kept) => (kept, true),
        None => (lines, false),
    }
}
72
73/// Remove footers if any.
74/// This also makes all newlines "\n", but why not.
75pub(crate) fn remove_footers(msg: &str) -> String {
76    let lines = split_lines(msg);
77    let lines = remove_message_footer(&lines).0;
78    let lines = remove_nonstandard_footer(lines).0;
79    lines.join("\n")
80}
81
/// Splits `buf` at every `\n` and returns the pieces as borrowed lines.
///
/// The result always has at least one element: an empty input yields a single empty line,
/// and a trailing `\n` yields a trailing empty line. `\r` characters stay part of the lines.
pub(crate) fn split_lines(buf: &str) -> Vec<&str> {
    let mut lines = Vec::new();
    lines.extend(buf.split('\n'));
    lines
}
85
/// Simplified text and some additional information gained from the input.
///
/// This is the result type of `simplify()`.
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct SimplifiedText {
    /// The text itself, without the parts that were split off
    /// into `top_quote` and `footer`.
    pub text: String,

    /// True if the message is forwarded,
    /// i.e. a "forwarded message" header was found and skipped.
    pub is_forwarded: bool,

    /// True if something was left out of the text:
    /// a nonstandard footer was removed,
    /// the message contains quotes other than `top_quote`,
    /// or a footer was removed from the top quote.
    /// For chat messages this is always false.
    pub is_cut: bool,

    /// Top quote, if any.
    /// For non-chat messages, a quote at the end of the message is used
    /// if there is no quote at the top.
    pub top_quote: Option<String>,

    /// Footer, if any.
    /// These are the lines following a standard footer separator.
    pub footer: Option<String>,
}
105
106pub(crate) fn simplify_quote(quote: &str) -> (String, bool) {
107    let quote_lines = split_lines(quote);
108    let (quote_lines, quote_footer_lines) = remove_message_footer(&quote_lines);
109    let is_cut = quote_footer_lines.is_some();
110
111    (render_message(quote_lines, false), is_cut)
112}
113
114/// Simplify message text for chat display.
115/// Remove quotes, signatures, trailing empty lines etc.
116pub(crate) fn simplify(mut input: String, is_chat_message: bool) -> SimplifiedText {
117    let mut is_cut = false;
118
119    input.retain(|c| c != '\r');
120    let lines = split_lines(&input);
121    let (lines, is_forwarded) = skip_forward_header(&lines);
122
123    let (lines, mut top_quote) = remove_top_quote(lines, is_chat_message);
124    let original_lines = &lines;
125    let (lines, footer_lines) = remove_message_footer(lines);
126    let footer = footer_lines.map(|footer_lines| render_message(footer_lines, false));
127
128    let text = if is_chat_message {
129        render_message(lines, false)
130    } else {
131        let (lines, has_nonstandard_footer) = remove_nonstandard_footer(lines);
132        let (lines, mut bottom_quote) = remove_bottom_quote(lines);
133
134        if top_quote.is_none() && bottom_quote.is_some() {
135            std::mem::swap(&mut top_quote, &mut bottom_quote);
136        }
137
138        if lines.iter().all(|it| it.trim().is_empty()) {
139            render_message(original_lines, false)
140        } else {
141            is_cut = is_cut || has_nonstandard_footer || bottom_quote.is_some();
142            render_message(lines, has_nonstandard_footer || bottom_quote.is_some())
143        }
144    };
145
146    if !is_chat_message {
147        top_quote = top_quote.map(|quote| {
148            let (quote, quote_cut) = simplify_quote(&quote);
149            is_cut |= quote_cut;
150            quote
151        });
152    }
153
154    SimplifiedText {
155        text,
156        is_forwarded,
157        is_cut,
158        top_quote,
159        footer,
160    }
161}
162
/// Skips a "forwarded message" header.
///
/// The header consists of the line `---------- Forwarded message ----------`,
/// a line starting with `From: ` and an empty line.
///
/// Returns the message body lines and a boolean indicating whether
/// the message is forwarded or not; the input is returned unchanged if it is not.
fn skip_forward_header<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
    if let [marker, sender, separator, body @ ..] = lines {
        let is_forward_header = *marker == "---------- Forwarded message ----------"
            && sender.starts_with("From: ")
            && separator.is_empty();
        if is_forward_header {
            return (body, true);
        }
    }
    (lines, false)
}
177
178#[expect(clippy::arithmetic_side_effects)]
179fn remove_bottom_quote<'a>(lines: &'a [&str]) -> (&'a [&'a str], Option<String>) {
180    let mut first_quoted_line = lines.len();
181    let mut last_quoted_line = None;
182    for (l, line) in lines.iter().enumerate().rev() {
183        if is_plain_quote(line) {
184            if last_quoted_line.is_none() {
185                first_quoted_line = l + 1;
186            }
187            last_quoted_line = Some(l)
188        } else if !is_empty_line(line) {
189            break;
190        }
191    }
192    if let Some(mut l_last) = last_quoted_line {
193        let quoted_text = lines
194            .iter()
195            .take(first_quoted_line)
196            .skip(l_last)
197            .map(|s| {
198                s.strip_prefix('>')
199                    .map_or(*s, |u| u.strip_prefix(' ').unwrap_or(u))
200            })
201            .collect::<Vec<&str>>()
202            .join("\n");
203        if l_last > 1
204            && let Some(line) = lines.get(l_last - 1)
205            && is_empty_line(line)
206        {
207            l_last -= 1
208        }
209        if l_last > 1
210            && let Some(line) = lines.get(l_last - 1)
211            && is_quoted_headline(line)
212        {
213            l_last -= 1
214        }
215        (lines.get(..l_last).unwrap_or(lines), Some(quoted_text))
216    } else {
217        (lines, None)
218    }
219}
220
221#[expect(clippy::arithmetic_side_effects)]
222fn remove_top_quote<'a>(
223    lines: &'a [&str],
224    is_chat_message: bool,
225) -> (&'a [&'a str], Option<String>) {
226    let mut first_quoted_line = 0;
227    let mut last_quoted_line = None;
228    let mut has_quoted_headline = false;
229    for (l, line) in lines.iter().enumerate() {
230        if is_plain_quote(line) {
231            if last_quoted_line.is_none() {
232                first_quoted_line = l;
233            }
234            last_quoted_line = Some(l)
235        } else if !is_chat_message
236            && is_quoted_headline(line)
237            && !has_quoted_headline
238            && last_quoted_line.is_none()
239        {
240            has_quoted_headline = true
241        } else {
242            /* non-quoting line found */
243            break;
244        }
245    }
246    if let Some(last_quoted_line) = last_quoted_line {
247        (
248            lines.get(last_quoted_line + 1..).unwrap_or(lines),
249            Some(
250                lines
251                    .iter()
252                    .take(last_quoted_line + 1)
253                    .skip(first_quoted_line)
254                    .map(|s| {
255                        s.strip_prefix('>')
256                            .map_or(*s, |u| u.strip_prefix(' ').unwrap_or(u))
257                    })
258                    .collect::<Vec<&str>>()
259                    .join("\n"),
260            ),
261        )
262    } else {
263        (lines, None)
264    }
265}
266
267#[expect(clippy::arithmetic_side_effects)]
268fn render_message(lines: &[&str], is_cut_at_end: bool) -> String {
269    let mut ret = String::new();
270    /* we write empty lines only in case and non-empty line follows */
271    let mut pending_linebreaks = 0;
272    for line in lines {
273        if is_empty_line(line) {
274            pending_linebreaks += 1
275        } else {
276            if !ret.is_empty() {
277                if pending_linebreaks > 2 {
278                    pending_linebreaks = 2
279                }
280                while 0 != pending_linebreaks {
281                    ret += "\n";
282                    pending_linebreaks -= 1
283                }
284            }
285            // the incoming message might contain invalid UTF8
286            ret += line;
287            pending_linebreaks = 1
288        }
289    }
290    if is_cut_at_end && !ret.is_empty() {
291        ret += " [...]";
292    }
293    // redo escaping done by escape_message_footer_marks()
294    ret.replace('\u{200B}', "")
295}
296
/// Returns true if the line is empty or contains only whitespace.
fn is_empty_line(buf: &str) -> bool {
    // For some time, this checked for `char <= ' '`,
    // see discussion at: <https://github.com/deltachat/deltachat-core-rust/pull/402#discussion_r317062392>
    // and <https://github.com/deltachat/deltachat-core-rust/pull/2104/files#r538973613>
    buf.trim().is_empty()
}
304
/// Returns true if the line looks like the headline of a quote,
/// sth. like "On 01.02.2016, xy@z wrote:" in various languages.
///
/// This function may be called for the line _directly_ before a quote.
/// - Currently, we simply check if the last character is a ':'
///   and if the line is not longer than 120 bytes.
/// - Checking for the existence of an email address may fail
///   (headlines may show the user's name instead of the address).
fn is_quoted_headline(buf: &str) -> bool {
    /// Maximal length of a headline; as `str::len()` is used, this is in bytes.
    const MAX_HEADLINE_LEN: usize = 120;

    buf.ends_with(':') && buf.len() <= MAX_HEADLINE_LEN
}
313
/// Returns true if the line is a quoted line, i.e. if its first character is `>`.
fn is_plain_quote(buf: &str) -> bool {
    matches!(buf.chars().next(), Some('>'))
}
317
#[cfg(test)]
mod tests {
    use proptest::prelude::*;

    use super::*;

    // Whatever a chat message built from printable ASCII, tabs, spaces and newlines
    // looks like, the simplified text must not contain a `-- ` footer separator line.
    proptest! {
        #[test]
        // proptest does not support [[:graphical:][:space:]] regex.
        fn test_simplify_plain_text_fuzzy(input in "[!-~\t \n]+") {
            let SimplifiedText {
                text,
                ..
            } = simplify(input, true);
            assert!(text.split('\n').all(|s| s != "-- "));
        }
    }

    // The `------` line counts as a nonstandard footer, but cutting there
    // would leave nothing, so the whole message is kept and not marked as cut.
    #[test]
    fn test_dont_remove_whole_message() {
        let input = "\n------\nFailed\n------\n\nUh-oh, this workflow did not succeed!\n\nlots of other text".to_string();
        let SimplifiedText {
            text,
            is_forwarded,
            is_cut,
            ..
        } = simplify(input, false);
        assert_eq!(
            text,
            "------\nFailed\n------\n\nUh-oh, this workflow did not succeed!\n\nlots of other text"
        );
        assert!(!is_forwarded);
        assert!(!is_cut);
    }

    // In chat messages, a `---` line is part of the text;
    // only the standard `-- ` footer is split off.
    #[test]
    fn test_chat_message() {
        let input = "Hi! How are you?\n\n---\n\nI am good.\n-- \nSent with my Delta Chat Messenger: https://delta.chat".to_string();
        let SimplifiedText {
            text,
            is_forwarded,
            is_cut,
            footer,
            ..
        } = simplify(input, true);
        assert_eq!(text, "Hi! How are you?\n\n---\n\nI am good.");
        assert!(!is_forwarded);
        assert!(!is_cut);
        assert_eq!(
            footer.unwrap(),
            "Sent with my Delta Chat Messenger: https://delta.chat"
        );
    }

    // `\r` characters are dropped before the text is split into lines,
    // so no empty line remains between the two lines.
    #[test]
    fn test_simplify_trim() {
        let input = "line1\n\r\r\rline2".to_string();
        let SimplifiedText {
            text,
            is_forwarded,
            is_cut,
            ..
        } = simplify(input, false);

        assert_eq!(text, "line1\nline2");
        assert!(!is_forwarded);
        assert!(!is_cut);
    }

    // The forward header is recognized with `\r\n` line endings as well
    // and the footer is still split off the forwarded text.
    #[test]
    fn test_simplify_forwarded_message() {
        let input = "---------- Forwarded message ----------\r\nFrom: test@example.com\r\n\r\nForwarded message\r\n-- \r\nSignature goes here".to_string();
        let SimplifiedText {
            text,
            is_forwarded,
            is_cut,
            footer,
            ..
        } = simplify(input, false);

        assert_eq!(text, "Forwarded message");
        assert!(is_forwarded);
        assert!(!is_cut);
        assert_eq!(footer.unwrap(), "Signature goes here");
    }

    // Checks the line classification helpers.
    #[test]
    fn test_simplify_utilities() {
        assert!(is_empty_line(" \t"));
        assert!(is_empty_line(""));
        assert!(is_empty_line(" \r"));
        assert!(!is_empty_line(" x"));
        assert!(is_plain_quote("> hello world"));
        assert!(is_plain_quote(">>"));
        assert!(!is_plain_quote("Life is pain"));
        assert!(!is_plain_quote(""));
    }

    // A quote headline ends with a colon and is not too long.
    #[test]
    fn test_is_quoted_headline() {
        assert!(is_quoted_headline("On 2024-08-28, Bob wrote:"));
        assert!(is_quoted_headline("Am 11. November 2024 schrieb Alice:"));
        assert!(is_quoted_headline("Anonymous Longer Name a écrit:"));
        assert!(is_quoted_headline("There is not really a pattern wrote:"));
        assert!(is_quoted_headline(
            "On Mon, 3 Jan, 2022 at 8:34 PM \"Anonymous Longer Name\" <anonymous-longer-name@example.com> wrote:"
        ));
        assert!(!is_quoted_headline(
            "How are you? I just want to say that this line does not belong to the quote!"
        ));
        assert!(!is_quoted_headline(
            "No quote headline as not ending with a colon"
        ));
        assert!(!is_quoted_headline(
            "Even though this ends with a colon, \
            this is no quote-headline as just too long for most cases of date+name+address. \
            it's all heuristics only, it is expected to go wrong sometimes. there is always the 'Show full message' button:"
        ));
    }

    #[test]
    fn test_remove_top_quote() {
        // The message consists of the quote only.
        let (lines, top_quote) = remove_top_quote(&["> first", "> second"], true);
        assert!(lines.is_empty());
        assert_eq!(top_quote.unwrap(), "first\nsecond");

        // The quote is followed by text.
        let (lines, top_quote) = remove_top_quote(&["> first", "> second", "not a quote"], true);
        assert_eq!(lines, &["not a quote"]);
        assert_eq!(top_quote.unwrap(), "first\nsecond");

        // A quote that does not start at the top is not a top quote.
        let (lines, top_quote) = remove_top_quote(&["not a quote", "> first", "> second"], true);
        assert_eq!(lines, &["not a quote", "> first", "> second"]);
        assert!(top_quote.is_none());

        // In non-chat messages, a quote headline before the quote is removed as well.
        let (lines, top_quote) = remove_top_quote(
            &["On 2024-08-28, Bob wrote:", "> quote", "not a quote"],
            false,
        );
        assert_eq!(lines, &["not a quote"]);
        assert_eq!(top_quote.unwrap(), "quote");

        // In chat messages, the same headline is normal text and ends the search.
        let (lines, top_quote) = remove_top_quote(
            &["On 2024-08-28, Bob wrote:", "> quote", "not a quote"],
            true,
        );
        assert_eq!(
            lines,
            &["On 2024-08-28, Bob wrote:", "> quote", "not a quote"]
        );
        assert!(top_quote.is_none());
    }

    // `--` is escaped at the beginning of the text and after every `\n`,
    // but not in the middle of a line.
    #[test]
    fn test_escape_message_footer_marks() {
        let esc = escape_message_footer_marks("--\n--text --in line");
        assert_eq!(esc, "-\u{200B}-\n-\u{200B}-text --in line");

        let esc = escape_message_footer_marks("--\r\n--text");
        assert_eq!(esc, "-\u{200B}-\r\n-\u{200B}-text");
    }

    #[test]
    fn test_remove_message_footer() {
        // `--` is no footer separator as the line before is not empty.
        let input = "text\n--\nno footer".to_string();
        let SimplifiedText {
            text,
            is_cut,
            footer,
            ..
        } = simplify(input, true);
        assert_eq!(text, "text\n--\nno footer");
        assert_eq!(footer, None);
        assert!(!is_cut);

        // `--` is no footer separator as the line after is empty.
        let input = "text\n\n--\n\nno footer".to_string();
        let SimplifiedText {
            text,
            is_cut,
            footer,
            ..
        } = simplify(input, true);
        assert_eq!(text, "text\n\n--\n\nno footer");
        assert_eq!(footer, None);
        assert!(!is_cut);

        // A line that only starts with `-- ` is no footer separator.
        let input = "text\n\n-- no footer\n\n".to_string();
        let SimplifiedText { text, footer, .. } = simplify(input, true);
        assert_eq!(text, "text\n\n-- no footer");
        assert_eq!(footer, None);

        // The standard separator `-- ` takes precedence over the bare `--`.
        let input = "text\n\n--\nno footer\n-- \nfooter".to_string();
        let SimplifiedText {
            text,
            is_cut,
            footer,
            ..
        } = simplify(input, true);
        assert_eq!(text, "text\n\n--\nno footer");
        assert!(!is_cut);
        assert_eq!(footer.unwrap(), "footer");

        // Without a standard separator, a bare `--` after an empty line and
        // before a non-empty line is used as separator, unless it was escaped.
        let input = "text\n\n--\ntreated as footer when unescaped".to_string();
        let SimplifiedText {
            text,
            is_cut,
            footer,
            ..
        } = simplify(input.clone(), true);
        assert_eq!(text, "text"); // see remove_message_footer() for some explanations
        assert!(!is_cut);
        assert_eq!(footer.unwrap(), "treated as footer when unescaped");
        let escaped = escape_message_footer_marks(&input);
        let SimplifiedText {
            text,
            is_cut,
            footer,
            ..
        } = simplify(escaped, true);
        assert_eq!(text, "text\n\n--\ntreated as footer when unescaped");
        assert!(!is_cut);
        assert_eq!(footer, None);

        // Nonstandard footer sent by <https://siju.es/>
        // It is cut off non-chat messages only.
        let input = "Message text here\n---Desde mi teléfono con SIJÚ\n\nQuote here".to_string();
        let SimplifiedText {
            text,
            is_cut,
            footer,
            ..
        } = simplify(input.clone(), false);
        assert_eq!(text, "Message text here [...]");
        assert!(is_cut);
        assert_eq!(footer, None);
        let SimplifiedText {
            text,
            is_cut,
            footer,
            ..
        } = simplify(input.clone(), true);
        assert_eq!(text, input);
        assert!(!is_cut);
        assert_eq!(footer, None);

        // A bare `--` in the very first line is used as separator as well,
        // unless it was escaped.
        let input = "--\ntreated as footer when unescaped".to_string();
        let SimplifiedText {
            text,
            is_cut,
            footer,
            ..
        } = simplify(input.clone(), true);
        assert_eq!(text, ""); // see remove_message_footer() for some explanations
        assert!(!is_cut);
        assert_eq!(footer.unwrap(), "treated as footer when unescaped");

        let escaped = escape_message_footer_marks(&input);
        let SimplifiedText {
            text,
            is_cut,
            footer,
            ..
        } = simplify(escaped, true);
        assert_eq!(text, "--\ntreated as footer when unescaped");
        assert!(!is_cut);
        assert_eq!(footer, None);
    }
}