deltachat/
blob.rs

1//! # Blob directory management.
2
3use core::cmp::max;
4use std::io::{Cursor, Seek};
5use std::iter::FusedIterator;
6use std::mem;
7use std::path::{Path, PathBuf};
8
9use anyhow::{Context as _, Result, ensure, format_err};
10use base64::Engine as _;
11use futures::StreamExt;
12use image::ImageReader;
13use image::codecs::jpeg::JpegEncoder;
14use image::{DynamicImage, GenericImage, GenericImageView, ImageFormat, Pixel, Rgba};
15use num_traits::FromPrimitive;
16use tokio::{fs, task};
17use tokio_stream::wrappers::ReadDirStream;
18
19use crate::config::Config;
20use crate::constants::{self, MediaQuality};
21use crate::context::Context;
22use crate::events::EventType;
23use crate::log::{LogExt, error, info, warn};
24use crate::tools::sanitize_filename;
25
/// Represents a file in the blob directory.
///
/// The object has a name, which will always be valid UTF-8.  Having a
/// blob object does not imply the respective file exists, however
/// when using one of the `create*()` methods a unique file is
/// created.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlobObject<'a> {
    /// The blob directory this blob belongs to, as returned by `Context::get_blobdir()`.
    blobdir: &'a Path,

    /// The name of the file on the disk, kept in the `$BLOBDIR/<name>` form.
    /// Note that this is NOT the user-visible filename,
    /// which is only stored in Param::Filename on the message.
    name: String,
}
41
/// Target format used when re-encoding an image, see `encode_img()`.
#[derive(Debug, Clone)]
enum ImageOutputFormat {
    /// Encode the image as PNG.
    Png,
    /// Encode the image as JPEG; `quality` is handed to the JPEG encoder as is.
    Jpeg { quality: u8 },
}
47
48impl<'a> BlobObject<'a> {
49    /// Creates a blob object by copying or renaming an existing file.
50    /// If the source file is already in the blobdir, it will be renamed,
51    /// otherwise it will be copied to the blobdir first.
52    ///
53    /// In order to deduplicate files that contain the same data,
54    /// the file will be named `<hash>.<extension>`, e.g. `ce940175885d7b78f7b7e9f1396611f.jpg`.
55    /// The `original_name` param is only used to get the extension.
56    ///
57    /// This is done in a in way which avoids race-conditions when multiple files are
58    /// concurrently created.
59    pub fn create_and_deduplicate(
60        context: &'a Context,
61        src: &Path,
62        original_name: &Path,
63    ) -> Result<BlobObject<'a>> {
64        // `create_and_deduplicate{_from_bytes}()` do blocking I/O, but can still be called
65        // from an async context thanks to `block_in_place()`.
66        // Tokio's "async" I/O functions are also just thin wrappers around the blocking I/O syscalls,
67        // so we are doing essentially the same here.
68        task::block_in_place(|| {
69            let temp_path;
70            let src_in_blobdir: &Path;
71            let blobdir = context.get_blobdir();
72
73            if src.starts_with(blobdir) {
74                src_in_blobdir = src;
75            } else {
76                info!(
77                    context,
78                    "Source file not in blobdir. Copying instead of moving in order to prevent moving a file that was still needed."
79                );
80                temp_path = blobdir.join(format!("tmp-{}", rand::random::<u64>()));
81                if std::fs::copy(src, &temp_path).is_err() {
82                    // Maybe the blobdir didn't exist
83                    std::fs::create_dir_all(blobdir).log_err(context).ok();
84                    std::fs::copy(src, &temp_path).context("Copying new blobfile failed")?;
85                };
86                src_in_blobdir = &temp_path;
87            }
88
89            let hash = file_hash(src_in_blobdir)?.to_hex();
90            let hash = hash.as_str();
91            let hash = hash.get(0..31).unwrap_or(hash);
92            let new_file =
93                if let Some(extension) = original_name.extension().filter(|e| e.len() <= 32) {
94                    let extension = extension.to_string_lossy().to_lowercase();
95                    let extension = sanitize_filename(&extension);
96                    format!("$BLOBDIR/{hash}.{extension}")
97                } else {
98                    format!("$BLOBDIR/{hash}")
99                };
100
101            let blob = BlobObject {
102                blobdir,
103                name: new_file,
104            };
105            let new_path = blob.to_abs_path();
106
107            // This will also replace an already-existing file.
108            // Renaming is atomic, so this will avoid race conditions.
109            std::fs::rename(src_in_blobdir, &new_path)?;
110
111            context.emit_event(EventType::NewBlobFile(blob.as_name().to_string()));
112            Ok(blob)
113        })
114    }
115
116    /// Creates a new blob object with the file contents in `data`.
117    /// In order to deduplicate files that contain the same data,
118    /// the file will be named `<hash>.<extension>`, e.g. `ce940175885d7b78f7b7e9f1396611f.jpg`.
119    /// The `original_name` param is only used to get the extension.
120    ///
121    /// The `data` will be written into the file without race-conditions.
122    ///
123    /// This function does blocking I/O, but it can still be called from an async context
124    /// because `block_in_place()` is used to leave the async runtime if necessary.
125    pub fn create_and_deduplicate_from_bytes(
126        context: &'a Context,
127        data: &[u8],
128        original_name: &str,
129    ) -> Result<BlobObject<'a>> {
130        task::block_in_place(|| {
131            let blobdir = context.get_blobdir();
132            let temp_path = blobdir.join(format!("tmp-{}", rand::random::<u64>()));
133            if std::fs::write(&temp_path, data).is_err() {
134                // Maybe the blobdir didn't exist
135                std::fs::create_dir_all(blobdir).log_err(context).ok();
136                std::fs::write(&temp_path, data).context("writing new blobfile failed")?;
137            };
138
139            BlobObject::create_and_deduplicate(context, &temp_path, Path::new(original_name))
140        })
141    }
142
143    /// Returns a [BlobObject] for an existing blob from a path.
144    ///
145    /// The path must designate a file directly in the blobdir and
146    /// must use a valid blob name.  That is after sanitisation the
147    /// name must still be the same, that means it must be valid UTF-8
148    /// and not have any special characters in it.
149    pub fn from_path(context: &'a Context, path: &Path) -> Result<BlobObject<'a>> {
150        let rel_path = path
151            .strip_prefix(context.get_blobdir())
152            .with_context(|| format!("wrong blobdir: {}", path.display()))?;
153        let name = rel_path.to_str().context("wrong name")?;
154        if !BlobObject::is_acceptible_blob_name(name) {
155            return Err(format_err!("bad blob name: {}", rel_path.display()));
156        }
157        BlobObject::from_name(context, name)
158    }
159
160    /// Returns a [BlobObject] for an existing blob.
161    ///
162    /// The `name` may optionally be prefixed with the `$BLOBDIR/`
163    /// prefixed, as returned by [BlobObject::as_name].  This is how
164    /// you want to create a [BlobObject] for a filename read from the
165    /// database.
166    pub fn from_name(context: &'a Context, name: &str) -> Result<BlobObject<'a>> {
167        let name = match name.starts_with("$BLOBDIR/") {
168            true => name.splitn(2, '/').last().unwrap(),
169            false => name,
170        };
171        if !BlobObject::is_acceptible_blob_name(name) {
172            return Err(format_err!("not an acceptable blob name: {}", name));
173        }
174        Ok(BlobObject {
175            blobdir: context.get_blobdir(),
176            name: format!("$BLOBDIR/{name}"),
177        })
178    }
179
180    /// Returns the absolute path to the blob in the filesystem.
181    pub fn to_abs_path(&self) -> PathBuf {
182        let fname = Path::new(&self.name).strip_prefix("$BLOBDIR/").unwrap();
183        self.blobdir.join(fname)
184    }
185
186    /// Returns the blob name, as stored in the database.
187    ///
188    /// This returns the blob in the `$BLOBDIR/<name>` format used in
189    /// the database.  Do not use this unless you're about to store
190    /// this string in the database or [Params].  Eventually even
191    /// those conversions should be handled by the type system.
192    ///
193    /// Note that this is NOT the user-visible filename,
194    /// which is only stored in Param::Filename on the message.
195    ///
196    #[allow(rustdoc::private_intra_doc_links)]
197    /// [Params]: crate::param::Params
198    pub fn as_name(&self) -> &str {
199        &self.name
200    }
201
202    /// Returns the extension of the blob.
203    ///
204    /// If a blob's filename has an extension, it is always guaranteed
205    /// to be lowercase.
206    pub fn suffix(&self) -> Option<&str> {
207        let ext = self.name.rsplit('.').next();
208        if ext == Some(&self.name) { None } else { ext }
209    }
210
211    /// Checks whether a name is a valid blob name.
212    ///
213    /// This is slightly less strict than stanitise_name, presumably
214    /// someone already created a file with such a name so we just
215    /// ensure it's not actually a path in disguise.
216    ///
217    /// Acceptible blob name always have to be valid utf-8.
218    fn is_acceptible_blob_name(name: &str) -> bool {
219        if name.find('/').is_some() {
220            return false;
221        }
222        if name.find('\\').is_some() {
223            return false;
224        }
225        if name.find('\0').is_some() {
226            return false;
227        }
228        true
229    }
230
231    /// Returns path to the stored Base64-decoded blob.
232    ///
233    /// If `data` represents an image of known format, this adds the corresponding extension.
234    ///
235    /// Even though this function is not async, it's OK to call it from an async context.
236    pub(crate) fn store_from_base64(context: &Context, data: &str) -> Result<String> {
237        let buf = base64::engine::general_purpose::STANDARD.decode(data)?;
238        let name = if let Ok(format) = image::guess_format(&buf) {
239            if let Some(ext) = format.extensions_str().first() {
240                format!("file.{ext}")
241            } else {
242                String::new()
243            }
244        } else {
245            String::new()
246        };
247        let blob = BlobObject::create_and_deduplicate_from_bytes(context, &buf, &name)?;
248        Ok(blob.as_name().to_string())
249    }
250
251    /// Recode image to avatar size.
252    pub async fn recode_to_avatar_size(&mut self, context: &Context) -> Result<()> {
253        let (img_wh, max_bytes) =
254            match MediaQuality::from_i32(context.get_config_int(Config::MediaQuality).await?)
255                .unwrap_or_default()
256            {
257                MediaQuality::Balanced => (
258                    constants::BALANCED_AVATAR_SIZE,
259                    constants::BALANCED_AVATAR_BYTES,
260                ),
261                MediaQuality::Worse => {
262                    (constants::WORSE_AVATAR_SIZE, constants::WORSE_AVATAR_BYTES)
263                }
264            };
265
266        let maybe_sticker = &mut false;
267        let is_avatar = true;
268        self.recode_to_size(
269            context,
270            None, // The name of an avatar doesn't matter
271            maybe_sticker,
272            img_wh,
273            max_bytes,
274            is_avatar,
275        )?;
276
277        Ok(())
278    }
279
280    /// Recodes an image pointed by a [BlobObject] so that it fits into limits on the image width,
281    /// height and file size specified by the config.
282    ///
283    /// On some platforms images are passed to the core as [`crate::message::Viewtype::Sticker`] in
284    /// which case `maybe_sticker` flag should be set. We recheck if an image is a true sticker
285    /// assuming that it must have at least one fully transparent corner, otherwise this flag is
286    /// reset.
287    pub async fn recode_to_image_size(
288        &mut self,
289        context: &Context,
290        name: Option<String>,
291        maybe_sticker: &mut bool,
292    ) -> Result<String> {
293        let (img_wh, max_bytes) =
294            match MediaQuality::from_i32(context.get_config_int(Config::MediaQuality).await?)
295                .unwrap_or_default()
296            {
297                MediaQuality::Balanced => (
298                    constants::BALANCED_IMAGE_SIZE,
299                    constants::BALANCED_IMAGE_BYTES,
300                ),
301                MediaQuality::Worse => (constants::WORSE_IMAGE_SIZE, constants::WORSE_IMAGE_BYTES),
302            };
303        let is_avatar = false;
304        let new_name =
305            self.recode_to_size(context, name, maybe_sticker, img_wh, max_bytes, is_avatar)?;
306
307        Ok(new_name)
308    }
309
    /// Recodes the image so that it fits into limits on width/height and byte size.
    ///
    /// If `!is_avatar`, then if `max_bytes` is exceeded, reduces the image to `img_wh` and proceeds
    /// with the result without rechecking.
    ///
    /// This modifies the blob object in-place: when the image is re-encoded, the new data is
    /// stored as a new deduplicated blob and `self.name` is pointed at it.
    ///
    /// Additionally, if you pass the user-visible filename as `name`
    /// then the updated user-visible filename will be returned;
    /// this may be necessary because the format may be changed to JPG,
    /// e.g. "image.png" -> "image.jpg". If `name` is `None`, the blob name is used instead.
    ///
    /// If `*maybe_sticker` is set, it is reset unless at least one corner pixel of the image is
    /// fully transparent. An image that is still considered a sticker and has no Exif data is
    /// left untouched.
    ///
    /// # Errors
    ///
    /// If recoding fails for a non-avatar image for which no Exif data was found, a warning
    /// is logged and the original `name` is returned, i.e. the original data is used.
    /// In all other cases (avatars, images with Exif data, or a failure before the Exif check
    /// was done) the error is returned.
    fn recode_to_size(
        &mut self,
        context: &Context,
        name: Option<String>,
        maybe_sticker: &mut bool,
        mut img_wh: u32,
        max_bytes: usize,
        is_avatar: bool,
    ) -> Result<String> {
        // Add white background only to avatars to spare the CPU.
        // The flag is consumed with `mem::take()` below, so the background is added at most once.
        let mut add_white_bg = is_avatar;
        // Reported out of the closure below; stays `false` if reading the metadata fails.
        let mut no_exif = false;
        let no_exif_ref = &mut no_exif;
        let mut name = name.unwrap_or_else(|| self.name.clone());
        // Kept for the fallback path at the end of the function.
        let original_name = name.clone();
        let res: Result<String> = tokio::task::block_in_place(move || {
            let mut file = std::fs::File::open(self.to_abs_path())?;
            let (nr_bytes, exif) = image_metadata(&file)?;
            *no_exif_ref = exif.is_none();
            // It's strange that BufReader modifies a file position while it takes a non-mut
            // reference. Ok, just rewind it.
            file.rewind()?;
            let imgreader = ImageReader::new(std::io::BufReader::new(&file)).with_guessed_format();
            let imgreader = match imgreader {
                Ok(ir) => ir,
                // Guessing the format failed, fall back to the format implied by the blob path.
                _ => {
                    file.rewind()?;
                    ImageReader::with_format(
                        std::io::BufReader::new(&file),
                        ImageFormat::from_path(self.to_abs_path())?,
                    )
                }
            };
            let fmt = imgreader.format().context("No format??")?;
            let mut img = imgreader.decode().context("image decode failure")?;
            let orientation = exif.as_ref().map(|exif| exif_orientation(exif, context));
            // Buffer for the encoded result; empty as long as nothing has been encoded yet.
            let mut encoded = Vec::new();

            if *maybe_sticker {
                // A sticker must have at least one fully transparent (alpha == 0) corner.
                let x_max = img.width().saturating_sub(1);
                let y_max = img.height().saturating_sub(1);
                *maybe_sticker = img.in_bounds(x_max, y_max)
                    && (img.get_pixel(0, 0).0[3] == 0
                        || img.get_pixel(x_max, 0).0[3] == 0
                        || img.get_pixel(0, y_max).0[3] == 0
                        || img.get_pixel(x_max, y_max).0[3] == 0);
            }
            // Stickers without Exif data are not recoded at all.
            if *maybe_sticker && exif.is_none() {
                return Ok(name);
            }

            // Apply the rotation requested by the Exif orientation, if any.
            img = match orientation {
                Some(90) => img.rotate90(),
                Some(180) => img.rotate180(),
                Some(270) => img.rotate270(),
                _ => img,
            };

            let exceeds_wh = img.width() > img_wh || img.height() > img_wh;
            // Compares the size of the original file, not of a re-encoded image.
            let exceeds_max_bytes = nr_bytes > max_bytes as u64;

            let jpeg_quality = 75;
            // PNG is only kept for files within the byte limit,
            // everything else is (re-)encoded as JPEG.
            let ofmt = match fmt {
                ImageFormat::Png if !exceeds_max_bytes => ImageOutputFormat::Png,
                ImageFormat::Jpeg => {
                    add_white_bg = false;
                    ImageOutputFormat::Jpeg {
                        quality: jpeg_quality,
                    }
                }
                _ => ImageOutputFormat::Jpeg {
                    quality: jpeg_quality,
                },
            };
            // We need to rewrite images with Exif to remove metadata such as location,
            // camera model, etc.
            //
            // TODO: Fix lost animation and transparency when recoding using the `image` crate. And
            // also `Viewtype::Gif` (maybe renamed to `Animation`) should be used for animated
            // images.
            //
            // Non-avatars are only scaled if the original file is too big. Avatars are also
            // scaled if they are too large in pixels or, when they have Exif data and must be
            // rewritten anyway, if the re-encoded image exceeds `max_bytes`. In the latter case
            // `encoded` is filled as a side effect of evaluating the condition.
            let do_scale = exceeds_max_bytes
                || is_avatar
                    && (exceeds_wh
                        || exif.is_some() && {
                            if mem::take(&mut add_white_bg) {
                                self::add_white_bg(&mut img);
                            }
                            encoded_img_exceeds_bytes(
                                context,
                                &img,
                                ofmt.clone(),
                                max_bytes,
                                &mut encoded,
                            )?
                        });

            if do_scale {
                if !exceeds_wh {
                    // The image is within the pixel limit, so start from its current size.
                    img_wh = max(img.width(), img.height());
                    // PNGs and WebPs may be huge because of animation, which is lost by the `image`
                    // crate when recoding, so don't scale them down.
                    if matches!(fmt, ImageFormat::Jpeg) || !encoded.is_empty() {
                        img_wh = img_wh * 2 / 3;
                    }
                }

                loop {
                    if mem::take(&mut add_white_bg) {
                        self::add_white_bg(&mut img);
                    }

                    // resize() results in often slightly better quality,
                    // however, comes at high price of being 4+ times slower than thumbnail().
                    // for a typical camera image that is sent, this may be a change from "instant" (500ms) to "long time waiting" (3s).
                    // as we do not have recoding in background while chat has already a preview,
                    // we vote for speed.
                    // exception is the avatar image: this is far more often sent than recoded,
                    // usually has less pixels by cropping, UI that needs to wait anyways,
                    // and also benefits from slightly better (5%) encoding of Triangle-filtered images.
                    let new_img = if is_avatar {
                        img.resize(img_wh, img_wh, image::imageops::FilterType::Triangle)
                    } else {
                        img.thumbnail(img_wh, img_wh)
                    };

                    // Only avatars are scaled down repeatedly until they fit into `max_bytes`;
                    // other images are accepted after the first pass.
                    if encoded_img_exceeds_bytes(
                        context,
                        &new_img,
                        ofmt.clone(),
                        max_bytes,
                        &mut encoded,
                    )? && is_avatar
                    {
                        // Give up instead of shrinking the avatar even further.
                        if img_wh < 20 {
                            return Err(format_err!(
                                "Failed to scale image to below {}B.",
                                max_bytes,
                            ));
                        }

                        img_wh = img_wh * 2 / 3;
                    } else {
                        info!(
                            context,
                            "Final scaled-down image size: {}B ({}px).",
                            encoded.len(),
                            img_wh
                        );
                        break;
                    }
                }
            }

            // Write a new blob if the image was scaled or has Exif data that must be removed.
            if do_scale || exif.is_some() {
                // The file format is JPEG/PNG now, we may have to change the file extension
                if !matches!(fmt, ImageFormat::Jpeg)
                    && matches!(ofmt, ImageOutputFormat::Jpeg { .. })
                {
                    name = Path::new(&name)
                        .with_extension("jpg")
                        .to_string_lossy()
                        .into_owned();
                }

                // Nothing was encoded above, so encode the (possibly rotated) image now.
                if encoded.is_empty() {
                    if mem::take(&mut add_white_bg) {
                        self::add_white_bg(&mut img);
                    }
                    encode_img(&img, ofmt, &mut encoded)?;
                }

                self.name = BlobObject::create_and_deduplicate_from_bytes(context, &encoded, &name)
                    .context("failed to write recoded blob to file")?
                    .name;
            }

            Ok(name)
        });
        match res {
            Ok(_) => res,
            Err(err) => {
                // Without Exif data there is no metadata to strip, so a non-avatar image can
                // be used as is.
                if !is_avatar && no_exif {
                    warn!(
                        context,
                        "Cannot recode image, using original data: {err:#}.",
                    );
                    Ok(original_name)
                } else {
                    Err(err)
                }
            }
        }
    }
514}
515
516fn file_hash(src: &Path) -> Result<blake3::Hash> {
517    ensure!(
518        !src.starts_with("$BLOBDIR/"),
519        "Use `get_abs_path()` to get the absolute path of the blobfile"
520    );
521    let mut hasher = blake3::Hasher::new();
522    let mut src_file = std::fs::File::open(src)
523        .with_context(|| format!("Failed to open file {}", src.display()))?;
524    hasher
525        .update_reader(&mut src_file)
526        .context("update_reader")?;
527    let hash = hasher.finalize();
528    Ok(hash)
529}
530
531/// Returns image file size and Exif.
532fn image_metadata(file: &std::fs::File) -> Result<(u64, Option<exif::Exif>)> {
533    let len = file.metadata()?.len();
534    let mut bufreader = std::io::BufReader::new(file);
535    let exif = exif::Reader::new().read_from_container(&mut bufreader).ok();
536    Ok((len, exif))
537}
538
539fn exif_orientation(exif: &exif::Exif, context: &Context) -> i32 {
540    if let Some(orientation) = exif.get_field(exif::Tag::Orientation, exif::In::PRIMARY) {
541        // possible orientation values are described at http://sylvana.net/jpegcrop/exif_orientation.html
542        // we only use rotation, in practise, flipping is not used.
543        match orientation.value.get_uint(0) {
544            Some(3) => return 180,
545            Some(6) => return 90,
546            Some(8) => return 270,
547            other => warn!(context, "Exif orientation value ignored: {other:?}."),
548        }
549    }
550    0
551}
552
/// All files in the blobdir.
///
/// This exists so we can have a [`BlobDirIter`] which needs something to own the data of
/// its `&Path`.  Use [`BlobDirContents::iter`] to create the iterator.
///
/// Additionally pre-allocating this means we get a length for progress report.
pub(crate) struct BlobDirContents<'a> {
    /// Paths of the regular files that were found in the blobdir.
    inner: Vec<PathBuf>,
    /// The context that is handed to the iterators created by `iter()`.
    context: &'a Context,
}
563
564impl<'a> BlobDirContents<'a> {
565    pub(crate) async fn new(context: &'a Context) -> Result<BlobDirContents<'a>> {
566        let readdir = fs::read_dir(context.get_blobdir()).await?;
567        let inner = ReadDirStream::new(readdir)
568            .filter_map(|entry| async move {
569                match entry {
570                    Ok(entry) => Some(entry),
571                    Err(err) => {
572                        error!(context, "Failed to read blob file: {err}.");
573                        None
574                    }
575                }
576            })
577            .filter_map(|entry| async move {
578                match entry.file_type().await.ok()?.is_file() {
579                    true => Some(entry.path()),
580                    false => {
581                        warn!(
582                            context,
583                            "Export: Found blob dir entry {} that is not a file, ignoring.",
584                            entry.path().display()
585                        );
586                        None
587                    }
588                }
589            })
590            .collect()
591            .await;
592        Ok(Self { inner, context })
593    }
594
595    pub(crate) fn iter(&self) -> BlobDirIter<'_> {
596        BlobDirIter::new(self.context, self.inner.iter())
597    }
598}
599
/// An iterator over all the [`BlobObject`]s in the blobdir.
pub(crate) struct BlobDirIter<'a> {
    /// The remaining paths to turn into [`BlobObject`]s.
    iter: std::slice::Iter<'a, PathBuf>,
    /// The context used to create the [`BlobObject`]s and to log skipped paths.
    context: &'a Context,
}
605
606impl<'a> BlobDirIter<'a> {
607    fn new(context: &'a Context, iter: std::slice::Iter<'a, PathBuf>) -> BlobDirIter<'a> {
608        Self { iter, context }
609    }
610}
611
612impl<'a> Iterator for BlobDirIter<'a> {
613    type Item = BlobObject<'a>;
614
615    fn next(&mut self) -> Option<Self::Item> {
616        for path in self.iter.by_ref() {
617            // In theory this can error but we'd have corrupted filenames in the blobdir, so
618            // silently skipping them is fine.
619            match BlobObject::from_path(self.context, path) {
620                Ok(blob) => return Some(blob),
621                Err(err) => warn!(self.context, "{err}"),
622            }
623        }
624        None
625    }
626}
627
// `next()` only drains the underlying slice iterator, so once it has returned `None`
// it keeps returning `None`.
impl FusedIterator for BlobDirIter<'_> {}
629
630fn encode_img(
631    img: &DynamicImage,
632    fmt: ImageOutputFormat,
633    encoded: &mut Vec<u8>,
634) -> anyhow::Result<()> {
635    encoded.clear();
636    let mut buf = Cursor::new(encoded);
637    match fmt {
638        ImageOutputFormat::Png => img.write_to(&mut buf, ImageFormat::Png)?,
639        ImageOutputFormat::Jpeg { quality } => {
640            let encoder = JpegEncoder::new_with_quality(&mut buf, quality);
641            // Convert image into RGB8 to avoid the error
642            // "The encoder or decoder for Jpeg does not support the color type Rgba8"
643            // (<https://github.com/image-rs/image/issues/2211>).
644            img.clone().into_rgb8().write_with_encoder(encoder)?;
645        }
646    }
647    Ok(())
648}
649
650fn encoded_img_exceeds_bytes(
651    context: &Context,
652    img: &DynamicImage,
653    fmt: ImageOutputFormat,
654    max_bytes: usize,
655    encoded: &mut Vec<u8>,
656) -> anyhow::Result<bool> {
657    encode_img(img, fmt, encoded)?;
658    if encoded.len() > max_bytes {
659        info!(
660            context,
661            "Image size {}B ({}x{}px) exceeds {}B, need to scale down.",
662            encoded.len(),
663            img.width(),
664            img.height(),
665            max_bytes,
666        );
667        return Ok(true);
668    }
669    Ok(false)
670}
671
672/// Removes transparency from an image using a white background.
673fn add_white_bg(img: &mut DynamicImage) {
674    for y in 0..img.height() {
675        for x in 0..img.width() {
676            let mut p = Rgba([255u8, 255, 255, 255]);
677            p.blend(&img.get_pixel(x, y));
678            img.put_pixel(x, y, p);
679        }
680    }
681}
682
683#[cfg(test)]
684mod blob_tests;