deltachat/
blob.rs

1//! # Blob directory management.
2
3use core::cmp::max;
4use std::io::{Cursor, Seek};
5use std::iter::FusedIterator;
6use std::mem;
7use std::path::{Path, PathBuf};
8
9use anyhow::{ensure, format_err, Context as _, Result};
10use base64::Engine as _;
11use futures::StreamExt;
12use image::codecs::jpeg::JpegEncoder;
13use image::ImageReader;
14use image::{DynamicImage, GenericImage, GenericImageView, ImageFormat, Pixel, Rgba};
15use num_traits::FromPrimitive;
16use tokio::{fs, task};
17use tokio_stream::wrappers::ReadDirStream;
18
19use crate::config::Config;
20use crate::constants::{self, MediaQuality};
21use crate::context::Context;
22use crate::events::EventType;
23use crate::log::LogExt;
24use crate::tools::sanitize_filename;
25
/// Represents a file in the blob directory.
///
/// The object has a name, which will always be valid UTF-8.  Having a
/// blob object does not imply the respective file exists, however
/// when using one of the `create*()` methods a unique file is
/// created.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlobObject<'a> {
    /// The blob directory the file is located in.
    blobdir: &'a Path,

    /// The name of the file on the disk, stored with the `$BLOBDIR/` prefix.
    /// Note that this is NOT the user-visible filename,
    /// which is only stored in Param::Filename on the message.
    name: String,
}
41
/// Output encoding selected for a recoded image, consumed by `encode_img()`.
#[derive(Debug, Clone)]
enum ImageOutputFormat {
    /// Encode the image as PNG.
    Png,
    /// Encode the image as JPEG, passing `quality` to the JPEG encoder.
    Jpeg { quality: u8 },
}
47
48impl<'a> BlobObject<'a> {
49    /// Creates a blob object by copying or renaming an existing file.
50    /// If the source file is already in the blobdir, it will be renamed,
51    /// otherwise it will be copied to the blobdir first.
52    ///
53    /// In order to deduplicate files that contain the same data,
54    /// the file will be named `<hash>.<extension>`, e.g. `ce940175885d7b78f7b7e9f1396611f.jpg`.
55    /// The `original_name` param is only used to get the extension.
56    ///
57    /// This is done in a in way which avoids race-conditions when multiple files are
58    /// concurrently created.
59    pub fn create_and_deduplicate(
60        context: &'a Context,
61        src: &Path,
62        original_name: &Path,
63    ) -> Result<BlobObject<'a>> {
64        // `create_and_deduplicate{_from_bytes}()` do blocking I/O, but can still be called
65        // from an async context thanks to `block_in_place()`.
66        // Tokio's "async" I/O functions are also just thin wrappers around the blocking I/O syscalls,
67        // so we are doing essentially the same here.
68        task::block_in_place(|| {
69            let temp_path;
70            let src_in_blobdir: &Path;
71            let blobdir = context.get_blobdir();
72
73            if src.starts_with(blobdir) {
74                src_in_blobdir = src;
75            } else {
76                info!(
77                    context,
78                    "Source file not in blobdir. Copying instead of moving in order to prevent moving a file that was still needed."
79                );
80                temp_path = blobdir.join(format!("tmp-{}", rand::random::<u64>()));
81                if std::fs::copy(src, &temp_path).is_err() {
82                    // Maybe the blobdir didn't exist
83                    std::fs::create_dir_all(blobdir).log_err(context).ok();
84                    std::fs::copy(src, &temp_path).context("Copying new blobfile failed")?;
85                };
86                src_in_blobdir = &temp_path;
87            }
88
89            let hash = file_hash(src_in_blobdir)?.to_hex();
90            let hash = hash.as_str();
91            let hash = hash.get(0..31).unwrap_or(hash);
92            let new_file =
93                if let Some(extension) = original_name.extension().filter(|e| e.len() <= 32) {
94                    let extension = extension.to_string_lossy().to_lowercase();
95                    let extension = sanitize_filename(&extension);
96                    format!("$BLOBDIR/{hash}.{}", extension)
97                } else {
98                    format!("$BLOBDIR/{hash}")
99                };
100
101            let blob = BlobObject {
102                blobdir,
103                name: new_file,
104            };
105            let new_path = blob.to_abs_path();
106
107            // This will also replace an already-existing file.
108            // Renaming is atomic, so this will avoid race conditions.
109            std::fs::rename(src_in_blobdir, &new_path)?;
110
111            context.emit_event(EventType::NewBlobFile(blob.as_name().to_string()));
112            Ok(blob)
113        })
114    }
115
116    /// Creates a new blob object with the file contents in `data`.
117    /// In order to deduplicate files that contain the same data,
118    /// the file will be named `<hash>.<extension>`, e.g. `ce940175885d7b78f7b7e9f1396611f.jpg`.
119    /// The `original_name` param is only used to get the extension.
120    ///
121    /// The `data` will be written into the file without race-conditions.
122    ///
123    /// This function does blocking I/O, but it can still be called from an async context
124    /// because `block_in_place()` is used to leave the async runtime if necessary.
125    pub fn create_and_deduplicate_from_bytes(
126        context: &'a Context,
127        data: &[u8],
128        original_name: &str,
129    ) -> Result<BlobObject<'a>> {
130        task::block_in_place(|| {
131            let blobdir = context.get_blobdir();
132            let temp_path = blobdir.join(format!("tmp-{}", rand::random::<u64>()));
133            if std::fs::write(&temp_path, data).is_err() {
134                // Maybe the blobdir didn't exist
135                std::fs::create_dir_all(blobdir).log_err(context).ok();
136                std::fs::write(&temp_path, data).context("writing new blobfile failed")?;
137            };
138
139            BlobObject::create_and_deduplicate(context, &temp_path, Path::new(original_name))
140        })
141    }
142
143    /// Returns a [BlobObject] for an existing blob from a path.
144    ///
145    /// The path must designate a file directly in the blobdir and
146    /// must use a valid blob name.  That is after sanitisation the
147    /// name must still be the same, that means it must be valid UTF-8
148    /// and not have any special characters in it.
149    pub fn from_path(context: &'a Context, path: &Path) -> Result<BlobObject<'a>> {
150        let rel_path = path
151            .strip_prefix(context.get_blobdir())
152            .with_context(|| format!("wrong blobdir: {}", path.display()))?;
153        let name = rel_path.to_str().context("wrong name")?;
154        if !BlobObject::is_acceptible_blob_name(name) {
155            return Err(format_err!("bad blob name: {}", rel_path.display()));
156        }
157        BlobObject::from_name(context, name)
158    }
159
160    /// Returns a [BlobObject] for an existing blob.
161    ///
162    /// The `name` may optionally be prefixed with the `$BLOBDIR/`
163    /// prefixed, as returned by [BlobObject::as_name].  This is how
164    /// you want to create a [BlobObject] for a filename read from the
165    /// database.
166    pub fn from_name(context: &'a Context, name: &str) -> Result<BlobObject<'a>> {
167        let name = match name.starts_with("$BLOBDIR/") {
168            true => name.splitn(2, '/').last().unwrap(),
169            false => name,
170        };
171        if !BlobObject::is_acceptible_blob_name(name) {
172            return Err(format_err!("not an acceptable blob name: {}", name));
173        }
174        Ok(BlobObject {
175            blobdir: context.get_blobdir(),
176            name: format!("$BLOBDIR/{name}"),
177        })
178    }
179
180    /// Returns the absolute path to the blob in the filesystem.
181    pub fn to_abs_path(&self) -> PathBuf {
182        let fname = Path::new(&self.name).strip_prefix("$BLOBDIR/").unwrap();
183        self.blobdir.join(fname)
184    }
185
186    /// Returns the blob name, as stored in the database.
187    ///
188    /// This returns the blob in the `$BLOBDIR/<name>` format used in
189    /// the database.  Do not use this unless you're about to store
190    /// this string in the database or [Params].  Eventually even
191    /// those conversions should be handled by the type system.
192    ///
193    /// Note that this is NOT the user-visible filename,
194    /// which is only stored in Param::Filename on the message.
195    ///
196    #[allow(rustdoc::private_intra_doc_links)]
197    /// [Params]: crate::param::Params
198    pub fn as_name(&self) -> &str {
199        &self.name
200    }
201
202    /// Returns the extension of the blob.
203    ///
204    /// If a blob's filename has an extension, it is always guaranteed
205    /// to be lowercase.
206    pub fn suffix(&self) -> Option<&str> {
207        let ext = self.name.rsplit('.').next();
208        if ext == Some(&self.name) {
209            None
210        } else {
211            ext
212        }
213    }
214
215    /// Checks whether a name is a valid blob name.
216    ///
217    /// This is slightly less strict than stanitise_name, presumably
218    /// someone already created a file with such a name so we just
219    /// ensure it's not actually a path in disguise.
220    ///
221    /// Acceptible blob name always have to be valid utf-8.
222    fn is_acceptible_blob_name(name: &str) -> bool {
223        if name.find('/').is_some() {
224            return false;
225        }
226        if name.find('\\').is_some() {
227            return false;
228        }
229        if name.find('\0').is_some() {
230            return false;
231        }
232        true
233    }
234
235    /// Returns path to the stored Base64-decoded blob.
236    ///
237    /// If `data` represents an image of known format, this adds the corresponding extension.
238    ///
239    /// Even though this function is not async, it's OK to call it from an async context.
240    pub(crate) fn store_from_base64(context: &Context, data: &str) -> Result<String> {
241        let buf = base64::engine::general_purpose::STANDARD.decode(data)?;
242        let name = if let Ok(format) = image::guess_format(&buf) {
243            if let Some(ext) = format.extensions_str().first() {
244                format!("file.{ext}")
245            } else {
246                String::new()
247            }
248        } else {
249            String::new()
250        };
251        let blob = BlobObject::create_and_deduplicate_from_bytes(context, &buf, &name)?;
252        Ok(blob.as_name().to_string())
253    }
254
255    /// Recode image to avatar size.
256    pub async fn recode_to_avatar_size(&mut self, context: &Context) -> Result<()> {
257        let (img_wh, max_bytes) =
258            match MediaQuality::from_i32(context.get_config_int(Config::MediaQuality).await?)
259                .unwrap_or_default()
260            {
261                MediaQuality::Balanced => (
262                    constants::BALANCED_AVATAR_SIZE,
263                    constants::BALANCED_AVATAR_BYTES,
264                ),
265                MediaQuality::Worse => {
266                    (constants::WORSE_AVATAR_SIZE, constants::WORSE_AVATAR_BYTES)
267                }
268            };
269
270        let maybe_sticker = &mut false;
271        let is_avatar = true;
272        self.recode_to_size(
273            context,
274            None, // The name of an avatar doesn't matter
275            maybe_sticker,
276            img_wh,
277            max_bytes,
278            is_avatar,
279        )?;
280
281        Ok(())
282    }
283
284    /// Recodes an image pointed by a [BlobObject] so that it fits into limits on the image width,
285    /// height and file size specified by the config.
286    ///
287    /// On some platforms images are passed to the core as [`crate::message::Viewtype::Sticker`] in
288    /// which case `maybe_sticker` flag should be set. We recheck if an image is a true sticker
289    /// assuming that it must have at least one fully transparent corner, otherwise this flag is
290    /// reset.
291    pub async fn recode_to_image_size(
292        &mut self,
293        context: &Context,
294        name: Option<String>,
295        maybe_sticker: &mut bool,
296    ) -> Result<String> {
297        let (img_wh, max_bytes) =
298            match MediaQuality::from_i32(context.get_config_int(Config::MediaQuality).await?)
299                .unwrap_or_default()
300            {
301                MediaQuality::Balanced => (
302                    constants::BALANCED_IMAGE_SIZE,
303                    constants::BALANCED_IMAGE_BYTES,
304                ),
305                MediaQuality::Worse => (constants::WORSE_IMAGE_SIZE, constants::WORSE_IMAGE_BYTES),
306            };
307        let is_avatar = false;
308        let new_name =
309            self.recode_to_size(context, name, maybe_sticker, img_wh, max_bytes, is_avatar)?;
310
311        Ok(new_name)
312    }
313
    /// Recodes the image so that it fits into limits on width/height and byte size.
    ///
    /// If `!is_avatar`, then if `max_bytes` is exceeded, reduces the image to `img_wh` and proceeds
    /// with the result without rechecking.
    ///
    /// This modifies the blob object in-place.
    ///
    /// Additionally, if you pass the user-visible filename as `name`
    /// then the updated user-visible filename will be returned;
    /// this may be necessary because the format may be changed to JPG,
    /// i.e. "image.png" -> "image.jpg".
    /// If `name` is `None`, the blob's own name is used in its place.
    ///
    /// `maybe_sticker` is an in/out flag: if it is set on entry, it is reset unless at least
    /// one corner pixel of the image is fully transparent.
    ///
    /// # Errors
    ///
    /// For avatars every failure is returned. For other images a failure is returned only if
    /// it happened before the file could be checked for Exif data, or if Exif data was found;
    /// otherwise the failure is logged as a warning and the original `name` is returned.
    fn recode_to_size(
        &mut self,
        context: &Context,
        name: Option<String>,
        maybe_sticker: &mut bool,
        mut img_wh: u32,
        max_bytes: usize,
        is_avatar: bool,
    ) -> Result<String> {
        // Add white background only to avatars to spare the CPU.
        let mut add_white_bg = is_avatar;
        // Written from inside the closure below through `no_exif_ref`, read afterwards
        // to decide whether an error may be swallowed.
        let mut no_exif = false;
        let no_exif_ref = &mut no_exif;
        let mut name = name.unwrap_or_else(|| self.name.clone());
        let original_name = name.clone();
        let res: Result<String> = tokio::task::block_in_place(move || {
            let mut file = std::fs::File::open(self.to_abs_path())?;
            let (nr_bytes, exif) = image_metadata(&file)?;
            *no_exif_ref = exif.is_none();
            // It's strange that BufReader modifies a file position while it takes a non-mut
            // reference. Ok, just rewind it.
            file.rewind()?;
            let imgreader = ImageReader::new(std::io::BufReader::new(&file)).with_guessed_format();
            // If guessing the format failed, fall back to the format derived from the file path.
            let imgreader = match imgreader {
                Ok(ir) => ir,
                _ => {
                    file.rewind()?;
                    ImageReader::with_format(
                        std::io::BufReader::new(&file),
                        ImageFormat::from_path(self.to_abs_path())?,
                    )
                }
            };
            let fmt = imgreader.format().context("No format??")?;
            let mut img = imgreader.decode().context("image decode failure")?;
            let orientation = exif.as_ref().map(|exif| exif_orientation(exif, context));
            // Buffer for the encoded result; stays empty until the image is encoded
            // for the first time.
            let mut encoded = Vec::new();

            // Keep the sticker flag only if at least one corner pixel has an alpha value of 0.
            if *maybe_sticker {
                let x_max = img.width().saturating_sub(1);
                let y_max = img.height().saturating_sub(1);
                *maybe_sticker = img.in_bounds(x_max, y_max)
                    && (img.get_pixel(0, 0).0[3] == 0
                        || img.get_pixel(x_max, 0).0[3] == 0
                        || img.get_pixel(0, y_max).0[3] == 0
                        || img.get_pixel(x_max, y_max).0[3] == 0);
            }
            // A sticker without Exif is left as it is: neither the blob nor the name changes.
            if *maybe_sticker && exif.is_none() {
                return Ok(name);
            }

            // Apply the rotation requested by the Exif orientation, if any.
            img = match orientation {
                Some(90) => img.rotate90(),
                Some(180) => img.rotate180(),
                Some(270) => img.rotate270(),
                _ => img,
            };

            let exceeds_wh = img.width() > img_wh || img.height() > img_wh;
            let exceeds_max_bytes = nr_bytes > max_bytes as u64;

            let jpeg_quality = 75;
            // PNG input stays PNG only while the original file is within `max_bytes`;
            // everything else is written as JPEG.
            let ofmt = match fmt {
                ImageFormat::Png if !exceeds_max_bytes => ImageOutputFormat::Png,
                ImageFormat::Jpeg => {
                    // JPEG input never gets the white background
                    // (presumably because it carries no transparency).
                    add_white_bg = false;
                    ImageOutputFormat::Jpeg {
                        quality: jpeg_quality,
                    }
                }
                _ => ImageOutputFormat::Jpeg {
                    quality: jpeg_quality,
                },
            };
            // We need to rewrite images with Exif to remove metadata such as location,
            // camera model, etc.
            //
            // TODO: Fix lost animation and transparency when recoding using the `image` crate. And
            // also `Viewtype::Gif` (maybe renamed to `Animation`) should be used for animated
            // images.
            //
            // Scaling is needed if the file is too large in bytes, or, for avatars only, if the
            // dimensions are too large or the image with Exif is too large once re-encoded.
            // The innermost block runs only in that last case and leaves its output in
            // `encoded`.
            let do_scale = exceeds_max_bytes
                || is_avatar
                    && (exceeds_wh
                        || exif.is_some() && {
                            if mem::take(&mut add_white_bg) {
                                self::add_white_bg(&mut img);
                            }
                            encoded_img_exceeds_bytes(
                                context,
                                &img,
                                ofmt.clone(),
                                max_bytes,
                                &mut encoded,
                            )?
                        });

            if do_scale {
                if !exceeds_wh {
                    // The dimensions are within the limit, so start from the current size.
                    img_wh = max(img.width(), img.height());
                    // PNGs and WebPs may be huge because of animation, which is lost by the `image`
                    // crate when recoding, so don't scale them down.
                    //
                    // `encoded` is non-empty here only if the re-encoded image was already
                    // found to exceed `max_bytes` above.
                    if matches!(fmt, ImageFormat::Jpeg) || !encoded.is_empty() {
                        img_wh = img_wh * 2 / 3;
                    }
                }

                // Shrink by a factor of 2/3 until the encoded image fits. For non-avatars
                // the loop body runs exactly once, whatever the resulting size is.
                loop {
                    if mem::take(&mut add_white_bg) {
                        self::add_white_bg(&mut img);
                    }
                    let new_img = img.thumbnail(img_wh, img_wh);

                    if encoded_img_exceeds_bytes(
                        context,
                        &new_img,
                        ofmt.clone(),
                        max_bytes,
                        &mut encoded,
                    )? && is_avatar
                    {
                        if img_wh < 20 {
                            return Err(format_err!(
                                "Failed to scale image to below {}B.",
                                max_bytes,
                            ));
                        }

                        img_wh = img_wh * 2 / 3;
                    } else {
                        info!(
                            context,
                            "Final scaled-down image size: {}B ({}px).",
                            encoded.len(),
                            img_wh
                        );
                        break;
                    }
                }
            }

            // The blob is rewritten only if it was scaled or if Exif has to be removed.
            if do_scale || exif.is_some() {
                // The file format is JPEG/PNG now, we may have to change the file extension
                if !matches!(fmt, ImageFormat::Jpeg)
                    && matches!(ofmt, ImageOutputFormat::Jpeg { .. })
                {
                    name = Path::new(&name)
                        .with_extension("jpg")
                        .to_string_lossy()
                        .into_owned();
                }

                // Nothing was encoded yet (no scaling and no size check), so encode now.
                if encoded.is_empty() {
                    if mem::take(&mut add_white_bg) {
                        self::add_white_bg(&mut img);
                    }
                    encode_img(&img, ofmt, &mut encoded)?;
                }

                // Point this blob object at the newly written, deduplicated file.
                self.name = BlobObject::create_and_deduplicate_from_bytes(context, &encoded, &name)
                    .context("failed to write recoded blob to file")?
                    .name;
            }

            Ok(name)
        });
        match res {
            Ok(_) => res,
            Err(err) => {
                // Without Exif there is no metadata that must be stripped, so a non-avatar
                // image that could not be recoded is used as it is.
                if !is_avatar && no_exif {
                    warn!(
                        context,
                        "Cannot recode image, using original data: {err:#}.",
                    );
                    Ok(original_name)
                } else {
                    Err(err)
                }
            }
        }
    }
505}
506
507fn file_hash(src: &Path) -> Result<blake3::Hash> {
508    ensure!(
509        !src.starts_with("$BLOBDIR/"),
510        "Use `get_abs_path()` to get the absolute path of the blobfile"
511    );
512    let mut hasher = blake3::Hasher::new();
513    let mut src_file = std::fs::File::open(src)
514        .with_context(|| format!("Failed to open file {}", src.display()))?;
515    hasher
516        .update_reader(&mut src_file)
517        .context("update_reader")?;
518    let hash = hasher.finalize();
519    Ok(hash)
520}
521
522/// Returns image file size and Exif.
523fn image_metadata(file: &std::fs::File) -> Result<(u64, Option<exif::Exif>)> {
524    let len = file.metadata()?.len();
525    let mut bufreader = std::io::BufReader::new(file);
526    let exif = exif::Reader::new().read_from_container(&mut bufreader).ok();
527    Ok((len, exif))
528}
529
530fn exif_orientation(exif: &exif::Exif, context: &Context) -> i32 {
531    if let Some(orientation) = exif.get_field(exif::Tag::Orientation, exif::In::PRIMARY) {
532        // possible orientation values are described at http://sylvana.net/jpegcrop/exif_orientation.html
533        // we only use rotation, in practise, flipping is not used.
534        match orientation.value.get_uint(0) {
535            Some(3) => return 180,
536            Some(6) => return 90,
537            Some(8) => return 270,
538            other => warn!(context, "Exif orientation value ignored: {other:?}."),
539        }
540    }
541    0
542}
543
/// All files in the blobdir.
///
/// This exists so we can have a [`BlobDirIter`] which needs something to own the data of
/// its `&Path`.  Use [`BlobDirContents::iter`] to create the iterator.
///
/// Additionally pre-allocating this means we get a length for progress report.
pub(crate) struct BlobDirContents<'a> {
    /// Paths of the blobdir entries that are files.
    inner: Vec<PathBuf>,
    /// Context whose blobdir was listed.
    context: &'a Context,
}
554
555impl<'a> BlobDirContents<'a> {
556    pub(crate) async fn new(context: &'a Context) -> Result<BlobDirContents<'a>> {
557        let readdir = fs::read_dir(context.get_blobdir()).await?;
558        let inner = ReadDirStream::new(readdir)
559            .filter_map(|entry| async move {
560                match entry {
561                    Ok(entry) => Some(entry),
562                    Err(err) => {
563                        error!(context, "Failed to read blob file: {err}.");
564                        None
565                    }
566                }
567            })
568            .filter_map(|entry| async move {
569                match entry.file_type().await.ok()?.is_file() {
570                    true => Some(entry.path()),
571                    false => {
572                        warn!(
573                            context,
574                            "Export: Found blob dir entry {} that is not a file, ignoring.",
575                            entry.path().display()
576                        );
577                        None
578                    }
579                }
580            })
581            .collect()
582            .await;
583        Ok(Self { inner, context })
584    }
585
586    pub(crate) fn iter(&self) -> BlobDirIter<'_> {
587        BlobDirIter::new(self.context, self.inner.iter())
588    }
589}
590
/// An iterator over all the [`BlobObject`]s in the blobdir.
pub(crate) struct BlobDirIter<'a> {
    /// Iterator over the file paths to convert into blob objects.
    iter: std::slice::Iter<'a, PathBuf>,
    /// Context used to resolve the blobdir and to log skipped paths.
    context: &'a Context,
}
596
597impl<'a> BlobDirIter<'a> {
598    fn new(context: &'a Context, iter: std::slice::Iter<'a, PathBuf>) -> BlobDirIter<'a> {
599        Self { iter, context }
600    }
601}
602
603impl<'a> Iterator for BlobDirIter<'a> {
604    type Item = BlobObject<'a>;
605
606    fn next(&mut self) -> Option<Self::Item> {
607        for path in self.iter.by_ref() {
608            // In theory this can error but we'd have corrupted filenames in the blobdir, so
609            // silently skipping them is fine.
610            match BlobObject::from_path(self.context, path) {
611                Ok(blob) => return Some(blob),
612                Err(err) => warn!(self.context, "{err}"),
613            }
614        }
615        None
616    }
617}
618
// `next()` returns `None` only once the underlying slice iterator is exhausted,
// and `std::slice::Iter` is itself fused.
impl FusedIterator for BlobDirIter<'_> {}
620
621fn encode_img(
622    img: &DynamicImage,
623    fmt: ImageOutputFormat,
624    encoded: &mut Vec<u8>,
625) -> anyhow::Result<()> {
626    encoded.clear();
627    let mut buf = Cursor::new(encoded);
628    match fmt {
629        ImageOutputFormat::Png => img.write_to(&mut buf, ImageFormat::Png)?,
630        ImageOutputFormat::Jpeg { quality } => {
631            let encoder = JpegEncoder::new_with_quality(&mut buf, quality);
632            // Convert image into RGB8 to avoid the error
633            // "The encoder or decoder for Jpeg does not support the color type Rgba8"
634            // (<https://github.com/image-rs/image/issues/2211>).
635            img.clone().into_rgb8().write_with_encoder(encoder)?;
636        }
637    }
638    Ok(())
639}
640
641fn encoded_img_exceeds_bytes(
642    context: &Context,
643    img: &DynamicImage,
644    fmt: ImageOutputFormat,
645    max_bytes: usize,
646    encoded: &mut Vec<u8>,
647) -> anyhow::Result<bool> {
648    encode_img(img, fmt, encoded)?;
649    if encoded.len() > max_bytes {
650        info!(
651            context,
652            "Image size {}B ({}x{}px) exceeds {}B, need to scale down.",
653            encoded.len(),
654            img.width(),
655            img.height(),
656            max_bytes,
657        );
658        return Ok(true);
659    }
660    Ok(false)
661}
662
663/// Removes transparency from an image using a white background.
664fn add_white_bg(img: &mut DynamicImage) {
665    for y in 0..img.height() {
666        for x in 0..img.width() {
667            let mut p = Rgba([255u8, 255, 255, 255]);
668            p.blend(&img.get_pixel(x, y));
669            img.put_pixel(x, y, p);
670        }
671    }
672}
673
674#[cfg(test)]
675mod blob_tests;