1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
//! # HTTP module.

use std::time::Duration;

use anyhow::{anyhow, Result};
use mime::Mime;
use once_cell::sync::Lazy;

use crate::context::Context;
use crate::socks::Socks5Config;

/// Timeout configured on every HTTP client built by `get_client`.
const HTTP_TIMEOUT: Duration = Duration::from_secs(30);

/// Root certificate built from the DER file bundled under
/// `assets/root-certificates/letsencrypt/` (`isrgrootx1.der`).
///
/// The certificate is constructed lazily on first access and then reused;
/// `get_client` adds a clone of it to every client it builds.
static LETSENCRYPT_ROOT: Lazy<reqwest::tls::Certificate> = Lazy::new(|| {
    reqwest::tls::Certificate::from_der(include_bytes!(
        "../../assets/root-certificates/letsencrypt/isrgrootx1.der"
    ))
    // The bytes are embedded at compile time, so a failure here means the
    // bundled asset itself is broken; panicking on first use is deliberate.
    .unwrap()
});

/// HTTP(S) GET response.
///
/// Returned by `read_url_blob`: the raw body together with the pieces of the
/// `Content-Type` header that could be parsed.
#[derive(Debug)]
pub struct Response {
    /// Response body.
    pub blob: Vec<u8>,

    /// MIME type extracted from the `Content-Type` header, if any.
    ///
    /// This is the essence of the type (for example `text/html`), without
    /// parameters.
    pub mimetype: Option<String>,

    /// Encoding extracted from the `charset` parameter of the `Content-Type`
    /// header, if any.
    pub encoding: Option<String>,
}

/// Fetches `url` with an HTTP GET request and returns the body as text.
///
/// # Errors
///
/// Fails if the request itself fails (see `read_url_inner`) or if the
/// response body cannot be read as text.
pub async fn read_url(context: &Context, url: &str) -> Result<String> {
    let response = read_url_inner(context, url).await?;
    let body = response.text().await?;
    Ok(body)
}

/// Fetches `url` with an HTTP GET request and returns the raw body together
/// with the MIME type and charset taken from the `Content-Type` header.
///
/// A missing, non-textual or unparsable `Content-Type` header is not an
/// error: `mimetype` and `encoding` are simply `None` in that case.
///
/// # Errors
///
/// Fails if the request itself fails (see `read_url_inner`) or if the
/// response body cannot be read.
pub async fn read_url_blob(context: &Context, url: &str) -> Result<Response> {
    let response = read_url_inner(context, url).await?;

    // Parse the header into an owned `Mime` first, so that nothing borrows
    // from `response` by the time its body is consumed below.
    let parsed: Option<Mime> = match response.headers().get(reqwest::header::CONTENT_TYPE) {
        Some(raw) => raw
            .to_str()
            .ok()
            .and_then(|text| text.parse::<Mime>().ok()),
        None => None,
    };

    let (mimetype, encoding) = match &parsed {
        Some(content_type) => (
            Some(content_type.essence_str().to_string()),
            content_type
                .get_param(mime::CHARSET)
                .map(|name| name.as_str().to_string()),
        ),
        None => (None, None),
    };

    let blob = Vec::<u8>::from(response.bytes().await?);
    Ok(Response {
        blob,
        mimetype,
        encoding,
    })
}

/// Sends an HTTP GET request to `url`, following up to 10 redirects, and
/// returns the first non-redirect response.
///
/// The client is built by `get_client` using the SOCKS5 configuration loaded
/// from the database of `context`.
///
/// # Errors
///
/// Fails if the SOCKS5 configuration cannot be loaded, the client cannot be
/// built, a request fails, a redirect response has no usable `Location`
/// header, or the redirect limit is exhausted.
async fn read_url_inner(context: &Context, url: &str) -> Result<reqwest::Response> {
    let socks5_config = Socks5Config::from_database(&context.sql).await?;
    let client = get_client(socks5_config)?;
    let mut url = url.to_string();

    // Follow up to 10 http-redirects
    for _i in 0..10 {
        let response = client.get(&url).send().await?;
        if response.status().is_redirection() {
            // If several `Location` headers are present, the last one wins.
            let location = response
                .headers()
                .get_all("location")
                .iter()
                .last()
                .ok_or_else(|| anyhow!("Redirection doesn't have a target location"))?
                .to_str()?;

            // `Location` may be a relative reference (e.g. `/login`), which
            // cannot be requested as-is. Resolve it against the URL that
            // produced this response; absolute targets pass through `join`
            // unchanged apart from URL normalization.
            let target = response.url().join(location)?;
            info!(context, "Following redirect to {}", target);
            url = target.to_string();
            continue;
        }

        return Ok(response);
    }

    Err(anyhow!("Followed 10 redirections"))
}

/// Builds a `reqwest` client with the module timeout and the bundled
/// Let's Encrypt root certificate.
///
/// With `Some(socks5_config)` all traffic is routed through that proxy;
/// with `None` proxying is disabled entirely.
///
/// # Errors
///
/// Fails if the proxy URL is rejected or the client cannot be built.
pub(crate) fn get_client(socks5_config: Option<Socks5Config>) -> Result<reqwest::Client> {
    let base = reqwest::ClientBuilder::new()
        .timeout(HTTP_TIMEOUT)
        .add_root_certificate(LETSENCRYPT_ROOT.clone());

    let configured = match socks5_config {
        Some(config) => {
            let proxy = reqwest::Proxy::all(config.to_url())?;
            base.proxy(proxy)
        }
        // Without an explicit proxy, also turn off the "system" proxy that
        // `reqwest` would otherwise pick up from environment variables, see
        // <https://docs.rs/reqwest/0.11.14/reqwest/struct.ClientBuilder.html#method.no_proxy>
        // for documentation.
        None => base.no_proxy(),
    };

    let client = configured.build()?;
    Ok(client)
}