diff --git a/CHANGELOG.md b/CHANGELOG.md index 806e8e09ce..596bbe9b8b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -60,6 +60,24 @@ different replica version or different replica options. It doesn't apply to `--pocketic` because PocketIC does not yet persist any data. +### feat: allow specifying encodings in `.ic-assets.json` + +When uploading assets to an asset canister, `dfx` by default uploads `.txt`, `.html` and `.js` files in `identity` encoding and, if it saves bytes, additionally in `gzip` encoding. +It is now possible to specify in `.ic-assets.json` which encodings are used besides `identity`. +Note that an encoding other than `identity` is only used if it saves bytes compared to `identity` or if `identity` is not one of the specified encodings. + +Example: To turn off `gzip` for `.js` files and to turn on `gzip` for `.jpg` files, use this in `.ic-assets.json`: +``` json +{ + "match": "**/*.js", + "encodings": ["identity"] +}, +{ + "match": "**/*.jpg", + "encodings": ["identity", "gzip"] +} +``` + ### feat: `dfx canister url` Add `dfx canister url` subcommand to display the url of a given canister. 
Basic usage as below: diff --git a/e2e/tests-dfx/assetscanister.bash b/e2e/tests-dfx/assetscanister.bash index 6a17fe68cf..f75f3e676d 100644 --- a/e2e/tests-dfx/assetscanister.bash +++ b/e2e/tests-dfx/assetscanister.bash @@ -1304,6 +1304,40 @@ EOF assert_match "etag: my-custom-etag" } +@test "asset configuration via .ic-assets.json5 - overwriting default encodings" { + dfx_new_frontend + + dfx_start + + echo '[ + { + "match": "favicon.ico", + "encodings": ["gzip"] + }, + { + "match": "index.html", + "encodings": ["identity"] + } + ]' > src/e2e_project_frontend/assets/.ic-assets.json5 + + dfx deploy + + ID=$(dfx canister id e2e_project_frontend) + PORT=$(get_webserver_port) + + dfx canister call e2e_project_frontend list '(record{})' + + # favicon.ico is not available in gzip format by default but was configured to be + assert_command curl -vv -H "Accept-Encoding: gzip" "http://localhost:$PORT/favicon.ico?canisterId=$ID" + assert_match "content-encoding: gzip" + + # index.html is available in gzip format by default but was configured not to be + # The asset canister would serve the gzip encoding if it was available, but can't. 
+ # Therefore it falls back to the identity encoding, meaning there is no `content-encoding` header present + assert_command curl -vv -H "Accept-Encoding: gzip" "http://localhost:$PORT/index.html?canisterId=$ID" + assert_not_match "content-encoding" +} + @test "aliasing rules: to .html or /index.html" { echo "test alias file" >'src/e2e_project_frontend/assets/test_alias_file.html' mkdir 'src/e2e_project_frontend/assets/index_test' diff --git a/src/canisters/frontend/ic-asset/src/asset/config.rs b/src/canisters/frontend/ic-asset/src/asset/config.rs index 5a04a32690..76fca14772 100644 --- a/src/canisters/frontend/ic-asset/src/asset/config.rs +++ b/src/canisters/frontend/ic-asset/src/asset/config.rs @@ -12,6 +12,8 @@ use std::{ sync::{Arc, Mutex}, }; +use super::content_encoder::ContentEncoder; + pub(crate) const ASSETS_CONFIG_FILENAME_JSON: &str = ".ic-assets.json"; pub(crate) const ASSETS_CONFIG_FILENAME_JSON5: &str = ".ic-assets.json5"; @@ -25,6 +27,7 @@ pub struct AssetConfig { pub(crate) enable_aliasing: Option, #[derivative(Default(value = "Some(true)"))] pub(crate) allow_raw_access: Option, + pub(crate) encodings: Option>, } pub(crate) type HeadersConfig = BTreeMap; @@ -60,6 +63,8 @@ pub struct AssetConfigRule { /// Redirects the traffic from .raw.icp0.io domain to .icp0.io #[serde(skip_serializing_if = "Option::is_none")] allow_raw_access: Option, + #[serde(skip_serializing_if = "Option::is_none")] + encodings: Option>, } #[derive(Deserialize, Debug, Clone, PartialEq, Eq)] @@ -256,6 +261,10 @@ impl AssetConfig { if other.allow_raw_access.is_some() { self.allow_raw_access = other.allow_raw_access; } + + if other.encodings.is_some() { + self.encodings = other.encodings.clone(); + } self } } @@ -264,8 +273,10 @@ impl AssetConfig { /// and pretty-printing of the `AssetConfigRule` data structure. 
mod rule_utils { use super::{AssetConfig, AssetConfigRule, CacheConfig, HeadersConfig, Maybe}; + use crate::asset::content_encoder::ContentEncoder; use crate::error::LoadRuleError; use globset::{Glob, GlobMatcher}; + use itertools::Itertools; use serde::{Deserialize, Serializer}; use serde_json::Value; use std::collections::BTreeMap; @@ -345,6 +356,7 @@ mod rule_utils { ignore: Option, enable_aliasing: Option, allow_raw_access: Option, + encodings: Option>, } impl AssetConfigRule { @@ -356,6 +368,7 @@ mod rule_utils { ignore, enable_aliasing, allow_raw_access, + encodings, }: InterimAssetConfigRule, config_file_parent_dir: &Path, ) -> Result { @@ -378,6 +391,7 @@ mod rule_utils { used: false, enable_aliasing, allow_raw_access, + encodings, }) } } @@ -406,6 +420,9 @@ mod rule_utils { } } } + if let Some(encodings) = self.encodings.as_ref() { + s.push_str(&format!(" and {} encodings", encodings.len())); + } s.push(')'); } @@ -447,6 +464,12 @@ mod rule_utils { )); } } + if let Some(encodings) = self.encodings.as_ref() { + s.push_str(&format!( + " - encodings: {}", + encodings.iter().map(|enc| enc.to_string()).join(",") + )); + } write!(f, "{}", s) } @@ -682,6 +705,40 @@ mod with_tempdir { ); } + #[test] + fn overriding_encodings() { + let cfg = Some(HashMap::from([ + ( + "".to_string(), + r#"[{"match": "**/*.txt", "encodings": []},{"match": "**/*.unknown", "encodings": ["gzip"]}]"#.to_string(), + ), + ])); + let assets_temp_dir = create_temporary_assets_directory(cfg, 7); + let assets_dir = assets_temp_dir.path().canonicalize().unwrap(); + + let mut assets_config = AssetSourceDirectoryConfiguration::load(&assets_dir).unwrap(); + // override default (.unknown defaults to empty list) + assert_eq!( + assets_config + .get_asset_config(assets_dir.join("file.unknown").as_path()) + .unwrap(), + AssetConfig { + encodings: Some(Vec::from([ContentEncoder::Gzip])), + ..Default::default() + } + ); + // override default with empty list (.txt defaults to gzip) + assert_eq!( + 
assets_config + .get_asset_config(assets_dir.join("text.txt").as_path()) + .unwrap(), + AssetConfig { + encodings: Some(Vec::from([])), + ..Default::default() + } + ); + } + #[test] fn prioritization() { // 1. the most deeply nested config file takes precedens over the one in parent dir diff --git a/src/canisters/frontend/ic-asset/src/asset/content.rs b/src/canisters/frontend/ic-asset/src/asset/content.rs index 90d3a02775..0567eb9fed 100644 --- a/src/canisters/frontend/ic-asset/src/asset/content.rs +++ b/src/canisters/frontend/ic-asset/src/asset/content.rs @@ -7,6 +7,7 @@ use sha2::{Digest, Sha256}; use std::io::Write; use std::path::Path; +#[derive(Clone)] pub(crate) struct Content { pub data: Vec, pub media_type: Mime, @@ -27,6 +28,7 @@ impl Content { pub fn encode(&self, encoder: &ContentEncoder) -> Result { match encoder { ContentEncoder::Gzip => self.to_gzip(), + ContentEncoder::Identity => Ok(self.clone()), } } diff --git a/src/canisters/frontend/ic-asset/src/asset/content_encoder.rs b/src/canisters/frontend/ic-asset/src/asset/content_encoder.rs index 91f89aab29..b38f84ac8f 100644 --- a/src/canisters/frontend/ic-asset/src/asset/content_encoder.rs +++ b/src/canisters/frontend/ic-asset/src/asset/content_encoder.rs @@ -1,12 +1,17 @@ -#[derive(Clone, Debug)] +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Copy, Serialize, Deserialize, Eq, PartialEq)] +#[serde(rename_all = "lowercase")] pub enum ContentEncoder { Gzip, + Identity, } impl std::fmt::Display for ContentEncoder { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match &self { ContentEncoder::Gzip => f.write_str("gzip"), + ContentEncoder::Identity => f.write_str("identity"), } } } diff --git a/src/canisters/frontend/ic-asset/src/batch_upload/plumbing.rs b/src/canisters/frontend/ic-asset/src/batch_upload/plumbing.rs index a0e88a21d2..fef9e5bd64 100644 --- a/src/canisters/frontend/ic-asset/src/batch_upload/plumbing.rs +++ 
b/src/canisters/frontend/ic-asset/src/batch_upload/plumbing.rs @@ -156,12 +156,13 @@ async fn make_encoding( asset_descriptor: &AssetDescriptor, canister_assets: &HashMap, content: &Content, - encoder: &Option, + encoder: &ContentEncoder, + force_encoding: bool, semaphores: &Semaphores, logger: &Logger, ) -> Result, CreateEncodingError> { match encoder { - None => { + ContentEncoder::Identity => { let identity_asset_encoding = make_project_asset_encoding( chunk_upload_target, asset_descriptor, @@ -178,11 +179,11 @@ async fn make_encoding( identity_asset_encoding, ))) } - Some(encoder) => { + encoder => { let encoded = content.encode(encoder).map_err(|e| { - EncodeContentFailed(asset_descriptor.key.clone(), encoder.clone(), e) + EncodeContentFailed(asset_descriptor.key.clone(), encoder.to_owned(), e) })?; - if encoded.data.len() < content.data.len() { + if force_encoding || encoded.data.len() < content.data.len() { let content_encoding = format!("{}", encoder); let project_asset_encoding = make_project_asset_encoding( chunk_upload_target, @@ -211,20 +212,26 @@ async fn make_encodings( semaphores: &Semaphores, logger: &Logger, ) -> Result, CreateEncodingError> { - let mut encoders = vec![None]; - for encoder in applicable_encoders(&content.media_type) { - encoders.push(Some(encoder)); - } + let encoders = asset_descriptor + .config + .encodings + .clone() + .unwrap_or_else(|| default_encoders(&content.media_type)); + // The identity encoding is always uploaded if it's in the list of chosen encodings. + // Other encodings are only uploaded if they save bytes compared to identity. + // The encoding is forced through the filter if there is no identity encoding to compare against. 
+ let force_encoding = !encoders.contains(&ContentEncoder::Identity); let encoding_futures: Vec<_> = encoders .iter() - .map(|maybe_encoder| { + .map(|encoder| { make_encoding( chunk_upload_target, asset_descriptor, canister_assets, content, - maybe_encoder, + encoder, + force_encoding, semaphores, logger, ) @@ -367,10 +374,11 @@ fn content_encoding_descriptive_suffix(content_encoding: &str) -> String { } } -// todo: make this configurable https://github.com/dfinity/dx-triage/issues/152 -fn applicable_encoders(media_type: &Mime) -> Vec { +fn default_encoders(media_type: &Mime) -> Vec { match (media_type.type_(), media_type.subtype()) { - (mime::TEXT, _) | (_, mime::JAVASCRIPT) | (_, mime::HTML) => vec![ContentEncoder::Gzip], - _ => vec![], + (mime::TEXT, _) | (_, mime::JAVASCRIPT) | (_, mime::HTML) => { + vec![ContentEncoder::Identity, ContentEncoder::Gzip] + } + _ => vec![ContentEncoder::Identity], } }