Skip to content

Commit 192744f

Browse files
authored
feat: choose asset encodings that are synced to asset canister (#3792)
1 parent 3627daf commit 192744f

File tree

6 files changed

+140
-16
lines changed

6 files changed

+140
-16
lines changed

CHANGELOG.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,24 @@ different replica version or different replica options.
6060

6161
It doesn't apply to `--pocketic` because PocketIC does not yet persist any data.
6262

63+
### feat: allow specifying encodings in `.ic-assets.json`
64+
65+
When uploading assets to an asset canister, `dfx` by default uploads `.txt`, `.html` and `.js` files in `identity` encoding and, if it saves bytes, additionally in `gzip` encoding.
66+
It is now possible to specify in `.ic-assets.json` which encodings are used besides `identity`.
67+
Note that an encoding other than `identity` is only uploaded if it saves bytes compared to `identity`, or if `identity` is not among the specified encodings.
68+
69+
Example: To turn off `gzip` for `.js` files and to turn on `gzip` for `.jpg` files, add these rules to the top-level array in `.ic-assets.json`:
70+
``` json
71+
{
72+
"match": "**/*.js",
73+
"encodings": ["identity"]
74+
},
75+
{
76+
"match": "**/*.jpg",
77+
"encodings": ["identity", "gzip"]
78+
}
79+
```
80+
6381
### feat: `dfx canister url`
6482

6583
Add `dfx canister url` subcommand to display the url of a given canister. Basic usage as below:

e2e/tests-dfx/assetscanister.bash

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,6 +1304,40 @@ EOF
13041304
assert_match "etag: my-custom-etag"
13051305
}
13061306

1307+
@test "asset configuration via .ic-assets.json5 - overwriting default encodings" {
1308+
dfx_new_frontend
1309+
1310+
dfx_start
1311+
1312+
echo '[
1313+
{
1314+
"match": "favicon.ico",
1315+
"encodings": ["gzip"]
1316+
},
1317+
{
1318+
"match": "index.html",
1319+
"encodings": ["identity"]
1320+
}
1321+
]' > src/e2e_project_frontend/assets/.ic-assets.json5
1322+
1323+
dfx deploy
1324+
1325+
ID=$(dfx canister id e2e_project_frontend)
1326+
PORT=$(get_webserver_port)
1327+
1328+
dfx canister call e2e_project_frontend list '(record{})'
1329+
1330+
# favicon.ico is not available in gzip format by default but was configured to be
1331+
assert_command curl -vv -H "Accept-Encoding: gzip" "http://localhost:$PORT/favicon.ico?canisterId=$ID"
1332+
assert_match "content-encoding: gzip"
1333+
1334+
# index.html is available in gzip format by default but was configured not to be
1335+
# The asset canister would serve the gzip encoding if it was available, but can't.
1336+
# Therefore it falls back to the identity encoding, meaning there is no `content-encoding` header present
1337+
assert_command curl -vv -H "Accept-Encoding: gzip" "http://localhost:$PORT/index.html?canisterId=$ID"
1338+
assert_not_match "content-encoding"
1339+
}
1340+
13071341
@test "aliasing rules: <filename> to <filename>.html or <filename>/index.html" {
13081342
echo "test alias file" >'src/e2e_project_frontend/assets/test_alias_file.html'
13091343
mkdir 'src/e2e_project_frontend/assets/index_test'

src/canisters/frontend/ic-asset/src/asset/config.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ use std::{
1212
sync::{Arc, Mutex},
1313
};
1414

15+
use super::content_encoder::ContentEncoder;
16+
1517
pub(crate) const ASSETS_CONFIG_FILENAME_JSON: &str = ".ic-assets.json";
1618
pub(crate) const ASSETS_CONFIG_FILENAME_JSON5: &str = ".ic-assets.json5";
1719

@@ -25,6 +27,7 @@ pub struct AssetConfig {
2527
pub(crate) enable_aliasing: Option<bool>,
2628
#[derivative(Default(value = "Some(true)"))]
2729
pub(crate) allow_raw_access: Option<bool>,
30+
pub(crate) encodings: Option<Vec<ContentEncoder>>,
2831
}
2932

3033
pub(crate) type HeadersConfig = BTreeMap<String, String>;
@@ -60,6 +63,8 @@ pub struct AssetConfigRule {
6063
/// Redirects the traffic from .raw.icp0.io domain to .icp0.io
6164
#[serde(skip_serializing_if = "Option::is_none")]
6265
allow_raw_access: Option<bool>,
66+
#[serde(skip_serializing_if = "Option::is_none")]
67+
encodings: Option<Vec<ContentEncoder>>,
6368
}
6469

6570
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
@@ -256,6 +261,10 @@ impl AssetConfig {
256261
if other.allow_raw_access.is_some() {
257262
self.allow_raw_access = other.allow_raw_access;
258263
}
264+
265+
if other.encodings.is_some() {
266+
self.encodings = other.encodings.clone();
267+
}
259268
self
260269
}
261270
}
@@ -264,8 +273,10 @@ impl AssetConfig {
264273
/// and pretty-printing of the `AssetConfigRule` data structure.
265274
mod rule_utils {
266275
use super::{AssetConfig, AssetConfigRule, CacheConfig, HeadersConfig, Maybe};
276+
use crate::asset::content_encoder::ContentEncoder;
267277
use crate::error::LoadRuleError;
268278
use globset::{Glob, GlobMatcher};
279+
use itertools::Itertools;
269280
use serde::{Deserialize, Serializer};
270281
use serde_json::Value;
271282
use std::collections::BTreeMap;
@@ -345,6 +356,7 @@ mod rule_utils {
345356
ignore: Option<bool>,
346357
enable_aliasing: Option<bool>,
347358
allow_raw_access: Option<bool>,
359+
encodings: Option<Vec<ContentEncoder>>,
348360
}
349361

350362
impl AssetConfigRule {
@@ -356,6 +368,7 @@ mod rule_utils {
356368
ignore,
357369
enable_aliasing,
358370
allow_raw_access,
371+
encodings,
359372
}: InterimAssetConfigRule,
360373
config_file_parent_dir: &Path,
361374
) -> Result<Self, LoadRuleError> {
@@ -378,6 +391,7 @@ mod rule_utils {
378391
used: false,
379392
enable_aliasing,
380393
allow_raw_access,
394+
encodings,
381395
})
382396
}
383397
}
@@ -406,6 +420,9 @@ mod rule_utils {
406420
}
407421
}
408422
}
423+
if let Some(encodings) = self.encodings.as_ref() {
424+
s.push_str(&format!(" and {} encodings", encodings.len()));
425+
}
409426
s.push(')');
410427
}
411428

@@ -447,6 +464,12 @@ mod rule_utils {
447464
));
448465
}
449466
}
467+
if let Some(encodings) = self.encodings.as_ref() {
468+
s.push_str(&format!(
469+
" - encodings: {}",
470+
encodings.iter().map(|enc| enc.to_string()).join(",")
471+
));
472+
}
450473

451474
write!(f, "{}", s)
452475
}
@@ -682,6 +705,40 @@ mod with_tempdir {
682705
);
683706
}
684707

708+
#[test]
709+
fn overriding_encodings() {
710+
let cfg = Some(HashMap::from([
711+
(
712+
"".to_string(),
713+
r#"[{"match": "**/*.txt", "encodings": []},{"match": "**/*.unknown", "encodings": ["gzip"]}]"#.to_string(),
714+
),
715+
]));
716+
let assets_temp_dir = create_temporary_assets_directory(cfg, 7);
717+
let assets_dir = assets_temp_dir.path().canonicalize().unwrap();
718+
719+
let mut assets_config = AssetSourceDirectoryConfiguration::load(&assets_dir).unwrap();
720+
// override default (.unknown defaults to identity only)
721+
assert_eq!(
722+
assets_config
723+
.get_asset_config(assets_dir.join("file.unknown").as_path())
724+
.unwrap(),
725+
AssetConfig {
726+
encodings: Some(Vec::from([ContentEncoder::Gzip])),
727+
..Default::default()
728+
}
729+
);
730+
// override default with empty list (.txt defaults to identity and gzip)
731+
assert_eq!(
732+
assets_config
733+
.get_asset_config(assets_dir.join("text.txt").as_path())
734+
.unwrap(),
735+
AssetConfig {
736+
encodings: Some(Vec::from([])),
737+
..Default::default()
738+
}
739+
);
740+
}
741+
685742
#[test]
686743
fn prioritization() {
687744
// 1. the most deeply nested config file takes precedence over the one in parent dir

src/canisters/frontend/ic-asset/src/asset/content.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use sha2::{Digest, Sha256};
77
use std::io::Write;
88
use std::path::Path;
99

10+
#[derive(Clone)]
1011
pub(crate) struct Content {
1112
pub data: Vec<u8>,
1213
pub media_type: Mime,
@@ -27,6 +28,7 @@ impl Content {
2728
pub fn encode(&self, encoder: &ContentEncoder) -> Result<Content, std::io::Error> {
2829
match encoder {
2930
ContentEncoder::Gzip => self.to_gzip(),
31+
ContentEncoder::Identity => Ok(self.clone()),
3032
}
3133
}
3234

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1-
#[derive(Clone, Debug)]
1+
use serde::{Deserialize, Serialize};
2+
3+
#[derive(Clone, Debug, Copy, Serialize, Deserialize, Eq, PartialEq)]
4+
#[serde(rename_all = "lowercase")]
25
pub enum ContentEncoder {
36
Gzip,
7+
Identity,
48
}
59

610
impl std::fmt::Display for ContentEncoder {
711
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
812
match &self {
913
ContentEncoder::Gzip => f.write_str("gzip"),
14+
ContentEncoder::Identity => f.write_str("identity"),
1015
}
1116
}
1217
}

src/canisters/frontend/ic-asset/src/batch_upload/plumbing.rs

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,13 @@ async fn make_encoding(
156156
asset_descriptor: &AssetDescriptor,
157157
canister_assets: &HashMap<String, AssetDetails>,
158158
content: &Content,
159-
encoder: &Option<ContentEncoder>,
159+
encoder: &ContentEncoder,
160+
force_encoding: bool,
160161
semaphores: &Semaphores,
161162
logger: &Logger,
162163
) -> Result<Option<(String, ProjectAssetEncoding)>, CreateEncodingError> {
163164
match encoder {
164-
None => {
165+
ContentEncoder::Identity => {
165166
let identity_asset_encoding = make_project_asset_encoding(
166167
chunk_upload_target,
167168
asset_descriptor,
@@ -178,11 +179,11 @@ async fn make_encoding(
178179
identity_asset_encoding,
179180
)))
180181
}
181-
Some(encoder) => {
182+
encoder => {
182183
let encoded = content.encode(encoder).map_err(|e| {
183-
EncodeContentFailed(asset_descriptor.key.clone(), encoder.clone(), e)
184+
EncodeContentFailed(asset_descriptor.key.clone(), encoder.to_owned(), e)
184185
})?;
185-
if encoded.data.len() < content.data.len() {
186+
if force_encoding || encoded.data.len() < content.data.len() {
186187
let content_encoding = format!("{}", encoder);
187188
let project_asset_encoding = make_project_asset_encoding(
188189
chunk_upload_target,
@@ -211,20 +212,26 @@ async fn make_encodings(
211212
semaphores: &Semaphores,
212213
logger: &Logger,
213214
) -> Result<HashMap<String, ProjectAssetEncoding>, CreateEncodingError> {
214-
let mut encoders = vec![None];
215-
for encoder in applicable_encoders(&content.media_type) {
216-
encoders.push(Some(encoder));
217-
}
215+
let encoders = asset_descriptor
216+
.config
217+
.encodings
218+
.clone()
219+
.unwrap_or_else(|| default_encoders(&content.media_type));
220+
// The identity encoding is always uploaded if it's in the list of chosen encodings.
221+
// Other encodings are only uploaded if they save bytes compared to identity.
222+
// The encoding is forced through the filter if there is no identity encoding to compare against.
223+
let force_encoding = !encoders.contains(&ContentEncoder::Identity);
218224

219225
let encoding_futures: Vec<_> = encoders
220226
.iter()
221-
.map(|maybe_encoder| {
227+
.map(|encoder| {
222228
make_encoding(
223229
chunk_upload_target,
224230
asset_descriptor,
225231
canister_assets,
226232
content,
227-
maybe_encoder,
233+
encoder,
234+
force_encoding,
228235
semaphores,
229236
logger,
230237
)
@@ -367,10 +374,11 @@ fn content_encoding_descriptive_suffix(content_encoding: &str) -> String {
367374
}
368375
}
369376

370-
// todo: make this configurable https://github.com/dfinity/dx-triage/issues/152
371-
fn applicable_encoders(media_type: &Mime) -> Vec<ContentEncoder> {
377+
fn default_encoders(media_type: &Mime) -> Vec<ContentEncoder> {
372378
match (media_type.type_(), media_type.subtype()) {
373-
(mime::TEXT, _) | (_, mime::JAVASCRIPT) | (_, mime::HTML) => vec![ContentEncoder::Gzip],
374-
_ => vec![],
379+
(mime::TEXT, _) | (_, mime::JAVASCRIPT) | (_, mime::HTML) => {
380+
vec![ContentEncoder::Identity, ContentEncoder::Gzip]
381+
}
382+
_ => vec![ContentEncoder::Identity],
375383
}
376384
}

0 commit comments

Comments
 (0)