Skip to content

Commit aecc9f0

Browse files
committed
fix(cache): never cache errors on disk (#1734)
After thinking about this for a while, I think caching errors goes against typical CI workflows. That's because if a link fails due to a network issue, a server outage, or if a previously failing link has been fixed, reading an error from the cache prevents lychee from realizing the link is now working. Right now it will load the broken link from cache, notice that it has no status code associated with it, and fail immediately. It just caches the error again until it expires and gets re-checked. That's quite unintuitive. This commit updates Cache::store and Cache::load to skip CacheStatus::Error. As a result, failures are always rechecked on subsequent runs. We completely sidestep issues with caching transient network errors or missing HTTP status codes. It updates CLI tests to assert that errors are no longer cached (correcting prior tests that had their assertions flipped to expect the buggy behavior), and explicitly adds a new test to verify that loading an older, legacy `.lycheecache` file which contains a cached error correctly drops it and successfully retries the link. Fixes #1734. Fixes #1783.
1 parent caa7d5c commit aecc9f0

File tree

2 files changed

+97
-12
lines changed

2 files changed

+97
-12
lines changed

lychee-bin/src/cache.rs

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ impl StoreExt for Cache {
4646
.has_headers(false)
4747
.from_path(path)?;
4848
for result in self {
49+
// Do not serialize errors to disk. We always want to recheck failing links.
50+
if matches!(result.value().status, CacheStatus::Error(_)) {
51+
continue;
52+
}
4953
wtr.serialize((result.key(), result.value()))?;
5054
}
5155
Ok(())
@@ -60,7 +64,7 @@ impl StoreExt for Cache {
6064
.has_headers(false)
6165
.from_path(path)?;
6266

63-
let map = DashMap::new();
67+
let map = Cache::new();
6468
let current_ts = timestamp();
6569
for result in rdr.deserialize() {
6670
let (uri, value): (Uri, CacheValue) = result?;
@@ -70,6 +74,14 @@ impl StoreExt for Cache {
7074
continue;
7175
}
7276

77+
// Discard errors. Caching errors goes against typical CI workflows.
78+
// If a link fails due to a network issue, a server outage, or if a previously
79+
// failing link has been fixed, reading an error from the cache prevents lychee
80+
// from realizing the link is now working.
81+
if matches!(value.status, CacheStatus::Error(_)) {
82+
continue;
83+
}
84+
7385
// Discard entries for status codes which have been excluded.
7486
// Without this check, an entry might be cached, then its status code is configured as
7587
// excluded, and in subsequent runs the cached value is still reused.
@@ -85,7 +97,6 @@ impl StoreExt for Cache {
8597

8698
#[cfg(test)]
8799
mod tests {
88-
use dashmap::DashMap;
89100
use http::StatusCode;
90101
use lychee_lib::{CacheStatus, StatusCodeSelector, StatusRange, Uri};
91102

@@ -98,7 +109,7 @@ mod tests {
98109
fn test_excluded_status_not_reused_from_cache() {
99110
let uri: Uri = "https://example.com".try_into().unwrap();
100111

101-
let cache: Cache = DashMap::<Uri, CacheValue>::new();
112+
let cache = Cache::new();
102113
cache.insert(
103114
uri.clone(),
104115
CacheValue {
@@ -116,4 +127,34 @@ mod tests {
116127
let cache = Cache::load(tmp.path(), u64::MAX, &excluder).unwrap();
117128
assert!(cache.get(&uri).is_none());
118129
}
130+
131+
#[test]
132+
fn test_errors_not_stored_in_cache() {
133+
let uri: Uri = "https://example.com/error".try_into().unwrap();
134+
135+
let cache = Cache::new();
136+
cache.insert(
137+
uri.clone(),
138+
CacheValue {
139+
status: CacheStatus::Error(Some(StatusCode::INTERNAL_SERVER_ERROR)),
140+
timestamp: timestamp(),
141+
},
142+
);
143+
let uri_none: Uri = "https://example.com/none".try_into().unwrap();
144+
cache.insert(
145+
uri_none.clone(),
146+
CacheValue {
147+
status: CacheStatus::Error(None),
148+
timestamp: timestamp(),
149+
},
150+
);
151+
152+
let tmp = tempfile::NamedTempFile::new().unwrap();
153+
cache.store(tmp.path()).unwrap();
154+
155+
let excluder = StatusCodeSelector::empty();
156+
let loaded_cache = Cache::load(tmp.path(), u64::MAX, &excluder).unwrap();
157+
assert!(loaded_cache.get(&uri).is_none());
158+
assert!(loaded_cache.get(&uri_none).is_none());
159+
}
119160
}

lychee-bin/tests/cli.rs

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,8 +1316,8 @@ The config file should contain every possible key for documentation purposes."
13161316
"Missing OK entry in cache"
13171317
);
13181318
assert!(
1319-
data.contains(&format!("{}/,404", mock_server_err.uri())),
1320-
"Missing error entry in cache"
1319+
!data.contains(&format!("{}/,404", mock_server_err.uri())),
1320+
"Error entry should not be cached"
13211321
);
13221322

13231323
// Run again to verify cache behavior
@@ -1327,7 +1327,7 @@ The config file should contain every possible key for documentation purposes."
13271327
mock_server_ok.uri()
13281328
)))
13291329
.stderr(contains(format!(
1330-
"[404] {}/ (at 2:1) | Error (cached)\n",
1330+
"[404] {}/ (at 2:1) | Rejected status code: 404 Not Found (configurable with \"accept\" option)\n",
13311331
mock_server_err.uri()
13321332
)));
13331333

@@ -1454,8 +1454,11 @@ The config file should contain every possible key for documentation purposes."
14541454
// check content of cache file
14551455
let data = fs::read_to_string(&cache_file)?;
14561456
assert!(data.contains(&format!("{}/,200", mock_server_ok.uri())));
1457-
assert!(data.contains(&format!("{}/,418", mock_server_teapot.uri())));
1458-
assert!(data.contains(&format!("{}/,500", mock_server_server_error.uri())));
1457+
// Because the first run DID NOT use `--accept`, 418 and 500 were both treated as errors.
1458+
// With our new error dropping logic, NEITHER gets cached in the first run.
1459+
println!("DATA: {}", data);
1460+
assert!(!data.contains(&format!("{}/,418", mock_server_teapot.uri())));
1461+
assert!(!data.contains(&format!("{}/,500", mock_server_server_error.uri())));
14591462

14601463
// run again to verify cache behavior
14611464
// this time accept 418 and 500 as valid status codes
@@ -1466,11 +1469,11 @@ The config file should contain every possible key for documentation purposes."
14661469
.assert()
14671470
.success()
14681471
.stderr(contains(format!(
1469-
"[418] {}/ (at 2:1) | OK (cached)",
1472+
"[418] {}/ (at 2:1) | 418 I'm a teapot: I'm a teapot",
14701473
mock_server_teapot.uri()
14711474
)))
14721475
.stderr(contains(format!(
1473-
"[500] {}/ (at 3:1) | OK (cached)",
1476+
"[500] {}/ (at 3:1) | 500 Internal Server Error: Internal Server Error",
14741477
mock_server_server_error.uri()
14751478
)));
14761479

@@ -1601,8 +1604,11 @@ The config file should contain every possible key for documentation purposes."
16011604
// If the status code was 999, the cache file should be empty
16021605
// because we do not want to cache unknown status codes
16031606
let buf = fs::read(&cache_file).unwrap();
1604-
let data = String::from_utf8(buf)?;
1605-
assert!(data.contains(",999,"));
1607+
assert!(
1608+
buf.is_empty(),
1609+
"cache file should be empty, but was {}",
1610+
String::from_utf8_lossy(&buf)
1611+
);
16061612

16071613
Ok(())
16081614
}
@@ -3697,4 +3703,42 @@ https://lychee.cli.rs/guides/cli/#fragments-ignored
36973703
.assert()
36983704
.success();
36993705
}
3706+
3707+
/// Verifies that loading an older, legacy `.lycheecache` file containing a cached error
3708+
/// correctly drops the error and successfully retries the link.
3709+
/// This ensures we don't break existing user CI workflows that have older cache files
3710+
/// stored before we stopped caching errors.
3711+
#[tokio::test]
3712+
async fn test_legacy_cache_file_ignores_errors() -> Result<()> {
3713+
let _ts = std::time::SystemTime::now()
3714+
.duration_since(std::time::UNIX_EPOCH)
3715+
.unwrap()
3716+
.as_secs();
3717+
let dir = tempfile::tempdir()?;
3718+
let base_path = dir.path();
3719+
let cache_file = base_path.join(LYCHEE_CACHE_FILE);
3720+
3721+
// A server that is currently returning OK
3722+
let mock_server_ok = mock_server!(StatusCode::OK);
3723+
3724+
// Simulate an older `.lycheecache` where this exact URL previously failed with 404
3725+
// We use a future timestamp ({ts}) so it doesn't expire
3726+
fs::write(&cache_file, format!("{},404,{_ts}\n", mock_server_ok.uri()))?;
3727+
3728+
let mut file = File::create(dir.path().join("input.txt"))?;
3729+
writeln!(file, "{}", mock_server_ok.uri())?;
3730+
3731+
// Run lychee. It should ignore the 404 from the cache, actually check the mock server (which returns 200), and succeed.
3732+
cargo_bin_cmd!()
3733+
.current_dir(base_path)
3734+
.arg("input.txt")
3735+
.arg("--cache")
3736+
.arg("--verbose")
3737+
.assert()
3738+
.success()
3739+
.stdout(contains("1 OK"))
3740+
.stdout(contains("0 Errors"));
3741+
3742+
Ok(())
3743+
}
37003744
}

0 commit comments

Comments
 (0)