Skip to content

Commit 0222c88

Browse files
committed
WIP invalidation warning
1 parent f31b032 commit 0222c88

File tree

8 files changed

+287
-87
lines changed

8 files changed

+287
-87
lines changed

crates/napi/src/next_api/project.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ use turbo_tasks::{
3535
message_queue::{CompilationEvent, Severity, TimingEvent},
3636
trace::TraceRawVcs,
3737
};
38+
use turbo_tasks_backend::db_invalidation::invalidation_reasons;
3839
use turbo_tasks_fs::{
3940
DiskFileSystem, FileContent, FileSystem, FileSystemPath, get_relative_path_to,
4041
util::uri_from_file,
@@ -571,9 +572,15 @@ pub async fn project_update(
571572
pub async fn project_invalidate_persistent_cache(
572573
#[napi(ts_arg_type = "{ __napiType: \"Project\" }")] project: External<ProjectInstance>,
573574
) -> napi::Result<()> {
574-
tokio::task::spawn_blocking(move || project.turbo_tasks.invalidate_persistent_cache())
575-
.await
576-
.context("panicked while invalidating persistent cache")??;
575+
tokio::task::spawn_blocking(move || {
576+
// TODO: Let the JS caller specify a reason? We need to limit the reasons to ones we know
577+
// how to generate a message for on the Rust side of the FFI.
578+
project
579+
.turbo_tasks
580+
.invalidate_persistent_cache(invalidation_reasons::USER_REQUEST)
581+
})
582+
.await
583+
.context("panicked while invalidating persistent cache")??;
577584
Ok(())
578585
}
579586

crates/napi/src/next_api/utils.rs

Lines changed: 62 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,14 @@ use serde::Serialize;
1111
use tokio::sync::mpsc::Receiver;
1212
use turbo_tasks::{
1313
Effects, OperationVc, ReadRef, TaskId, TryJoinIterExt, TurboTasks, TurboTasksApi, UpdateInfo,
14-
Vc, VcValueType, get_effects, message_queue::CompilationEvent,
15-
task_statistics::TaskStatisticsApi, trace::TraceRawVcs,
14+
Vc, VcValueType, get_effects,
15+
message_queue::{CompilationEvent, Severity},
16+
task_statistics::TaskStatisticsApi,
17+
trace::TraceRawVcs,
1618
};
1719
use turbo_tasks_backend::{
18-
DefaultBackingStorage, GitVersionInfo, NoopBackingStorage, default_backing_storage,
19-
noop_backing_storage,
20+
BackingStorage, DefaultBackingStorage, GitVersionInfo, NoopBackingStorage, StartupCacheState,
21+
db_invalidation::invalidation_reasons, default_backing_storage, noop_backing_storage,
2022
};
2123
use turbo_tasks_fs::FileContent;
2224
use turbopack_core::{
@@ -150,17 +152,51 @@ impl NextTurboTasks {
150152
}
151153
}
152154

153-
pub fn invalidate_persistent_cache(&self) -> Result<()> {
155+
pub fn invalidate_persistent_cache(&self, reason_code: &str) -> Result<()> {
154156
match self {
155157
NextTurboTasks::Memory(_) => {}
156-
NextTurboTasks::PersistentCaching(turbo_tasks) => {
157-
turbo_tasks.backend().invalidate_storage()?
158-
}
158+
NextTurboTasks::PersistentCaching(turbo_tasks) => turbo_tasks
159+
.backend()
160+
.backing_storage()
161+
.invalidate(reason_code)?,
159162
}
160163
Ok(())
161164
}
162165
}
163166

167+
#[derive(Serialize)]
168+
struct StartupCacheInvalidationEvent {
169+
reason_code: Option<String>,
170+
}
171+
172+
impl CompilationEvent for StartupCacheInvalidationEvent {
173+
fn type_name(&self) -> &'static str {
174+
"StartupCacheInvalidationEvent"
175+
}
176+
177+
fn severity(&self) -> Severity {
178+
Severity::Warning
179+
}
180+
181+
fn message(&self) -> String {
182+
let reason_msg = match self.reason_code.as_deref() {
183+
Some(invalidation_reasons::PANIC) => {
184+
" because we previously detected an internal error in Turbopack"
185+
}
186+
Some(invalidation_reasons::USER_REQUEST) => " as the result of a user request",
187+
_ => "", // ignore unknown reasons
188+
};
189+
format!(
190+
"Turbopack's persistent on-disk cache has been invalidated{reason_msg}. Builds or \
191+
page loads may be slower as a result."
192+
)
193+
}
194+
195+
fn to_json(&self) -> String {
196+
serde_json::to_string(self).unwrap()
197+
}
198+
}
199+
164200
pub fn create_turbo_tasks(
165201
output_path: PathBuf,
166202
persistent_caching: bool,
@@ -174,24 +210,24 @@ pub fn create_turbo_tasks(
174210
dirty: option_env!("CI").is_none_or(|value| value.is_empty())
175211
&& env!("VERGEN_GIT_DIRTY") == "true",
176212
};
177-
NextTurboTasks::PersistentCaching(TurboTasks::new(
178-
turbo_tasks_backend::TurboTasksBackend::new(
179-
turbo_tasks_backend::BackendOptions {
180-
storage_mode: Some(if std::env::var("TURBO_ENGINE_READ_ONLY").is_ok() {
181-
turbo_tasks_backend::StorageMode::ReadOnly
182-
} else {
183-
turbo_tasks_backend::StorageMode::ReadWrite
184-
}),
185-
dependency_tracking,
186-
..Default::default()
187-
},
188-
default_backing_storage(
189-
&output_path.join("cache/turbopack"),
190-
&version_info,
191-
is_ci,
192-
)?,
193-
),
194-
))
213+
let (backing_storage, cache_state) =
214+
default_backing_storage(&output_path.join("cache/turbopack"), &version_info, is_ci)?;
215+
let tt = TurboTasks::new(turbo_tasks_backend::TurboTasksBackend::new(
216+
turbo_tasks_backend::BackendOptions {
217+
storage_mode: Some(if std::env::var("TURBO_ENGINE_READ_ONLY").is_ok() {
218+
turbo_tasks_backend::StorageMode::ReadOnly
219+
} else {
220+
turbo_tasks_backend::StorageMode::ReadWrite
221+
}),
222+
dependency_tracking,
223+
..Default::default()
224+
},
225+
backing_storage,
226+
));
227+
if let StartupCacheState::Invalidated { reason_code } = cache_state {
228+
tt.send_compilation_event(Arc::new(StartupCacheInvalidationEvent { reason_code }));
229+
}
230+
NextTurboTasks::PersistentCaching(tt)
195231
} else {
196232
NextTurboTasks::Memory(TurboTasks::new(
197233
turbo_tasks_backend::TurboTasksBackend::new(

turbopack/crates/turbo-tasks-backend/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ rand = { workspace = true }
4141
rayon = { workspace = true }
4242
rustc-hash = { workspace = true }
4343
serde = { workspace = true }
44+
serde_json = { workspace = true }
4445
serde_path_to_error = { workspace = true }
4546
smallvec = { workspace = true }
4647
tokio = { workspace = true }
@@ -54,7 +55,6 @@ turbo-tasks-testing = { workspace = true }
5455
[dev-dependencies]
5556
criterion = { workspace = true, features = ["async_tokio"] }
5657
regex = { workspace = true }
57-
serde_json = { workspace = true }
5858
tempfile = { workspace = true }
5959
rstest = { workspace = true }
6060

turbopack/crates/turbo-tasks-backend/src/backend/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,8 @@ impl<B: BackingStorage> TurboTasksBackend<B> {
210210
)))
211211
}
212212

213-
pub fn invalidate_storage(&self) -> Result<()> {
214-
self.0.backing_storage.invalidate()
213+
pub fn backing_storage(&self) -> &B {
214+
&self.0.backing_storage
215215
}
216216
}
217217

turbopack/crates/turbo-tasks-backend/src/backing_storage.rs

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,36 @@ use crate::{
1010
utils::chunked_vec::ChunkedVec,
1111
};
1212

13-
pub trait BackingStorage: 'static + Send + Sync {
13+
/// Represents types accepted by [`TurboTasksBackend::new`]. Typically this is the value returned by
14+
/// [`default_backing_storage`] or [`noop_backing_storage`].
15+
///
16+
/// This trait is [sealed]. External crates are not allowed to implement it.
17+
///
18+
/// [`default_backing_storage`]: crate::default_backing_storage
19+
/// [`noop_backing_storage`]: crate::noop_backing_storage
20+
/// [`TurboTasksBackend::new`]: crate::TurboTasksBackend::new
21+
/// [sealed]: https://predr.ag/blog/definitive-guide-to-sealed-traits-in-rust/
22+
pub trait BackingStorage: BackingStorageSealed {
23+
/// Called when the database should be invalidated upon re-initialization.
24+
///
25+
/// This typically means that we'll restart the process or `turbo-tasks` soon with a fresh
26+
/// database. If this happens, there's no point in writing anything else to disk, or flushing
27+
/// during [`KeyValueDatabase::shutdown`].
28+
///
29+
/// This can be implemented by calling [`invalidate_db`] with
30+
/// the database's non-versioned base path.
31+
///
32+
/// [`KeyValueDatabase::shutdown`]: crate::database::key_value_database::KeyValueDatabase::shutdown
33+
/// [`invalidate_db`]: crate::database::db_invalidation::invalidate_db
34+
fn invalidate(&self, reason_code: &str) -> Result<()>;
35+
}
36+
37+
/// Private methods used by [`BackingStorage`]. This trait is `pub` (because of the sealed-trait
38+
/// pattern), but should not be exported outside of the crate.
39+
///
40+
/// [`BackingStorage`] is exported for documentation reasons and to expose the public
41+
/// [`BackingStorage::invalidate`] method.
42+
pub trait BackingStorageSealed: 'static + Send + Sync {
1443
type ReadTransaction<'l>;
1544
fn lower_read_transaction<'l: 'i + 'r, 'i: 'r, 'r>(
1645
tx: &'r Self::ReadTransaction<'l>,
@@ -63,16 +92,6 @@ pub trait BackingStorage: 'static + Send + Sync {
6392
category: TaskDataCategory,
6493
) -> Result<Vec<CachedDataItem>>;
6594

66-
/// Called when the database should be invalidated upon re-initialization.
67-
///
68-
/// This typically means that we'll restart the process or `turbo-tasks` soon with a fresh
69-
/// database. If this happens, there's no point in writing anything else to disk, or flushing
70-
/// during [`KeyValueDatabase::shutdown`].
71-
///
72-
/// This can be implemented by calling [`crate::database::db_invalidation::invalidate_db`] with
73-
/// the database's non-versioned base path.
74-
fn invalidate(&self) -> Result<()>;
75-
7695
fn shutdown(&self) -> Result<()> {
7796
Ok(())
7897
}

turbopack/crates/turbo-tasks-backend/src/database/db_invalidation.rs

Lines changed: 112 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,70 @@
11
use std::{
2-
fs::{self, read_dir},
3-
io::{self, ErrorKind},
2+
borrow::Cow,
3+
fs::{self, File, read_dir},
4+
io::{self, BufReader, BufWriter, ErrorKind, Write},
45
path::Path,
56
};
67

78
use anyhow::Context;
9+
use serde::{Deserialize, Serialize};
810

911
const INVALIDATION_MARKER: &str = "__turbo_tasks_invalidated_db";
1012

11-
/// Atomically write an invalidation marker.
13+
const EXPLANATION: &str = "The cache database has been invalidated. The existence of this file \
14+
will cause the cache directory to be cleaned up the next time \
15+
Turbopack starts up.";
16+
const EASTER_EGG: &str =
17+
"you just wrote me, and this is crazy, but if you see me, delete everything maybe?";
18+
19+
/// The data written to the file at [`INVALIDATION_MARKER`].
20+
#[derive(Serialize, Deserialize)]
21+
struct InvalidationFile<'a> {
22+
#[serde(skip_deserializing)]
23+
_explanation: Option<&'static str>,
24+
#[serde(skip_deserializing)]
25+
_easter_egg: Option<&'static str>,
26+
/// See [`StartupCacheState::Invalidated::reason_code`].
27+
reason_code: Cow<'a, str>,
28+
}
29+
30+
/// Information about whether there was a pre-existing cache or whether the cache was detected as
31+
/// invalidated during startup.
32+
///
33+
/// If the cache was invalidated, the application may choose to show a warning to the user or log it
34+
/// to telemetry.
35+
///
36+
/// This value is returned by [`crate::turbo_backing_storage`] and
37+
/// [`crate::default_backing_storage`].
38+
pub enum StartupCacheState {
39+
NoCache,
40+
Cached,
41+
Invalidated {
42+
/// A short code passed to [`invalidate_db`]. This value is application-specific.
43+
///
44+
/// If the value is `None` or doesn't match an expected value, the application should just
45+
/// treat this reason as unknown. The invalidation file may have been corrupted or
46+
/// modified by an external tool.
47+
///
48+
/// See [`invalidation_reasons`] for some common reason codes.
49+
reason_code: Option<String>,
50+
},
51+
}
52+
53+
/// Common invalidation reason codes. The application or libraries it uses may choose to use these
54+
/// reasons, or it may define its own reasons.
55+
pub mod invalidation_reasons {
56+
/// This invalidation reason is used by [`crate::turbo_backing_storage`] when the database was
57+
/// invalidated by a panic.
58+
pub const PANIC: &str = concat!(module_path!(), "::PANIC");
59+
/// This invalidation reason is used by [`crate::turbo_backing_storage`] when the database was
60+
/// invalidated by a user request.
61+
pub const USER_REQUEST: &str = concat!(module_path!(), "::USER_REQUEST");
62+
}
63+
64+
/// Atomically create an invalidation marker.
65+
///
66+
/// Makes a best-effort attempt to write `reason_code` to the file, but ignores any failure with
67+
/// writing to the file.
1268
///
1369
/// Because attempting to delete currently open database files could cause issues, actual deletion
1470
/// of files is deferred until the next start-up (in [`check_db_invalidation_and_cleanup`]).
@@ -18,9 +74,27 @@ const INVALIDATION_MARKER: &str = "__turbo_tasks_invalidated_db";
1874
///
1975
/// This should be run with the base (non-versioned) path, as that likely aligns closest with user
2076
/// expectations (e.g. if they're clearing the cache for disk space reasons).
21-
pub fn invalidate_db(base_path: &Path) -> anyhow::Result<()> {
22-
match fs::write(base_path.join(INVALIDATION_MARKER), [0u8; 0]) {
23-
Ok(_) => Ok(()),
77+
///
78+
/// In most cases, you should prefer a higher-level API like
79+
/// [`crate::backing_storage::BackingStorage::invalidate`] to this one.
80+
pub(crate) fn invalidate_db(base_path: &Path, reason_code: &str) -> anyhow::Result<()> {
81+
match File::create_new(base_path.join(INVALIDATION_MARKER)) {
82+
Ok(file) => {
83+
let mut writer = BufWriter::new(file);
84+
let _ = serde_json::to_writer_pretty(
85+
&mut writer,
86+
&InvalidationFile {
87+
_explanation: Some(EXPLANATION),
88+
_easter_egg: Some(EASTER_EGG),
89+
reason_code: Cow::Borrowed(reason_code),
90+
},
91+
);
92+
let _ = writer.flush();
93+
Ok(())
94+
}
95+
// the database was already invalidated, avoid overwriting that reason or risking concurrent
96+
// writes to the same file.
97+
Err(err) if err.kind() == ErrorKind::AlreadyExists => Ok(()),
2498
// just ignore if the cache directory doesn't exist at all
2599
Err(err) if err.kind() == ErrorKind::NotFound => Ok(()),
26100
Err(err) => Err(err).context("Failed to invalidate database"),
@@ -31,21 +105,45 @@ pub fn invalidate_db(base_path: &Path) -> anyhow::Result<()> {
31105
/// delete any invalidated database files.
32106
///
33107
/// This should be run with the base (non-versioned) path.
34-
pub fn check_db_invalidation_and_cleanup(base_path: &Path) -> anyhow::Result<()> {
35-
if fs::exists(base_path.join(INVALIDATION_MARKER))? {
36-
// if this cleanup fails, we might try to open an invalid database later, so it's best to
37-
// just propagate the error here.
38-
cleanup_db(base_path)?;
39-
};
40-
Ok(())
108+
///
109+
/// In most cases, you should prefer a higher-level API like
110+
/// [`crate::KeyValueDatabaseBackingStorage::open_versioned_on_disk`] to this one.
111+
pub(crate) fn check_db_invalidation_and_cleanup(
112+
base_path: &Path,
113+
) -> anyhow::Result<StartupCacheState> {
114+
match File::open(base_path.join(INVALIDATION_MARKER)) {
115+
Ok(file) => {
116+
// Best-effort: try to read the reason_code from the file. If the file is corrupted (or
117+
// reading fails for any other reason), just use `None`.
118+
let reason_code = serde_json::from_reader::<_, InvalidationFile>(BufReader::new(file))
119+
.ok()
120+
.map(|contents| contents.reason_code.into_owned());
121+
// `file` is dropped at this point: That's important for Windows where we can't delete
122+
// open files.
123+
124+
// if this cleanup fails, we might try to open an invalid database later, so it's best
125+
// to just propagate the error here.
126+
cleanup_db(base_path)?;
127+
Ok(StartupCacheState::Invalidated { reason_code })
128+
}
129+
Err(err) if err.kind() == ErrorKind::NotFound => {
130+
if fs::exists(base_path)? {
131+
Ok(StartupCacheState::Cached)
132+
} else {
133+
Ok(StartupCacheState::NoCache)
134+
}
135+
}
136+
Err(err) => Err(err)
137+
.with_context(|| format!("Failed to check for {INVALIDATION_MARKER} in {base_path:?}")),
138+
}
41139
}
42140

43141
/// Helper for [`check_db_invalidation_and_cleanup`]. You can call this to explicitly clean up a
44142
/// database after running [`invalidate_db`] when turbo-tasks is not running.
45143
///
46144
/// You should not run this if the database has not yet been invalidated, as this operation is not
47145
/// atomic and could result in a partially-deleted and corrupted database.
48-
pub fn cleanup_db(base_path: &Path) -> anyhow::Result<()> {
146+
pub(crate) fn cleanup_db(base_path: &Path) -> anyhow::Result<()> {
49147
cleanup_db_inner(base_path).with_context(|| {
50148
format!(
51149
"Unable to remove invalid database. If this issue persists you can work around by \

0 commit comments

Comments
 (0)