Skip to content

Commit 3cddc61

Browse files
authored
feat: support waiting for background tasks on the per_worker policy (#451)
* chore: update deno manifest
* chore: update `.gitignore`
* feat: support waiting for background tasks on the `per_worker` policy
* chore: add integration tests
* chore: add an example
* chore: add `global.d.ts`
1 parent 388d2ea commit 3cddc61

File tree

17 files changed

+410
-113
lines changed

17 files changed

+410
-113
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@ scripts/debug.sh
1212

1313
node_modules/
1414
.DS_Store
15-
eszip.bin
15+
eszip.bin
16+
deno.lock

crates/base/src/deno_runtime.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ use sb_core::external_memory::CustomAllocator;
6969
use sb_core::net::sb_core_net;
7070
use sb_core::permissions::{sb_core_permissions, Permissions};
7171
use sb_core::runtime::sb_core_runtime;
72-
use sb_core::{sb_core_main_js, MemCheckWaker};
72+
use sb_core::{sb_core_main_js, MemCheckWaker, PromiseMetrics};
7373
use sb_env::sb_env as sb_env_op;
7474
use sb_fs::deno_compile_fs::DenoCompileFileSystem;
7575
use sb_graph::emitter::EmitterFactory;
@@ -254,6 +254,7 @@ pub struct DenoRuntime<RuntimeContext = ()> {
254254

255255
main_module_id: ModuleId,
256256
maybe_inspector: Option<Inspector>,
257+
promise_metrics: PromiseMetrics,
257258

258259
mem_check: Arc<MemCheck>,
259260
waker: Arc<AtomicWaker>,
@@ -322,6 +323,7 @@ where
322323
// TODO(Nyannyacha): Make sure `service_path` is an absolute path first.
323324

324325
let drop_token = CancellationToken::default();
326+
let promise_metrics = PromiseMetrics::default();
325327

326328
let base_dir_path = std::env::current_dir().map(|p| p.join(&service_path))?;
327329
let Ok(mut main_module_url) = Url::from_directory_path(&base_dir_path) else {
@@ -709,19 +711,12 @@ where
709711

710712
{
711713
let main_context = js_runtime.main_context();
712-
713714
let op_state = js_runtime.op_state();
714715
let mut op_state = op_state.borrow_mut();
715716

716717
op_state.put(dispatch_fns);
718+
op_state.put(promise_metrics.clone());
717719
op_state.put(GlobalMainContext(main_context));
718-
}
719-
720-
let version: Option<&str> = option_env!("GIT_V_TAG");
721-
722-
{
723-
let op_state_rc = js_runtime.op_state();
724-
let mut op_state = op_state_rc.borrow_mut();
725720

726721
// NOTE(Andreespirela): We do this because "NODE_DEBUG" is trying to be read during
727722
// initialization, But we need the gotham state to be up-to-date.
@@ -739,7 +734,7 @@ where
739734
// 2: isEventsWorker
740735
conf.is_events_worker(),
741736
// 3: edgeRuntimeVersion
742-
version.unwrap_or("0.1.0"),
737+
option_env!("GIT_V_TAG").unwrap_or("0.1.0"),
743738
// 4: denoVersion
744739
MAYBE_DENO_VERSION
745740
.get()
@@ -884,6 +879,7 @@ where
884879

885880
main_module_id,
886881
maybe_inspector,
882+
promise_metrics,
887883

888884
mem_check,
889885
waker: Arc::default(),
@@ -1210,6 +1206,10 @@ where
12101206
self.maybe_inspector.clone()
12111207
}
12121208

1209+
pub fn promise_metrics(&self) -> PromiseMetrics {
1210+
self.promise_metrics.clone()
1211+
}
1212+
12131213
pub fn mem_check_state(&self) -> Arc<RwLock<MemCheckState>> {
12141214
self.mem_check.state.clone()
12151215
}

crates/base/src/rt_worker/supervisor/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use deno_core::v8;
88
use enum_as_inner::EnumAsInner;
99
use futures_util::task::AtomicWaker;
1010
use log::{error, warn};
11+
use sb_core::PromiseMetrics;
1112
use sb_workers::context::{Timing, UserWorkerMsgs, UserWorkerRuntimeOpts};
1213
use tokio::sync::{
1314
mpsc::{self, UnboundedReceiver},
@@ -129,6 +130,7 @@ pub struct Arguments {
129130
pub cpu_usage_metrics_rx: Option<mpsc::UnboundedReceiver<CPUUsageMetrics>>,
130131
pub cpu_timer_param: CPUTimerParam,
131132
pub supervisor_policy: SupervisorPolicy,
133+
pub promise_metrics: PromiseMetrics,
132134
pub timing: Option<Timing>,
133135
pub memory_limit_rx: mpsc::UnboundedReceiver<()>,
134136
pub pool_msg_tx: Option<mpsc::UnboundedSender<UserWorkerMsgs>>,

crates/base/src/rt_worker/supervisor/strategy_per_worker.rs

Lines changed: 54 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ pub async fn supervise(args: Arguments) -> (ShutdownReason, i64) {
1818
let Arguments {
1919
key,
2020
runtime_opts,
21+
promise_metrics,
2122
timing,
2223
mut memory_limit_rx,
2324
cpu_timer,
@@ -55,11 +56,13 @@ pub async fn supervise(args: Arguments) -> (ShutdownReason, i64) {
5556
let is_wall_clock_limit_disabled = wall_clock_limit_ms == 0;
5657
let mut is_worker_entered = false;
5758
let mut is_wall_clock_beforeunload_armed = false;
59+
let mut is_cpu_time_soft_limit_reached = false;
60+
let mut is_termination_requested = false;
61+
let mut have_all_reqs_been_acknowledged = false;
5862

5963
let mut cpu_usage_metrics_rx = cpu_usage_metrics_rx.unwrap();
6064
let mut cpu_usage_ms = 0i64;
6165

62-
let mut cpu_time_soft_limit_reached = false;
6366
let mut wall_clock_alerts = 0;
6467
let mut req_ack_count = 0usize;
6568

@@ -113,20 +116,23 @@ pub async fn supervise(args: Arguments) -> (ShutdownReason, i64) {
113116
tokio::pin!(wall_clock_duration_alert);
114117
tokio::pin!(wall_clock_beforeunload_alert);
115118

116-
loop {
119+
let result = 'scope: loop {
117120
tokio::select! {
118121
_ = supervise.cancelled() => {
119-
return (ShutdownReason::TerminationRequested, cpu_usage_ms);
122+
break 'scope (ShutdownReason::TerminationRequested, cpu_usage_ms);
120123
}
121124

122125
_ = async {
123126
match termination.as_ref() {
124127
Some(token) => token.inbound.cancelled().await,
125128
None => pending().await,
126129
}
127-
} => {
128-
terminate_fn();
129-
return (ShutdownReason::TerminationRequested, cpu_usage_ms);
130+
}, if !is_termination_requested => {
131+
is_termination_requested = true;
132+
if promise_metrics.have_all_promises_been_resolved() {
133+
terminate_fn();
134+
break 'scope (ShutdownReason::TerminationRequested, cpu_usage_ms);
135+
}
130136
}
131137

132138
Some(metrics) = cpu_usage_metrics_rx.recv() => {
@@ -160,17 +166,28 @@ pub async fn supervise(args: Arguments) -> (ShutdownReason, i64) {
160166
if cpu_usage_ms >= hard_limit_ms as i64 {
161167
terminate_fn();
162168
error!("CPU time hard limit reached: isolate: {:?}", key);
163-
return (ShutdownReason::CPUTime, cpu_usage_ms);
164-
} else if cpu_usage_ms >= soft_limit_ms as i64 && !cpu_time_soft_limit_reached {
169+
break 'scope (ShutdownReason::CPUTime, cpu_usage_ms);
170+
} else if cpu_usage_ms >= soft_limit_ms as i64 && !is_cpu_time_soft_limit_reached {
165171
early_retire_fn();
166172
error!("CPU time soft limit reached: isolate: {:?}", key);
167-
cpu_time_soft_limit_reached = true;
168173

169-
if req_ack_count == demand.load(Ordering::Acquire) {
174+
is_cpu_time_soft_limit_reached = true;
175+
have_all_reqs_been_acknowledged = req_ack_count == demand.load(Ordering::Acquire);
176+
177+
if have_all_reqs_been_acknowledged
178+
&& promise_metrics.have_all_promises_been_resolved()
179+
{
170180
terminate_fn();
171181
error!("early termination due to the last request being completed: isolate: {:?}", key);
172-
return (ShutdownReason::EarlyDrop, cpu_usage_ms);
182+
break 'scope (ShutdownReason::EarlyDrop, cpu_usage_ms);
173183
}
184+
185+
} else if is_cpu_time_soft_limit_reached
186+
&& have_all_reqs_been_acknowledged
187+
&& promise_metrics.have_all_promises_been_resolved()
188+
{
189+
terminate_fn();
190+
break 'scope (ShutdownReason::EarlyDrop, cpu_usage_ms);
174191
}
175192
}
176193
}
@@ -179,42 +196,50 @@ pub async fn supervise(args: Arguments) -> (ShutdownReason, i64) {
179196

180197
Some(_) = wait_cpu_alarm(cpu_alarms_rx.as_mut()) => {
181198
if is_worker_entered {
182-
if !cpu_time_soft_limit_reached {
199+
if !is_cpu_time_soft_limit_reached {
183200
early_retire_fn();
184201
error!("CPU time soft limit reached: isolate: {:?}", key);
185-
cpu_time_soft_limit_reached = true;
186202

187-
if req_ack_count == demand.load(Ordering::Acquire) {
203+
is_cpu_time_soft_limit_reached = true;
204+
have_all_reqs_been_acknowledged = req_ack_count == demand.load(Ordering::Acquire);
205+
206+
if have_all_reqs_been_acknowledged
207+
&& promise_metrics.have_all_promises_been_resolved()
208+
{
188209
terminate_fn();
189210
error!("early termination due to the last request being completed: isolate: {:?}", key);
190-
return (ShutdownReason::EarlyDrop, cpu_usage_ms);
211+
break 'scope (ShutdownReason::EarlyDrop, cpu_usage_ms);
191212
}
192213
} else {
193214
terminate_fn();
194215
error!("CPU time hard limit reached: isolate: {:?}", key);
195-
return (ShutdownReason::CPUTime, cpu_usage_ms);
216+
break 'scope (ShutdownReason::CPUTime, cpu_usage_ms);
196217
}
197218
}
198219
}
199220

200221
Some(_) = req_end_rx.recv() => {
201222
req_ack_count += 1;
223+
have_all_reqs_been_acknowledged = req_ack_count == demand.load(Ordering::Acquire);
202224

203-
if !cpu_time_soft_limit_reached {
225+
if !is_cpu_time_soft_limit_reached {
204226
if let Some(tx) = pool_msg_tx.clone() {
205227
if tx.send(UserWorkerMsgs::Idle(key)).is_err() {
206228
error!("failed to send idle msg to pool: {:?}", key);
207229
}
208230
}
209231
}
210232

211-
if !cpu_time_soft_limit_reached || req_ack_count != demand.load(Ordering::Acquire) {
233+
if !is_cpu_time_soft_limit_reached
234+
|| !have_all_reqs_been_acknowledged
235+
|| !promise_metrics.have_all_promises_been_resolved()
236+
{
212237
continue;
213238
}
214239

215240
terminate_fn();
216241
error!("early termination due to the last request being completed: isolate: {:?}", key);
217-
return (ShutdownReason::EarlyDrop, cpu_usage_ms);
242+
break 'scope (ShutdownReason::EarlyDrop, cpu_usage_ms);
218243
}
219244

220245
_ = wall_clock_duration_alert.tick(), if !is_wall_clock_limit_disabled => {
@@ -229,10 +254,8 @@ pub async fn supervise(args: Arguments) -> (ShutdownReason, i64) {
229254
let is_in_flight_req_exists = req_ack_count != demand.load(Ordering::Acquire);
230255

231256
terminate_fn();
232-
233257
error!("wall clock duration reached: isolate: {:?} (in_flight_req_exists = {})", key, is_in_flight_req_exists);
234-
235-
return (ShutdownReason::WallClockTime, cpu_usage_ms);
258+
break 'scope (ShutdownReason::WallClockTime, cpu_usage_ms);
236259
}
237260
}
238261

@@ -252,8 +275,16 @@ pub async fn supervise(args: Arguments) -> (ShutdownReason, i64) {
252275
Some(_) = memory_limit_rx.recv() => {
253276
terminate_fn();
254277
error!("memory limit reached for the worker: isolate: {:?}", key);
255-
return (ShutdownReason::Memory, cpu_usage_ms);
278+
break 'scope (ShutdownReason::Memory, cpu_usage_ms);
256279
}
257280
}
281+
};
282+
283+
match result {
284+
(ShutdownReason::EarlyDrop, cpu_usage_ms) if is_termination_requested => {
285+
(ShutdownReason::TerminationRequested, cpu_usage_ms)
286+
}
287+
288+
result => result,
258289
}
259290
}

crates/base/src/rt_worker/worker.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -242,12 +242,12 @@ impl Worker {
242242
},
243243
));
244244

245-
if !thread_safe_handle.request_interrupt(
246-
supervisor::v8_handle_termination,
247-
data_ptr_mut as *mut std::ffi::c_void,
248-
) {
249-
drop(unsafe { Box::from_raw(data_ptr_mut) });
250-
}
245+
if !thread_safe_handle.request_interrupt(
246+
supervisor::v8_handle_termination,
247+
data_ptr_mut as *mut std::ffi::c_void,
248+
) {
249+
drop(unsafe { Box::from_raw(data_ptr_mut) });
250+
}
251251

252252
while !is_terminated.is_raised() {
253253
waker.wake();

crates/base/src/rt_worker/worker_ctx.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ pub fn create_supervisor(
352352
let _rt_guard = base_rt::SUPERVISOR_RT.enter();
353353
let maybe_cpu_timer_inner = maybe_cpu_timer.clone();
354354
let supervise_cancel_token_inner = supervise_cancel_token.clone();
355+
let promise_metrics = worker_runtime.promise_metrics();
355356

356357
tokio::spawn(async move {
357358
let (isolate_memory_usage_tx, isolate_memory_usage_rx) =
@@ -364,6 +365,7 @@ pub fn create_supervisor(
364365
cpu_usage_metrics_rx,
365366
cpu_timer_param,
366367
supervisor_policy,
368+
promise_metrics,
367369
timing,
368370
memory_limit_rx,
369371
pool_msg_tx,

crates/base/test_cases/main/index.ts

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
11
console.log('main function started');
22

3-
Deno.serve(async (req: Request) => {
3+
function parseIntFromHeadersOrDefault(req: Request, key: string, val: number) {
4+
const headerValue = req.headers.get(key);
5+
if (!headerValue) {
6+
return val;
7+
}
8+
9+
const parsedValue = parseInt(headerValue);
10+
if (isNaN(parsedValue)) {
11+
return val;
12+
}
13+
14+
return parsedValue;
15+
}
16+
17+
Deno.serve((req: Request) => {
418
console.log(req.url);
519
const url = new URL(req.url);
620
const { pathname } = url;
@@ -19,10 +33,11 @@ Deno.serve(async (req: Request) => {
1933
console.error(`serving the request with ${servicePath}`);
2034

2135
const createWorker = async () => {
22-
const memoryLimitMb = 150;
23-
const workerTimeoutMs = 10 * 60 * 1000;
24-
const cpuTimeSoftLimitMs = 10 * 60 * 1000;
25-
const cpuTimeHardLimitMs = 10 * 60 * 1000;
36+
const memoryLimitMb = parseIntFromHeadersOrDefault(req, "x-memory-limit-mb", 150);
37+
const workerTimeoutMs = parseIntFromHeadersOrDefault(req, "x-worker-timeout-ms", 10 * 60 * 1000);
38+
const cpuTimeSoftLimitMs = parseIntFromHeadersOrDefault(req, "x-cpu-time-soft-limit-ms", 10 * 60 * 1000);
39+
const cpuTimeHardLimitMs = parseIntFromHeadersOrDefault(req, "x-cpu-time-hard-limit-ms", 10 * 60 * 1000);
40+
console.log(cpuTimeSoftLimitMs);
2641
const noModuleCache = false;
2742
const importMapPath = null;
2843
const envVarsObj = Deno.env.toObject();
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
function sleep(ms: number): Promise<string> {
2+
return new Promise(res => {
3+
setTimeout(() => {
4+
res("meow");
5+
}, ms)
6+
});
7+
}
8+
9+
function mySlowFunction(baseNumber: number) {
10+
const now = Date.now();
11+
let result = 0;
12+
for (let i = Math.pow(baseNumber, 7); i >= 0; i--) {
13+
result += Math.atan(i) * Math.tan(i);
14+
}
15+
const duration = Date.now() - now;
16+
return { result: result, duration: duration };
17+
}
18+
19+
class MyBackgroundTaskEvent extends Event {
20+
readonly taskPromise: Promise<string>
21+
22+
constructor(taskPromise: Promise<string>) {
23+
super('myBackgroundTask')
24+
this.taskPromise = taskPromise
25+
}
26+
}
27+
28+
globalThis.addEventListener('myBackgroundTask', async (event) => {
29+
const str = await (event as MyBackgroundTaskEvent).taskPromise
30+
console.log(str);
31+
});
32+
33+
34+
export default {
35+
fetch() {
36+
// consumes lots of cpu time
37+
mySlowFunction(10);
38+
// however, this time we did not notify the runtime that it should wait for this promise.
39+
// therefore, the above console.log(str) will not be output and the worker will terminate.
40+
dispatchEvent(new MyBackgroundTaskEvent(sleep(5000)));
41+
return new Response();
42+
}
43+
}

0 commit comments

Comments (0)