From 880246618b6c28adc76e5e68247aa5a9302320f8 Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 31 Jul 2013 15:56:18 -0400 Subject: [PATCH 1/4] Relax some atomic barriers. Loosen up all that tension. There, doesn't that feel good? --- src/libstd/rt/comm.rs | 12 ++++++------ src/libstd/rt/kill.rs | 23 +++++++++++++---------- src/libstd/unstable/sync.rs | 10 ++++------ 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/libstd/rt/comm.rs b/src/libstd/rt/comm.rs index a27ff559b2ba1..11ba42f805b55 100644 --- a/src/libstd/rt/comm.rs +++ b/src/libstd/rt/comm.rs @@ -18,7 +18,7 @@ use kinds::Send; use rt::sched::Scheduler; use rt::local::Local; use rt::select::{Select, SelectPort}; -use unstable::atomics::{AtomicUint, AtomicOption, Acquire, Release, SeqCst}; +use unstable::atomics::{AtomicUint, AtomicOption, Acquire, Relaxed, SeqCst}; use unstable::sync::UnsafeAtomicRcBox; use util::Void; use comm::{GenericChan, GenericSmartChan, GenericPort, Peekable}; @@ -216,15 +216,15 @@ impl Select for PortOne { } STATE_ONE => { // Re-record that we are the only owner of the packet. - // Release barrier needed in case the task gets reawoken - // on a different core (this is analogous to writing a - // payload; a barrier in enqueueing the task protects it). + // No barrier needed, even if the task gets reawoken + // on a different core -- this is analogous to writing a + // payload; a barrier in enqueueing the task protects it. // NB(#8132). This *must* occur before the enqueue below. // FIXME(#6842, #8130) This is usually only needed for the // assertion in recv_ready, except in the case of select(). // This won't actually ever have cacheline contention, but // maybe should be optimized out with a cfg(test) anyway? - (*self.packet()).state.store(STATE_ONE, Release); + (*self.packet()).state.store(STATE_ONE, Relaxed); rtdebug!("rendezvous recv"); sched.metrics.rendezvous_recvs += 1; @@ -299,7 +299,7 @@ impl SelectPort for PortOne { unsafe { // See corresponding store() above in block_on for rationale. // FIXME(#8130) This can happen only in test builds. - assert!((*packet).state.load(Acquire) == STATE_ONE); + assert!((*packet).state.load(Relaxed) == STATE_ONE); let payload = (*packet).payload.take(); diff --git a/src/libstd/rt/kill.rs b/src/libstd/rt/kill.rs index c2571f171a172..729c682dbd114 100644 --- a/src/libstd/rt/kill.rs +++ b/src/libstd/rt/kill.rs @@ -17,7 +17,7 @@ use option::{Option, Some, None}; use prelude::*; use rt::task::Task; use to_bytes::IterBytes; -use unstable::atomics::{AtomicUint, Acquire, SeqCst}; +use unstable::atomics::{AtomicUint, Relaxed}; use unstable::sync::{UnsafeAtomicRcBox, LittleLock}; use util; @@ -95,7 +95,7 @@ impl Drop for KillFlag { // Letting a KillFlag with a task inside get dropped would leak the task. // We could free it here, but the task should get awoken by hand somehow. fn drop(&self) { - match self.load(Acquire) { + match self.load(Relaxed) { KILL_RUNNING | KILL_KILLED => { }, _ => rtabort!("can't drop kill flag with a blocked task inside!"), } @@ -124,7 +124,7 @@ impl BlockedTask { Unkillable(task) => Some(task), Killable(flag_arc) => { let flag = unsafe { &mut **flag_arc.get() }; - match flag.swap(KILL_RUNNING, SeqCst) { + match flag.swap(KILL_RUNNING, Relaxed) { KILL_RUNNING => None, // woken from select(), perhaps KILL_KILLED => None, // a killer stole it already task_ptr => @@ -159,7 +159,7 @@ impl BlockedTask { let flag = &mut **flag_arc.get(); let task_ptr = cast::transmute(task); // Expect flag to contain RUNNING. 
If KILLED, it should stay KILLED. - match flag.compare_and_swap(KILL_RUNNING, task_ptr, SeqCst) { + match flag.compare_and_swap(KILL_RUNNING, task_ptr, Relaxed) { KILL_RUNNING => Right(Killable(flag_arc)), KILL_KILLED => Left(revive_task_ptr(task_ptr, Some(flag_arc))), x => rtabort!("can't block task! kill flag = %?", x), @@ -257,7 +257,7 @@ impl KillHandle { let inner = unsafe { &mut *self.get() }; // Expect flag to contain RUNNING. If KILLED, it should stay KILLED. // FIXME(#7544)(bblum): is it really necessary to prohibit double kill? - match inner.unkillable.compare_and_swap(KILL_RUNNING, KILL_UNKILLABLE, SeqCst) { + match inner.unkillable.compare_and_swap(KILL_RUNNING, KILL_UNKILLABLE, Relaxed) { KILL_RUNNING => { }, // normal case KILL_KILLED => if !already_failing { fail!(KILLED_MSG) }, _ => rtabort!("inhibit_kill: task already unkillable"), @@ -270,7 +270,7 @@ impl KillHandle { let inner = unsafe { &mut *self.get() }; // Expect flag to contain UNKILLABLE. If KILLED, it should stay KILLED. // FIXME(#7544)(bblum): is it really necessary to prohibit double kill? - match inner.unkillable.compare_and_swap(KILL_UNKILLABLE, KILL_RUNNING, SeqCst) { + match inner.unkillable.compare_and_swap(KILL_UNKILLABLE, KILL_RUNNING, Relaxed) { KILL_UNKILLABLE => { }, // normal case KILL_KILLED => if !already_failing { fail!(KILLED_MSG) }, _ => rtabort!("allow_kill: task already killable"), @@ -281,10 +281,10 @@ impl KillHandle { // if it was blocked and needs punted awake. To be called by other tasks. pub fn kill(&mut self) -> Option<~Task> { let inner = unsafe { &mut *self.get() }; - if inner.unkillable.swap(KILL_KILLED, SeqCst) == KILL_RUNNING { + if inner.unkillable.swap(KILL_KILLED, Relaxed) == KILL_RUNNING { // Got in. Allowed to try to punt the task awake. let flag = unsafe { &mut *inner.killed.get() }; - match flag.swap(KILL_KILLED, SeqCst) { + match flag.swap(KILL_KILLED, Relaxed) { // Task either not blocked or already taken care of. KILL_RUNNING | KILL_KILLED => None, // Got ownership of the blocked task. @@ -306,8 +306,11 @@ impl KillHandle { // is unkillable with a kill signal pending. let inner = unsafe { &*self.get() }; let flag = unsafe { &*inner.killed.get() }; - // FIXME(#6598): can use relaxed ordering (i think) - flag.load(Acquire) == KILL_KILLED + // A barrier-related concern here is that a task that gets killed + // awake needs to see the killer's write of KILLED to this flag. This + // is analogous to receiving a pipe payload; the appropriate barrier + // should happen when enqueueing the task. + flag.load(Relaxed) == KILL_KILLED } pub fn notify_immediate_failure(&mut self) { diff --git a/src/libstd/unstable/sync.rs b/src/libstd/unstable/sync.rs index e865d3a467dd4..04ada5ec07be6 100644 --- a/src/libstd/unstable/sync.rs +++ b/src/libstd/unstable/sync.rs @@ -16,7 +16,7 @@ use ptr; use option::*; use either::{Either, Left, Right}; use task; -use unstable::atomics::{AtomicOption,AtomicUint,Acquire,Release,SeqCst}; +use unstable::atomics::{AtomicOption,AtomicUint,Acquire,Release,Relaxed,SeqCst}; use unstable::finally::Finally; use ops::Drop; use clone::Clone; @@ -95,8 +95,7 @@ impl UnsafeAtomicRcBox { pub fn get(&self) -> *mut T { unsafe { let mut data: ~AtomicRcBoxData = cast::transmute(self.data); - // FIXME(#6598) Change Acquire to Relaxed. 
- assert!(data.count.load(Acquire) > 0); + assert!(data.count.load(Relaxed) > 0); let r: *mut T = data.data.get_mut_ref(); cast::forget(data); return r; @@ -107,7 +106,7 @@ impl UnsafeAtomicRcBox { pub fn get_immut(&self) -> *T { unsafe { let data: ~AtomicRcBoxData = cast::transmute(self.data); - assert!(data.count.load(Acquire) > 0); // no barrier is really needed + assert!(data.count.load(Relaxed) > 0); let r: *T = data.data.get_ref(); cast::forget(data); return r; @@ -130,8 +129,7 @@ impl UnsafeAtomicRcBox { // Try to put our server end in the unwrapper slot. // This needs no barrier -- it's protected by the release barrier on // the xadd, and the acquire+release barrier in the destructor's xadd. - // FIXME(#6598) Change Acquire to Relaxed. - if data.unwrapper.fill(~(c1,p2), Acquire).is_none() { + if data.unwrapper.fill(~(c1,p2), Relaxed).is_none() { // Got in. Tell this handle's destructor not to run (we are now it). this.data = ptr::mut_null(); // Drop our own reference. From 036a6d2f0080abd10521f7999af7100e5bcaaffe Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 31 Jul 2013 19:14:59 -0400 Subject: [PATCH 2/4] Document task killing design and relaxed barrier rationale. --- src/libstd/rt/kill.rs | 58 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/src/libstd/rt/kill.rs b/src/libstd/rt/kill.rs index 729c682dbd114..208af522d80e1 100644 --- a/src/libstd/rt/kill.rs +++ b/src/libstd/rt/kill.rs @@ -8,7 +8,63 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! Task death: asynchronous killing, linked failure, exit code propagation. +/*! + +Task death: asynchronous killing, linked failure, exit code propagation. + +This file implements two orthogonal building-blocks for communicating failure +between tasks. One is 'linked failure' or 'task killing', that is, a failing +task causing other tasks to fail promptly (even those that are blocked on +pipes or I/O). The other is 'exit code propagation', which affects the result +observed by the parent of a task::try task that itself spawns child tasks +(such as any #[test] function). In both cases the data structures live in +KillHandle. + +I. Task killing. + +The model for killing involves two atomic flags, the "kill flag" and the +"unkillable flag". Operations on the kill flag include: + +- In the taskgroup code (task/spawn.rs), tasks store a clone of their + KillHandle in their shared taskgroup. Another task in the group that fails + will use that handle to call kill(). +- When a task blocks, it turns its ~Task into a BlockedTask by storing a + the transmuted ~Task pointer inside the KillHandle's kill flag. A task + trying to block and a task trying to kill it can simultaneously access the + kill flag, after which the task will get scheduled and fail (no matter who + wins the race). Likewise, a task trying to wake a blocked task normally and + a task trying to kill it can simultaneously access the flag; only one will + get the task to reschedule it. + +Operations on the unkillable flag include: + +- When a task becomes unkillable, it swaps on the flag to forbid any killer + from waking it up while it's blocked inside the unkillable section. If a + kill was already pending, the task fails instead of becoming unkillable. +- When a task is done being unkillable, it restores the flag to the normal + running state. If a kill was received-but-blocked during the unkillable + section, the task fails at this later point. 
+- When a task tries to kill another task, before swapping on the kill flag, it + first swaps on the unkillable flag, to see if it's "allowed" to wake up the + task. If it isn't, the killed task will receive the signal when it becomes + killable again. (Of course, a task trying to wake the task normally (e.g. + sending on a channel) does not access the unkillable flag at all.) + +Why do we not need acquire/release barriers on any of the kill flag swaps? +This is because barriers establish orderings between accesses on different +memory locations, but each kill-related operation is only a swap on a single +location, so atomicity is all that matters. The exception is kill(), which +does a swap on both flags in sequence. kill() needs no barriers because it +does not matter if its two accesses are seen reordered on another CPU: if a +killer does perform both writes, it means it saw a KILL_RUNNING in the +unkillable flag, which means an unkillable task will see KILL_KILLED and fail +immediately (rendering the subsequent write to the kill flag unnecessary). + +II. Exit code propagation. + +FIXME(#7544): Decide on the ultimate model for this and document it. + +*/ use cast; use cell::Cell; From aeaed77301397c6ab0c4f5413dbd4b7f0d5b5c9a Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 31 Jul 2013 16:30:14 -0400 Subject: [PATCH 3/4] Make a forgotten assert in comm be cfg(test)-dependent --- src/libstd/rt/comm.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/libstd/rt/comm.rs b/src/libstd/rt/comm.rs index 11ba42f805b55..eb56e4a78c02c 100644 --- a/src/libstd/rt/comm.rs +++ b/src/libstd/rt/comm.rs @@ -375,9 +375,7 @@ impl Drop for PortOne { // receiver was killed awake. The task can't still be // blocked (we are it), but we need to free the handle. let recvr = BlockedTask::cast_from_uint(task_as_state); - // FIXME(#7554)(bblum): Make this cfg(test) dependent. - // in a later commit. - assert!(recvr.wake().is_none()); + recvr.assert_already_awake(); } } } From 963d37e821590b470f7a1fc9cfcda5a5ceceeee4 Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 31 Jul 2013 19:48:38 -0400 Subject: [PATCH 4/4] Temporary workaround to prevent taskgroup cleanup code from failing without an exception handler. --- src/libstd/rt/kill.rs | 7 ++++++- src/libstd/rt/task.rs | 9 +++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/libstd/rt/kill.rs b/src/libstd/rt/kill.rs index 208af522d80e1..696f4a8c35515 100644 --- a/src/libstd/rt/kill.rs +++ b/src/libstd/rt/kill.rs @@ -72,6 +72,7 @@ use either::{Either, Left, Right}; use option::{Option, Some, None}; use prelude::*; use rt::task::Task; +use task::spawn::Taskgroup; use to_bytes::IterBytes; use unstable::atomics::{AtomicUint, Relaxed}; use unstable::sync::{UnsafeAtomicRcBox, LittleLock}; @@ -474,7 +475,7 @@ impl Death { } /// Collect failure exit codes from children and propagate them to a parent. - pub fn collect_failure(&mut self, mut success: bool) { + pub fn collect_failure(&mut self, mut success: bool, group: Option) { // This may run after the task has already failed, so even though the // task appears to need to be killed, the scheduler should not fail us // when we block to unwrap. @@ -484,6 +485,10 @@ impl Death { rtassert!(self.unkillable == 0); self.unkillable = 1; + // FIXME(#7544): See corresponding fixme at the callsite in task.rs. + // NB(#8192): Doesn't work with "let _ = ..." + { use util; util::ignore(group); } + // Step 1. Decide if we need to collect child failures synchronously. 
do self.on_exit.take_map |on_exit| { if success { diff --git a/src/libstd/rt/task.rs b/src/libstd/rt/task.rs index c1b799796d158..f7f1b10e58c76 100644 --- a/src/libstd/rt/task.rs +++ b/src/libstd/rt/task.rs @@ -129,8 +129,13 @@ impl Task { } self.unwinder.try(f); - { let _ = self.taskgroup.take(); } - self.death.collect_failure(!self.unwinder.unwinding); + // FIXME(#7544): We pass the taskgroup into death so that it can be + // dropped while the unkillable counter is set. This should not be + // necessary except for an extraneous clone() in task/spawn.rs that + // causes a killhandle to get dropped, which mustn't receive a kill + // signal since we're outside of the unwinder's try() scope. + // { let _ = self.taskgroup.take(); } + self.death.collect_failure(!self.unwinder.unwinding, self.taskgroup.take()); self.destroy(); }
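
The two-flag protocol documented in PATCH 2/4 can be summarized with a short, self-contained sketch. This is only an illustration of the design described in the new kill.rs comment, not the runtime's code: it uses today's std::sync::atomic in place of the patch's unstable::atomics module, the KillFlags type and the try_block/inhibit_kill/kill helpers are invented stand-ins for BlockedTask and KillHandle, and a dummy integer plays the role of the transmuted ~Task pointer. Every access uses Relaxed ordering, matching the rationale above: each racing operation is a swap on a single word, so atomicity, not ordering, is what matters.

// Illustrative sketch only -- modern std::sync::atomic, invented helper names,
// and a dummy integer standing in for the transmuted ~Task pointer.
use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};

const KILL_RUNNING: usize = 0;
const KILL_KILLED: usize = 1;
const KILL_UNKILLABLE: usize = 2;

struct KillFlags {
    // Kill flag: holds KILL_RUNNING, KILL_KILLED, or (in the real runtime) a
    // transmuted ~Task pointer; here any other value plays the pointer's role.
    killed: AtomicUsize,
    // Unkillable flag: KILL_RUNNING, KILL_UNKILLABLE, or KILL_KILLED.
    unkillable: AtomicUsize,
}

impl KillFlags {
    fn new() -> KillFlags {
        KillFlags {
            killed: AtomicUsize::new(KILL_RUNNING),
            unkillable: AtomicUsize::new(KILL_RUNNING),
        }
    }

    // Blocking task: publish its "pointer" unless a kill is already pending.
    // A single Relaxed compare-and-swap suffices: both racers touch only this
    // one word, so no ordering with other memory is needed.
    fn try_block(&self, task_ptr: usize) -> Result<(), usize> {
        self.killed
            .compare_exchange(KILL_RUNNING, task_ptr, Relaxed, Relaxed)
            .map(|_| ())
    }

    // Entering an unkillable section: refuse if a kill already arrived.
    fn inhibit_kill(&self) -> bool {
        self.unkillable
            .compare_exchange(KILL_RUNNING, KILL_UNKILLABLE, Relaxed, Relaxed)
            .is_ok()
    }

    // Killer: claim the unkillable flag first, then try to steal a blocked
    // task out of the kill flag. Returns the stolen "pointer" if there was one.
    fn kill(&self) -> Option<usize> {
        if self.unkillable.swap(KILL_KILLED, Relaxed) == KILL_RUNNING {
            match self.killed.swap(KILL_KILLED, Relaxed) {
                KILL_RUNNING | KILL_KILLED => None, // not blocked, or already handled
                task_ptr => Some(task_ptr),         // got ownership of the blocked task
            }
        } else {
            None // target is unkillable; it will observe KILL_KILLED later
        }
    }
}

fn main() {
    let flags = KillFlags::new();
    assert!(flags.try_block(0xdead).is_ok());  // task parks itself in the kill flag
    assert_eq!(flags.kill(), Some(0xdead));    // killer steals the blocked "task"
    assert!(!flags.inhibit_kill());            // pending kill forbids unkillable sections
    assert!(flags.try_block(0xbeef).is_err()); // later blocking sees KILL_KILLED
}

As in the real kill(), the killer's two swaps may be observed out of order by another CPU; the sketch, like the patch, relies only on each individual swap being atomic.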