From b9ffd58f73d25bd3a512b50cfb426ace3736ab47 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 24 Apr 2024 18:50:59 -0700 Subject: [PATCH 1/3] Fix data races when writing / reading `ob_gc_bits` Use relaxed atomics when reading / writing to the field. There are still a few places in the GC where we do not use atomics. Those should be safe as the world is stopped. Sample races reported by TSAN: * [Race between `_PyObject_GC_IS_SHARED()` and `_PyObject_GC_SET_SHARED()`](https://gist.github.com/mpage/ebfd022773f39c33bec68c0993a2249e) * [Race between `_PyObject_GC_TRACK()` and `_PyObject_GC_IS_SHARED()`](https://gist.github.com/mpage/b53b5c22a2f923bf1571b887bb31ef88) --- Include/cpython/pyatomic.h | 15 +++++++++++++ Include/cpython/pyatomic_gcc.h | 16 ++++++++++++++ Include/cpython/pyatomic_msc.h | 21 +++++++++++++++++- Include/cpython/pyatomic_std.h | 20 +++++++++++++++++ Include/internal/pycore_gc.h | 38 +++++++++++++++++++++++++------- Include/internal/pycore_object.h | 6 ++--- Objects/object.c | 2 +- 7 files changed, 105 insertions(+), 13 deletions(-) diff --git a/Include/cpython/pyatomic.h b/Include/cpython/pyatomic.h index 69083f1d9dd0c2..7a87265cee7419 100644 --- a/Include/cpython/pyatomic.h +++ b/Include/cpython/pyatomic.h @@ -250,6 +250,14 @@ static inline uintptr_t _Py_atomic_and_uintptr(uintptr_t *obj, uintptr_t value); +// --- _Py_atomic_and_relaxed ------------------------------------------------ +// Performs `*obj &= value` atomically and returns the previous value of `*obj` +// (relaxed consistency). + +static inline uint8_t +_Py_atomic_and_uint8_relaxed(uint8_t *obj, uint8_t value); + + // --- _Py_atomic_or --------------------------------------------------------- // Performs `*obj |= value` atomically and returns the previous value of `*obj`. @@ -269,6 +277,13 @@ static inline uintptr_t _Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value); +// --- _Py_atomic_or_relaxed ------------------------------------------------- +// Performs `*obj |= value` atomically and returns the previous value of `*obj` +// (relaxed consistency). + +static inline uint8_t +_Py_atomic_or_uint8_relaxed(uint8_t *obj, uint8_t value); + // --- _Py_atomic_load ------------------------------------------------------- // Atomically loads `*obj` (sequential consistency) diff --git a/Include/cpython/pyatomic_gcc.h b/Include/cpython/pyatomic_gcc.h index af78a94c736545..fc86e4dfd0a470 100644 --- a/Include/cpython/pyatomic_gcc.h +++ b/Include/cpython/pyatomic_gcc.h @@ -218,6 +218,13 @@ _Py_atomic_and_uintptr(uintptr_t *obj, uintptr_t value) { return __atomic_fetch_and(obj, value, __ATOMIC_SEQ_CST); } +// --- _Py_atomic_and_relaxed ------------------------------------------------ + +static inline uint8_t +_Py_atomic_and_uint8_relaxed(uint8_t *obj, uint8_t value) +{ return __atomic_fetch_and(obj, value, __ATOMIC_RELAXED); } + + // --- _Py_atomic_or --------------------------------------------------------- static inline uint8_t @@ -241,6 +248,15 @@ _Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value) { return __atomic_fetch_or(obj, value, __ATOMIC_SEQ_CST); } +// --- _Py_atomic_or_relaxed ------------------------------------------------- + +static inline uint8_t +_Py_atomic_or_uint8_relaxed(uint8_t *obj, uint8_t value) +{ + return __atomic_fetch_or(obj, value, __ATOMIC_RELAXED); +} + + // --- _Py_atomic_load ------------------------------------------------------- static inline int diff --git a/Include/cpython/pyatomic_msc.h b/Include/cpython/pyatomic_msc.h index 212cd7817d01c5..bf65309e329da6 100644 --- a/Include/cpython/pyatomic_msc.h +++ b/Include/cpython/pyatomic_msc.h @@ -450,6 +450,16 @@ _Py_atomic_and_uintptr(uintptr_t *obj, uintptr_t value) } +// --- _Py_atomic_and_relaxed ------------------------------------------------ + +static inline uint8_t +_Py_atomic_and_uint8_relaxed(uint8_t *obj, uint8_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(char); + return (uint8_t)_InterlockedAnd8NoFence((volatile char *)obj, (char)value); +} + + // --- _Py_atomic_or --------------------------------------------------------- static inline uint8_t @@ -490,7 +500,6 @@ _Py_atomic_or_uint64(uint64_t *obj, uint64_t value) #endif } - static inline uintptr_t _Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value) { @@ -506,6 +515,16 @@ _Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value) } +// --- _Py_atomic_or_relaxed ------------------------------------------------- + +static inline uint8_t +_Py_atomic_or_uint8_relaxed(uint8_t *obj, uint8_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(char); + return (uint8_t)_InterlockedOr8NoFence((volatile char *)obj, (char)value); +} + + // --- _Py_atomic_load ------------------------------------------------------- static inline uint8_t diff --git a/Include/cpython/pyatomic_std.h b/Include/cpython/pyatomic_std.h index 6a77eae536d8dd..072a7a789f6004 100644 --- a/Include/cpython/pyatomic_std.h +++ b/Include/cpython/pyatomic_std.h @@ -366,6 +366,16 @@ _Py_atomic_and_uintptr(uintptr_t *obj, uintptr_t value) } +// --- _Py_atomic_and -------------------------------------------------------- + +static inline uint8_t +_Py_atomic_and_uint8_relaxed(uint8_t *obj, uint8_t value) +{ + _Py_USING_STD; + return atomic_fetch_and_explicit((_Atomic(uint8_t)*)obj, value, memory_order_relaxed); +} + + // --- _Py_atomic_or --------------------------------------------------------- static inline uint8_t @@ -404,6 +414,16 @@ _Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value) } +// --- _Py_atomic_or_relaxed ------------------------------------------------- + +static inline uint8_t +_Py_atomic_or_uint8_relaxed(uint8_t *obj, uint8_t value) +{ + _Py_USING_STD; + return atomic_fetch_or_explicit((_Atomic(uint8_t)*)obj, value, memory_order_relaxed); +} + + // --- _Py_atomic_load ------------------------------------------------------- static inline int diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 9e465fdd86279f..98a610c301696d 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -48,10 +48,32 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) { # define _PyGC_BITS_DEFERRED (64) // Use deferred reference counting #endif +#ifdef Py_GIL_DISABLED + +static inline void +_PyObject_SET_GC_BITS(PyObject *op, uint8_t bits) +{ + _Py_atomic_or_uint8_relaxed(&op->ob_gc_bits, bits); +} + +static inline int +_PyObject_HAS_GC_BITS(PyObject *op, uint8_t bits) +{ + return (_Py_atomic_load_uint8_relaxed(&op->ob_gc_bits) & bits) != 0; +} + +static inline void +_PyObject_CLEAR_GC_BITS(PyObject *op, uint8_t bits) +{ + _Py_atomic_and_uint8_relaxed(&op->ob_gc_bits, ~bits); +} + +#endif + /* True if the object is currently tracked by the GC. */ static inline int _PyObject_GC_IS_TRACKED(PyObject *op) { #ifdef Py_GIL_DISABLED - return (op->ob_gc_bits & _PyGC_BITS_TRACKED) != 0; + return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_TRACKED); #else PyGC_Head *gc = _Py_AS_GC(op); return (gc->_gc_next != 0); @@ -80,12 +102,12 @@ static inline int _PyObject_GC_MAY_BE_TRACKED(PyObject *obj) { * for calling _PyMem_FreeDelayed on the referenced * memory. */ static inline int _PyObject_GC_IS_SHARED(PyObject *op) { - return (op->ob_gc_bits & _PyGC_BITS_SHARED) != 0; + return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_SHARED); } #define _PyObject_GC_IS_SHARED(op) _PyObject_GC_IS_SHARED(_Py_CAST(PyObject*, op)) static inline void _PyObject_GC_SET_SHARED(PyObject *op) { - op->ob_gc_bits |= _PyGC_BITS_SHARED; + _PyObject_SET_GC_BITS(op, _PyGC_BITS_SHARED); } #define _PyObject_GC_SET_SHARED(op) _PyObject_GC_SET_SHARED(_Py_CAST(PyObject*, op)) @@ -95,13 +117,13 @@ static inline void _PyObject_GC_SET_SHARED(PyObject *op) { * Objects with this bit that are GC objects will automatically * delay-freed by PyObject_GC_Del. */ static inline int _PyObject_GC_IS_SHARED_INLINE(PyObject *op) { - return (op->ob_gc_bits & _PyGC_BITS_SHARED_INLINE) != 0; + return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_SHARED_INLINE); } #define _PyObject_GC_IS_SHARED_INLINE(op) \ _PyObject_GC_IS_SHARED_INLINE(_Py_CAST(PyObject*, op)) static inline void _PyObject_GC_SET_SHARED_INLINE(PyObject *op) { - op->ob_gc_bits |= _PyGC_BITS_SHARED_INLINE; + _PyObject_SET_GC_BITS(op, _PyGC_BITS_SHARED_INLINE); } #define _PyObject_GC_SET_SHARED_INLINE(op) \ _PyObject_GC_SET_SHARED_INLINE(_Py_CAST(PyObject*, op)) @@ -178,7 +200,7 @@ static inline void _PyGCHead_SET_PREV(PyGC_Head *gc, PyGC_Head *prev) { static inline int _PyGC_FINALIZED(PyObject *op) { #ifdef Py_GIL_DISABLED - return (op->ob_gc_bits & _PyGC_BITS_FINALIZED) != 0; + return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_FINALIZED); #else PyGC_Head *gc = _Py_AS_GC(op); return ((gc->_gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0); @@ -186,7 +208,7 @@ static inline int _PyGC_FINALIZED(PyObject *op) { } static inline void _PyGC_SET_FINALIZED(PyObject *op) { #ifdef Py_GIL_DISABLED - op->ob_gc_bits |= _PyGC_BITS_FINALIZED; + _PyObject_SET_GC_BITS(op, _PyGC_BITS_FINALIZED); #else PyGC_Head *gc = _Py_AS_GC(op); gc->_gc_prev |= _PyGC_PREV_MASK_FINALIZED; @@ -194,7 +216,7 @@ static inline void _PyGC_SET_FINALIZED(PyObject *op) { } static inline void _PyGC_CLEAR_FINALIZED(PyObject *op) { #ifdef Py_GIL_DISABLED - op->ob_gc_bits &= ~_PyGC_BITS_FINALIZED; + _PyObject_CLEAR_GC_BITS(op, _PyGC_BITS_FINALIZED); #else PyGC_Head *gc = _Py_AS_GC(op); gc->_gc_prev &= ~_PyGC_PREV_MASK_FINALIZED; diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 88b052f4544b15..24de91022ca693 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -168,7 +168,7 @@ static inline int _PyObject_HasDeferredRefcount(PyObject *op) { #ifdef Py_GIL_DISABLED - return (op->ob_gc_bits & _PyGC_BITS_DEFERRED) != 0; + return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_DEFERRED); #else return 0; #endif @@ -320,7 +320,7 @@ static inline void _PyObject_GC_TRACK( "object already tracked by the garbage collector", filename, lineno, __func__); #ifdef Py_GIL_DISABLED - op->ob_gc_bits |= _PyGC_BITS_TRACKED; + _PyObject_SET_GC_BITS(op, _PyGC_BITS_TRACKED); #else PyGC_Head *gc = _Py_AS_GC(op); _PyObject_ASSERT_FROM(op, @@ -361,7 +361,7 @@ static inline void _PyObject_GC_UNTRACK( filename, lineno, __func__); #ifdef Py_GIL_DISABLED - op->ob_gc_bits &= ~_PyGC_BITS_TRACKED; + _PyObject_CLEAR_GC_BITS(op, _PyGC_BITS_TRACKED); #else PyGC_Head *gc = _Py_AS_GC(op); PyGC_Head *prev = _PyGCHead_PREV(gc); diff --git a/Objects/object.c b/Objects/object.c index 91bb0114cbfc32..d7e4838c8ec0e8 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2430,7 +2430,7 @@ _PyObject_SetDeferredRefcount(PyObject *op) assert(PyType_IS_GC(Py_TYPE(op))); assert(_Py_IsOwnedByCurrentThread(op)); assert(op->ob_ref_shared == 0); - op->ob_gc_bits |= _PyGC_BITS_DEFERRED; + _PyObject_SET_GC_BITS(op, _PyGC_BITS_DEFERRED); op->ob_ref_local += 1; op->ob_ref_shared = _Py_REF_QUEUED; #endif From ae8e5b8ef74d371b8423273916f2af3166465c1e Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 25 Apr 2024 11:55:33 -0700 Subject: [PATCH 2/3] Fix comment and remove suppressions --- Include/cpython/pyatomic_std.h | 2 +- Tools/tsan/suppressions_free_threading.txt | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Include/cpython/pyatomic_std.h b/Include/cpython/pyatomic_std.h index 072a7a789f6004..aa01aa12190630 100644 --- a/Include/cpython/pyatomic_std.h +++ b/Include/cpython/pyatomic_std.h @@ -366,7 +366,7 @@ _Py_atomic_and_uintptr(uintptr_t *obj, uintptr_t value) } -// --- _Py_atomic_and -------------------------------------------------------- +// --- _Py_atomic_and_relaxed ------------------------------------------------ static inline uint8_t _Py_atomic_and_uint8_relaxed(uint8_t *obj, uint8_t value) diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 1408103ba80f96..7070e8550c29e7 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -22,9 +22,6 @@ race:_PyImport_AcquireLock race:_PyImport_ReleaseLock race:_PyInterpreterState_SetNotRunningMain race:_PyInterpreterState_IsRunningMain -race:_PyObject_GC_IS_SHARED -race:_PyObject_GC_SET_SHARED -race:_PyObject_GC_TRACK race:_PyType_HasFeature race:assign_version_tag race:compare_unicode_unicode From 3f354ebabb8507ad9f179dc288e012947cd62c39 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 6 May 2024 16:05:01 -0700 Subject: [PATCH 3/3] Use new locking discipline --- Include/cpython/pyatomic.h | 15 --------------- Include/cpython/pyatomic_gcc.h | 16 ---------------- Include/cpython/pyatomic_msc.h | 21 +-------------------- Include/cpython/pyatomic_std.h | 20 -------------------- Include/internal/pycore_gc.h | 20 +++++++++++++++----- 5 files changed, 16 insertions(+), 76 deletions(-) diff --git a/Include/cpython/pyatomic.h b/Include/cpython/pyatomic.h index 7a87265cee7419..69083f1d9dd0c2 100644 --- a/Include/cpython/pyatomic.h +++ b/Include/cpython/pyatomic.h @@ -250,14 +250,6 @@ static inline uintptr_t _Py_atomic_and_uintptr(uintptr_t *obj, uintptr_t value); -// --- _Py_atomic_and_relaxed ------------------------------------------------ -// Performs `*obj &= value` atomically and returns the previous value of `*obj` -// (relaxed consistency). - -static inline uint8_t -_Py_atomic_and_uint8_relaxed(uint8_t *obj, uint8_t value); - - // --- _Py_atomic_or --------------------------------------------------------- // Performs `*obj |= value` atomically and returns the previous value of `*obj`. @@ -277,13 +269,6 @@ static inline uintptr_t _Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value); -// --- _Py_atomic_or_relaxed ------------------------------------------------- -// Performs `*obj |= value` atomically and returns the previous value of `*obj` -// (relaxed consistency). - -static inline uint8_t -_Py_atomic_or_uint8_relaxed(uint8_t *obj, uint8_t value); - // --- _Py_atomic_load ------------------------------------------------------- // Atomically loads `*obj` (sequential consistency) diff --git a/Include/cpython/pyatomic_gcc.h b/Include/cpython/pyatomic_gcc.h index fc86e4dfd0a470..af78a94c736545 100644 --- a/Include/cpython/pyatomic_gcc.h +++ b/Include/cpython/pyatomic_gcc.h @@ -218,13 +218,6 @@ _Py_atomic_and_uintptr(uintptr_t *obj, uintptr_t value) { return __atomic_fetch_and(obj, value, __ATOMIC_SEQ_CST); } -// --- _Py_atomic_and_relaxed ------------------------------------------------ - -static inline uint8_t -_Py_atomic_and_uint8_relaxed(uint8_t *obj, uint8_t value) -{ return __atomic_fetch_and(obj, value, __ATOMIC_RELAXED); } - - // --- _Py_atomic_or --------------------------------------------------------- static inline uint8_t @@ -248,15 +241,6 @@ _Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value) { return __atomic_fetch_or(obj, value, __ATOMIC_SEQ_CST); } -// --- _Py_atomic_or_relaxed ------------------------------------------------- - -static inline uint8_t -_Py_atomic_or_uint8_relaxed(uint8_t *obj, uint8_t value) -{ - return __atomic_fetch_or(obj, value, __ATOMIC_RELAXED); -} - - // --- _Py_atomic_load ------------------------------------------------------- static inline int diff --git a/Include/cpython/pyatomic_msc.h b/Include/cpython/pyatomic_msc.h index bf65309e329da6..212cd7817d01c5 100644 --- a/Include/cpython/pyatomic_msc.h +++ b/Include/cpython/pyatomic_msc.h @@ -450,16 +450,6 @@ _Py_atomic_and_uintptr(uintptr_t *obj, uintptr_t value) } -// --- _Py_atomic_and_relaxed ------------------------------------------------ - -static inline uint8_t -_Py_atomic_and_uint8_relaxed(uint8_t *obj, uint8_t value) -{ - _Py_atomic_ASSERT_ARG_TYPE(char); - return (uint8_t)_InterlockedAnd8NoFence((volatile char *)obj, (char)value); -} - - // --- _Py_atomic_or --------------------------------------------------------- static inline uint8_t @@ -500,6 +490,7 @@ _Py_atomic_or_uint64(uint64_t *obj, uint64_t value) #endif } + static inline uintptr_t _Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value) { @@ -515,16 +506,6 @@ _Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value) } -// --- _Py_atomic_or_relaxed ------------------------------------------------- - -static inline uint8_t -_Py_atomic_or_uint8_relaxed(uint8_t *obj, uint8_t value) -{ - _Py_atomic_ASSERT_ARG_TYPE(char); - return (uint8_t)_InterlockedOr8NoFence((volatile char *)obj, (char)value); -} - - // --- _Py_atomic_load ------------------------------------------------------- static inline uint8_t diff --git a/Include/cpython/pyatomic_std.h b/Include/cpython/pyatomic_std.h index aa01aa12190630..6a77eae536d8dd 100644 --- a/Include/cpython/pyatomic_std.h +++ b/Include/cpython/pyatomic_std.h @@ -366,16 +366,6 @@ _Py_atomic_and_uintptr(uintptr_t *obj, uintptr_t value) } -// --- _Py_atomic_and_relaxed ------------------------------------------------ - -static inline uint8_t -_Py_atomic_and_uint8_relaxed(uint8_t *obj, uint8_t value) -{ - _Py_USING_STD; - return atomic_fetch_and_explicit((_Atomic(uint8_t)*)obj, value, memory_order_relaxed); -} - - // --- _Py_atomic_or --------------------------------------------------------- static inline uint8_t @@ -414,16 +404,6 @@ _Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value) } -// --- _Py_atomic_or_relaxed ------------------------------------------------- - -static inline uint8_t -_Py_atomic_or_uint8_relaxed(uint8_t *obj, uint8_t value) -{ - _Py_USING_STD; - return atomic_fetch_or_explicit((_Atomic(uint8_t)*)obj, value, memory_order_relaxed); -} - - // --- _Py_atomic_load ------------------------------------------------------- static inline int diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 7e2b432cb83c96..60582521db5bd7 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -37,7 +37,15 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) { } -/* Bit flags for ob_gc_bits (in Py_GIL_DISABLED builds) */ +/* Bit flags for ob_gc_bits (in Py_GIL_DISABLED builds) + * + * Setting the bits requires a relaxed store. The per-object lock must also be + * held, except when the object is only visible to a single thread (e.g. during + * object initialization or destruction). + * + * Reading the bits requires using a relaxed load, but does not require holding + * the per-object lock. + */ #ifdef Py_GIL_DISABLED # define _PyGC_BITS_TRACKED (1) // Tracked by the GC # define _PyGC_BITS_FINALIZED (2) // tp_finalize was called @@ -51,9 +59,10 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) { #ifdef Py_GIL_DISABLED static inline void -_PyObject_SET_GC_BITS(PyObject *op, uint8_t bits) +_PyObject_SET_GC_BITS(PyObject *op, uint8_t new_bits) { - _Py_atomic_or_uint8_relaxed(&op->ob_gc_bits, bits); + uint8_t bits = _Py_atomic_load_uint8_relaxed(&op->ob_gc_bits); + _Py_atomic_store_uint8_relaxed(&op->ob_gc_bits, bits | new_bits); } static inline int @@ -63,9 +72,10 @@ _PyObject_HAS_GC_BITS(PyObject *op, uint8_t bits) } static inline void -_PyObject_CLEAR_GC_BITS(PyObject *op, uint8_t bits) +_PyObject_CLEAR_GC_BITS(PyObject *op, uint8_t bits_to_clear) { - _Py_atomic_and_uint8_relaxed(&op->ob_gc_bits, ~bits); + uint8_t bits = _Py_atomic_load_uint8_relaxed(&op->ob_gc_bits); + _Py_atomic_store_uint8_relaxed(&op->ob_gc_bits, bits & ~bits_to_clear); } #endif