gh-105766: Add Locking Around Custom Allocators #105619
Changes shown from 4 commits. Commits in this PR: fcbeb6d, fbe26eb, 7191629, dd60989, 0f2a242, 321d1c2, 52c01ea, a213597.
Objects/obmalloc.c:
```diff
@@ -3,6 +3,7 @@
 #include "Python.h"
 #include "pycore_code.h"          // stats
 #include "pycore_pystate.h"       // _PyInterpreterState_GET
+#include "pycore_ceval.h"         // _PyEval_AcquireLock()
 
 #include "pycore_obmalloc.h"
 #include "pycore_pymem.h"
```
|
|
```diff
@@ -209,6 +210,8 @@ _PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
 #define _PyObject (_PyRuntime.allocators.standard.obj)
 #define _PyMem_Debug (_PyRuntime.allocators.debug)
 #define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
+#define _PyMem_Wrapped (_PyRuntime.allocators.wrapped_with_lock.mem)
+#define _PyObject_Wrapped (_PyRuntime.allocators.wrapped_with_lock.obj)
 
 
 /***************************/
```
```diff
@@ -530,14 +533,32 @@ PyMem_SetupDebugHooks(void)
     PyThread_release_lock(ALLOCATORS_MUTEX);
 }
 
+static int has_locking_wrapper(PyMemAllocatorEx *allocator);
+
 static void
 get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
 {
     switch(domain)
     {
-    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
-    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
-    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
+    case PYMEM_DOMAIN_RAW:
+        *allocator = _PyMem_Raw;
+        break;
+    case PYMEM_DOMAIN_MEM:
+        if (has_locking_wrapper(&_PyMem)) {
+            *allocator = *(PyMemAllocatorEx *)_PyMem.ctx;
+        }
+        else {
+            *allocator = _PyMem;
+        }
+        break;
+    case PYMEM_DOMAIN_OBJ:
+        if (has_locking_wrapper(&_PyObject)) {
+            *allocator = *(PyMemAllocatorEx *)_PyObject.ctx;
+        }
+        else {
+            *allocator = _PyObject;
+        }
+        break;
     default:
         /* unknown domain: set all attributes to NULL */
         allocator->ctx = NULL;
```
|
|
```diff
@@ -548,15 +569,28 @@ get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
     }
 }
 
+static void maybe_add_locking_wrapper(
+    PyMemAllocatorDomain, PyMemAllocatorEx *, PyMemAllocatorEx *);
+
 static void
 set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
 {
     switch(domain)
     {
-    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
-    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
-    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
-    /* ignore unknown domain */
+    case PYMEM_DOMAIN_RAW:
+        _PyMem_Raw = *allocator;
+        break;
+    case PYMEM_DOMAIN_MEM:
+        _PyMem = *allocator;
+        maybe_add_locking_wrapper(domain, &_PyMem, &_PyMem_Wrapped);
+        break;
+    case PYMEM_DOMAIN_OBJ:
+        _PyObject = *allocator;
+        maybe_add_locking_wrapper(domain, &_PyObject, &_PyObject_Wrapped);
+        break;
+    default:
+        /* ignore unknown domain */
+        return;
     }
 }
```
|
|
|
|
```diff
@@ -627,6 +661,192 @@ PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
  * around these uses of the runtime-global allocators state. */
 
 
+/************************************/
+/* locking around custom allocators */
+/************************************/
+
+static inline int
+should_lock(PyInterpreterState *interp)
+{
+    return interp->ceval.gil != _PyInterpreterState_Main()->ceval.gil;
+}
+
+static PyThreadState *
+get_tstate_for_main_gil(void)
+{
+    PyThreadState *tstate = _PyRuntimeState_GetFinalizing(&_PyRuntime);
+    if (tstate == NULL) {
+        /* To use its GIL, we only need the pointer and one field. */
+        static const PyThreadState _main_tstate = {
+            .interp = &_PyRuntime._main_interpreter,
+        };
+        tstate = (PyThreadState *)&_main_tstate;
+    }
+    return tstate;
+}
+
+static inline void
+acquire_custom_allocator_lock(PyThreadState *tstate)
+{
+    _PyEval_AcquireLock(tstate);
+}
+
+static inline void
+release_custom_allocator_lock(PyThreadState *tstate)
+{
+    _PyEval_ReleaseLock(tstate->interp, tstate);
+}
+
+static void *
+_PyMem_MallocLocked(void *ctx, size_t size)
+{
+    PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
+    if (_PyRuntime.allocators.num_gils > 1) {
```
> **Review comment (Member)**, on `if (_PyRuntime.allocators.num_gils > 1) {`:
>
> You lock updates (writes) to this value. Is it safe to use without atomic access or a lock? (Requiring an atomic read here would presumably be a performance hit.) Rather than always checking, could the logic that does `... num_gils++;` become
>
> ```c
> num_gils++;
> if (num_gils == 2 && !has_locking_wrapper(...)) {
>     maybe_add_locking_wrapper(...);
> }
> ```
>
> so that the wrapped-pointer switch happens upon first creation of an additional GIL, while the main runtime GIL (the only GIL prior to the current code running) is still held? I'm not sure it is worth doing the opposite during finalization: once wrapped due to per-subinterpreter GILs, just stay wrapped. At least the wrappers won't have this conditional anymore. I suspect this thought is either overoptimization... or actually necessary to avoid locking/atomic access to `num_gils`.
>
> **Reply (Member, Author):** That's a great idea. I'll try it out.
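For concreteness, a minimal sketch of the reviewer's suggestion. The helper name `_PyMem_WrapAllocatorsOnce()` is hypothetical, and removing the `num_gils > 1` check from the wrapper thunks is the reviewer's proposal, not what this revision of the PR does; `HEAD_LOCK()`, `num_gils`, `has_locking_wrapper()`, and `maybe_add_locking_wrapper()` are taken from the PR itself.

```c
/* Hypothetical helper in Objects/obmalloc.c (name is illustrative):
 * wrap the MEM/OBJ allocators exactly once, at the moment a second
 * GIL comes into existence, so the wrappers never have to read
 * num_gils on the hot path. */
void
_PyMem_WrapAllocatorsOnce(void)
{
    if (!has_locking_wrapper(&_PyMem)) {
        maybe_add_locking_wrapper(PYMEM_DOMAIN_MEM, &_PyMem, &_PyMem_Wrapped);
    }
    if (!has_locking_wrapper(&_PyObject)) {
        maybe_add_locking_wrapper(PYMEM_DOMAIN_OBJ, &_PyObject, &_PyObject_Wrapped);
    }
}

/* ...and at the call site in init_interp_create_gil(), still under
 * HEAD_LOCK(runtime) and while the main GIL is the only GIL that has
 * ever run code, so no other thread can be inside the allocators yet: */
HEAD_LOCK(runtime);
runtime->allocators.num_gils++;
if (runtime->allocators.num_gils == 2) {
    _PyMem_WrapAllocatorsOnce();
}
HEAD_UNLOCK(runtime);
```

Since the wrapping happens before any concurrent allocator use is possible, the write to the function pointers needs no extra synchronization. The hunk resumes below with the body of `_PyMem_MallocLocked` and the remaining wrapper functions.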
```diff
+        PyThreadState *tstate = _PyThreadState_GET();
+        if (should_lock(tstate->interp)) {
+            tstate = get_tstate_for_main_gil();
+            acquire_custom_allocator_lock(tstate);
+            void *ptr = wrapped->malloc(wrapped->ctx, size);
+            release_custom_allocator_lock(tstate);
+            return ptr;
+        }
+    }
+    return wrapped->malloc(wrapped->ctx, size);
+}
+
+static void *
+_PyMem_CallocLocked(void *ctx, size_t nelem, size_t elsize)
+{
+    PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
+    if (_PyRuntime.allocators.num_gils > 1) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        if (should_lock(tstate->interp)) {
+            tstate = get_tstate_for_main_gil();
+            acquire_custom_allocator_lock(tstate);
+            void *ptr = wrapped->calloc(wrapped->ctx, nelem, elsize);
+            release_custom_allocator_lock(tstate);
+            return ptr;
+        }
+    }
+    return wrapped->calloc(wrapped->ctx, nelem, elsize);
+}
+
+static void *
+_PyMem_ReallocLocked(void *ctx, void *ptr, size_t new_size)
+{
+    PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
+    if (_PyRuntime.allocators.num_gils > 1) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        if (should_lock(tstate->interp)) {
+            tstate = get_tstate_for_main_gil();
+            acquire_custom_allocator_lock(tstate);
+            ptr = wrapped->realloc(wrapped->ctx, ptr, new_size);
+            release_custom_allocator_lock(tstate);
+            return ptr;
+        }
+    }
+    return wrapped->realloc(wrapped->ctx, ptr, new_size);
+}
+
+static void
+_PyMem_FreeLocked(void *ctx, void *ptr)
+{
+    PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
+    if (_PyRuntime.allocators.num_gils > 1) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        if (should_lock(tstate->interp)) {
+            tstate = get_tstate_for_main_gil();
+            acquire_custom_allocator_lock(tstate);
+            wrapped->free(wrapped->ctx, ptr);
+            release_custom_allocator_lock(tstate);
+            return;
+        }
+    }
+    wrapped->free(wrapped->ctx, ptr);
+}
+
+static int
+has_locking_wrapper(PyMemAllocatorEx *allocator)
+{
+    if (allocator->ctx == NULL) {
+        return 0;
+    }
+    return (allocator->malloc == _PyMem_MallocLocked
+            || allocator->calloc == _PyMem_CallocLocked
+            || allocator->realloc == _PyMem_ReallocLocked
+            || allocator->free == _PyMem_FreeLocked);
+}
+
+static void
+maybe_add_locking_wrapper(PyMemAllocatorDomain domain,
+                          PyMemAllocatorEx *allocator,
+                          PyMemAllocatorEx *wrapped)
+{
+    assert(domain == PYMEM_DOMAIN_MEM || domain == PYMEM_DOMAIN_OBJ);
+
+    *wrapped = (PyMemAllocatorEx){0};
+
+    if (allocator->malloc == _PyMem_DebugMalloc) {
+        /* The debug allocator only wraps an already set allocator,
+         * which would have gone through this function already. */
+        return;
+    }
+
+    void *ctx = allocator->ctx;
+
+    /* What is the likelihood of reentrancy with the wrapper funcs?
+     * For now we assume it is effectively zero. */
+
+    if (allocator->malloc != _PyMem_RawMalloc
+#ifdef WITH_PYMALLOC
+        && allocator->malloc != _PyObject_Malloc
+#endif
+        && allocator->malloc != _PyMem_MallocLocked)
+    {
+        wrapped->ctx = ctx;
+        wrapped->malloc = allocator->malloc;
+        allocator->ctx = wrapped;
+        allocator->malloc = _PyMem_MallocLocked;
+    }
+
+    if (allocator->calloc != _PyMem_RawCalloc
+#ifdef WITH_PYMALLOC
+        && allocator->calloc != _PyObject_Calloc
+#endif
+        && allocator->calloc != _PyMem_CallocLocked)
+    {
+        wrapped->ctx = ctx;
+        wrapped->calloc = allocator->calloc;
+        allocator->ctx = wrapped;
+        allocator->calloc = _PyMem_CallocLocked;
+    }
+
+    if (allocator->realloc != _PyMem_RawRealloc
+#ifdef WITH_PYMALLOC
+        && allocator->realloc != _PyObject_Realloc
+#endif
+        && allocator->realloc != _PyMem_ReallocLocked)
+    {
+        wrapped->ctx = ctx;
+        wrapped->realloc = allocator->realloc;
+        allocator->ctx = wrapped;
+        allocator->realloc = _PyMem_ReallocLocked;
+    }
+
+    if (allocator->free != _PyMem_RawFree
+#ifdef WITH_PYMALLOC
+        && allocator->free != _PyObject_Free
+#endif
+        && allocator->free != _PyMem_FreeLocked)
+    {
+        wrapped->ctx = ctx;
+        wrapped->free = allocator->free;
+        allocator->ctx = wrapped;
+        allocator->free = _PyMem_FreeLocked;
+    }
+}
+
+
 /*************************/
 /* the "arena" allocator */
 /*************************/
```
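For orientation, here is a sketch of the embedder-facing scenario this hunk targets. `PyMem_SetAllocator()` and `PyMemAllocatorEx` are the real public C API; the counting allocator itself (`my_malloc` and friends, `install_counting_allocator`) is hypothetical, not from the PR. Such an allocator is not thread-safe on its own, and with per-interpreter GILs it can now be entered from several interpreters concurrently; under this PR, `set_allocator_unlocked()` routes it through the `_PyMem_*Locked` thunks above.

```c
#include <Python.h>
#include <stdlib.h>

/* Hypothetical embedder allocator: keeps a bookkeeping counter but
 * does no locking of its own.  The unsynchronized counter is exactly
 * the kind of state the locking wrappers protect. */
static size_t live_blocks = 0;   /* deliberately unsynchronized */

static void *
my_malloc(void *ctx, size_t size)
{
    live_blocks++;
    /* PyMem allocators must return a unique pointer even for size 0. */
    return malloc(size ? size : 1);
}

static void *
my_calloc(void *ctx, size_t nelem, size_t elsize)
{
    live_blocks++;
    return calloc(nelem ? nelem : 1, elsize ? elsize : 1);
}

static void *
my_realloc(void *ctx, void *ptr, size_t new_size)
{
    if (ptr == NULL) {
        live_blocks++;
    }
    return realloc(ptr, new_size ? new_size : 1);
}

static void
my_free(void *ctx, void *ptr)
{
    if (ptr != NULL) {
        live_blocks--;
    }
    free(ptr);
}

/* Call before Py_Initialize().  Under this PR, PyMem_SetAllocator()
 * reaches set_allocator_unlocked(), which in turn calls
 * maybe_add_locking_wrapper() for the MEM and OBJ domains. */
void
install_counting_allocator(void)
{
    PyMemAllocatorEx alloc = {
        .ctx = NULL,
        .malloc = my_malloc,
        .calloc = my_calloc,
        .realloc = my_realloc,
        .free = my_free,
    };
    PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
    PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
}
```

Because the wrapper stashes the original function pointers in `_PyMem_Wrapped`/`_PyObject_Wrapped` and swaps in its own `ctx`, `get_allocator_unlocked()` can still report the original allocator to callers of `PyMem_GetAllocator()` via the `has_locking_wrapper()` branch.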
Python/pylifecycle.c:
```diff
@@ -586,6 +586,7 @@ static PyStatus
 init_interp_create_gil(PyThreadState *tstate, int own_gil)
 {
     PyStatus status;
+    _PyRuntimeState *runtime = tstate->interp->runtime;
 
     /* finalize_interp_delete() comment explains why _PyEval_FiniGIL() is
        only called here. */
```
|
|
```diff
@@ -603,6 +604,9 @@ init_interp_create_gil(PyThreadState *tstate, int own_gil)
     if (_PyStatus_EXCEPTION(status)) {
         return status;
     }
+    HEAD_LOCK(runtime);
+    runtime->allocators.num_gils++;
+    HEAD_UNLOCK(runtime);
 
     return _PyStatus_OK();
 }
```
|
|
```diff
@@ -1730,6 +1734,11 @@ finalize_interp_delete(PyInterpreterState *interp)
     /* Cleanup auto-thread-state */
     _PyGILState_Fini(interp);
 
+    _PyRuntimeState *runtime = interp->runtime;
+    HEAD_LOCK(runtime);
+    runtime->allocators.num_gils--;
+    HEAD_UNLOCK(runtime);
+
     /* We can't call _PyEval_FiniGIL() here because destroying the GIL lock can
        fail when it is being awaited by another running daemon thread (see
        bpo-9901). Instead pycore_create_interpreter() destroys the previously
```
> **Review comment (Member):** I suggest `unsigned int`.
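A minimal sketch of what that suggestion would look like, assuming `num_gils` is declared in the runtime's allocators struct; the surrounding struct layout is assumed for illustration and is not shown in this diff.

```c
/* Assumed declaration site (layout illustrative only): the GIL count
 * can never be negative, so make it unsigned, per the review. */
struct _pymem_allocators {
    /* ... existing fields ... */
    unsigned int num_gils;   /* was (assumed): int num_gils; */
};
```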