Skip to content

Commit 4450445

Browse files
committed
pymem: add _PyMem_FreeQsbr
1 parent 74df778 commit 4450445

File tree

8 files changed

+189
-1
lines changed

8 files changed

+189
-1
lines changed

Include/Python.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
#include "cpython/cellobject.h"
7777
#include "iterobject.h"
7878
#include "cpython/initconfig.h"
79+
#include "cpython/pyqueue.h"
7980
#include "pystate.h"
8081
#include "cpython/genobject.h"
8182
#include "descrobject.h"

Include/cpython/pystate.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,10 @@ struct _ts {
211211
_PyStackChunk *datastack_chunk;
212212
PyObject **datastack_top;
213213
PyObject **datastack_limit;
214+
215+
/* Queue of data pointers to be freed */
216+
struct _Py_queue_head/*<_PyMemWork>*/ mem_work;
217+
214218
/* XXX signal handlers should also be here */
215219

216220
/* The following fields are here to avoid allocation during init.

Include/internal/pycore_interp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ extern "C" {
2424
#include "pycore_list.h" // struct _Py_list_state
2525
#include "pycore_llist.h" // struct llist_node
2626
#include "pycore_global_objects.h" // struct _Py_interp_static_objects
27+
#include "pycore_pymem.h" // struct _mem_state
2728
#include "pycore_tuple.h" // struct _Py_tuple_state
2829
#include "pycore_typeobject.h" // struct type_cache
2930
#include "pycore_unicodeobject.h" // struct _Py_unicode_state
@@ -118,6 +119,7 @@ struct _is {
118119

119120
struct _ceval_state ceval;
120121
struct _gc_runtime_state gc;
122+
struct _mem_state mem;
121123

122124
// sys.modules dictionary
123125
PyObject *modules;

Include/internal/pycore_pymem.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ struct _pymem_allocators {
3333
PyObjectArenaAllocator obj_arena;
3434
};
3535

36+
/* State for deferred (QSBR) frees; embedded in the interpreter state as
   `interp->mem`. */
struct _mem_state {
37+
/* Taken by _PyMem_QsbrPoll (try-lock) and _PyMem_AbandonQsbr around their
   accesses to `work`. */
_PyMutex mutex;
38+
/* Queue of data pointers to be freed from dead threads */
39+
struct _Py_queue_head/*<_PyMemWork>*/ work;
40+
/* Non-zero when `work` may still hold pointers waiting to be freed;
   _PyMem_QsbrPoll checks it before trying to take `mutex`. */
int nonempty;
41+
};
42+
3643

3744
/* Set the memory allocator of the specified domain to the default.
3845
Save the old allocator into *old_alloc if it's non-NULL.
@@ -92,6 +99,12 @@ PyAPI_FUNC(int) _PyMem_GetAllocatorName(
9299
PYMEM_ALLOCATOR_NOT_SET does nothing. */
93100
PyAPI_FUNC(int) _PyMem_SetupAllocators(PyMemAllocatorName allocator);
94101

102+
/* Free the pointer after all threads are quiescent. */
103+
extern void _PyMem_FreeQsbr(void *ptr);
104+
/* Free any queued pointers that are now safe to release: first from the
   thread's own queue, then from the interpreter queue if its mutex can be
   acquired without blocking. */
extern void _PyMem_QsbrPoll(PyThreadState *tstate);
105+
/* Empty the thread's queue: drained buffers are freed, buffers that still
   hold pending pointers are moved to the interpreter queue. */
extern void _PyMem_AbandonQsbr(PyThreadState *tstate);
106+
/* Free every pointer and buffer remaining on the interpreter queue. */
extern void _PyMem_QsbrFini(PyInterpreterState *interp);
107+
95108
extern void * _PyMem_DefaultRawMalloc(size_t);
96109
extern void * _PyMem_DefaultRawCalloc(size_t, size_t);
97110
extern void * _PyMem_DefaultRawRealloc(void *, size_t);

Include/internal/pycore_qsbr.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ struct qsbr {
1313
uint64_t t_seq;
1414
struct qsbr_shared *t_shared;
1515
struct qsbr *t_next;
16+
int t_deferred;
17+
int t_limit;
1618
PyThreadState *tstate;
1719
};
1820

@@ -46,6 +48,9 @@ _Py_qsbr_init(struct qsbr_shared *shared);
4648
uint64_t
4749
_Py_qsbr_advance(struct qsbr_shared *shared);
4850

51+
/* Return a sequence number for a deferred free; only some calls actually
   advance the shared sequence (see the definition in Python/qsbr.c). */
uint64_t
52+
_Py_qsbr_deferred_advance(struct qsbr *qsbr);
53+
4954
bool
5055
_Py_qsbr_poll(struct qsbr *qsbr, uint64_t goal);
5156

Objects/obmalloc.c

Lines changed: 146 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
#include "Python.h"
22
#include "pycore_code.h" // stats
33
#include "pycore_pystate.h" // _PyInterpreterState_GET
4-
54
#include "pycore_obmalloc.h"
65
#include "pycore_pymem.h"
76
#include "pycore_pymem_init.h"
7+
#include "pycore_pyqueue.h"
8+
#include "pycore_qsbr.h"
89

910
#include <stdlib.h> // malloc()
1011
#include <stdbool.h>
@@ -675,6 +676,150 @@ PyMem_Free(void *ptr)
675676
}
676677

677678

679+
/* One deferred free request. */
typedef struct {
680+
/* Pointer that will eventually be passed to PyMem_Free. */
void *ptr;
681+
/* QSBR sequence number handed to _Py_qsbr_poll to decide whether `ptr`
   may be freed yet. */
uint64_t seq;
682+
} _PyMem_WorkItem;
683+
684+
/* Capacity of a single work buffer. */
#define PY_MEM_WORK_ITEMS 127
685+
686+
/* Fixed-size batch of deferred frees, linked into a _Py_queue_head. */
typedef struct _PyMemWork {
687+
/* Queue linkage. */
struct _Py_queue_node node;
688+
/* Index of the next item to free; items before it are already freed. */
unsigned int first;
689+
/* Number of items stored so far; the next item is written at this index. */
unsigned int size;
690+
_PyMem_WorkItem items[PY_MEM_WORK_ITEMS];
691+
} _PyMem_WorkBuf;
692+
693+
void
694+
_PyMem_FreeQsbr(void *ptr)
695+
{
696+
PyThreadState *tstate = _PyThreadState_GET();
697+
698+
// Try to get an non-full workbuf
699+
_PyMem_WorkBuf *work = NULL;
700+
if (!_Py_queue_is_empty(&tstate->mem_work)) {
701+
work = _Py_queue_last(&tstate->mem_work, _PyMem_WorkBuf, node);
702+
if (work->size == PY_MEM_WORK_ITEMS) {
703+
work = NULL;
704+
}
705+
}
706+
707+
if (work == NULL) {
708+
work = PyMem_RawMalloc(sizeof(_PyMem_WorkBuf));
709+
if (work == NULL) {
710+
Py_FatalError("out of memory (in _PyMem_FreeQsbr)");
711+
}
712+
work->first = work->size = 0;
713+
_Py_queue_enqeue(&tstate->mem_work, &work->node);
714+
}
715+
716+
PyThreadStateImpl *tstate_impl = (PyThreadStateImpl *)tstate;
717+
work->items[work->size].ptr = ptr;
718+
work->items[work->size].seq = _Py_qsbr_deferred_advance(tstate_impl->qsbr);
719+
work->size++;
720+
721+
if (work->size == PY_MEM_WORK_ITEMS) {
722+
// Now seems like a good time to check for any memory that can be freed.
723+
_PyMem_QsbrPoll(tstate);
724+
}
725+
}
726+
727+
static int
728+
_PyMem_ProcessQueue(struct _Py_queue_head *queue, struct qsbr *qsbr, bool keep_empty)
729+
{
730+
while (!_Py_queue_is_empty(queue)) {
731+
_PyMem_WorkBuf *work = _Py_queue_first(queue, _PyMem_WorkBuf, node);
732+
if (work->size == 0 && keep_empty) {
733+
return 0;
734+
}
735+
while (work->first < work->size) {
736+
_PyMem_WorkItem *item = &work->items[work->first];
737+
if (!_Py_qsbr_poll(qsbr, item->seq)) {
738+
return 1;
739+
}
740+
PyMem_Free(item->ptr);
741+
work->first++;
742+
}
743+
744+
// Remove the empty work buffer
745+
_Py_queue_dequeue(queue);
746+
747+
// If the queue doesn't have an empty work buffer, stick this
748+
// one at the end of the queue. Otherwise, free it.
749+
if (keep_empty && _Py_queue_is_empty(queue)) {
750+
work->first = work->size = 0;
751+
_Py_queue_enqeue(queue, &work->node);
752+
return 0;
753+
}
754+
else if (keep_empty && _Py_queue_last(queue, _PyMem_WorkBuf, node)->size == 0) {
755+
work->first = work->size = 0;
756+
_Py_queue_enqeue(queue, &work->node);
757+
}
758+
else {
759+
PyMem_RawFree(work);
760+
}
761+
}
762+
return 0;
763+
}
764+
765+
void
766+
_PyMem_QsbrPoll(PyThreadState *tstate)
767+
{
768+
struct qsbr *qsbr = ((PyThreadStateImpl *)tstate)->qsbr;
769+
770+
// Process any work on the thread-local queue.
771+
_PyMem_ProcessQueue(&tstate->mem_work, qsbr, true);
772+
773+
// Process any work on the interpreter queue if we can get the lock.
774+
PyInterpreterState *interp = tstate->interp;
775+
if (_Py_atomic_load_int_relaxed(&interp->mem.nonempty) &&
776+
_PyMutex_TryLock(&interp->mem.mutex)) {
777+
int more = _PyMem_ProcessQueue(&interp->mem.work, qsbr, false);
778+
_Py_atomic_store_int_relaxed(&interp->mem.nonempty, more);
779+
_PyMutex_unlock(&interp->mem.mutex);
780+
}
781+
}
782+
783+
void
784+
_PyMem_QsbrFini(PyInterpreterState *interp)
785+
{
786+
struct _Py_queue_head *queue = &interp->mem.work;
787+
while (!_Py_queue_is_empty(queue)) {
788+
_PyMem_WorkBuf *work = _Py_queue_first(queue, _PyMem_WorkBuf, node);
789+
while (work->first < work->size) {
790+
_PyMem_WorkItem *item = &work->items[work->first];
791+
PyMem_Free(item->ptr);
792+
work->first++;
793+
}
794+
_Py_queue_dequeue(queue);
795+
PyMem_RawFree(work);
796+
}
797+
interp->mem.nonempty = 0;
798+
}
799+
800+
void
801+
_PyMem_AbandonQsbr(PyThreadState *tstate)
802+
{
803+
PyInterpreterState *interp = tstate->interp;
804+
805+
while (!_Py_queue_is_empty(&tstate->mem_work)) {
806+
struct _Py_queue_node *node = _Py_queue_dequeue(&tstate->mem_work);
807+
if (node == NULL) {
808+
break;
809+
}
810+
_PyMem_WorkBuf *work = _Py_queue_data(node, _PyMem_WorkBuf, node);
811+
if (work->first == work->size) {
812+
PyMem_RawFree(work);
813+
}
814+
else {
815+
_PyMutex_lock(&interp->mem.mutex);
816+
_Py_queue_enqeue(&interp->mem.work, node);
817+
_Py_atomic_store_int_relaxed(&interp->mem.nonempty, 1);
818+
_PyMutex_unlock(&interp->mem.mutex);
819+
}
820+
}
821+
}
822+
678823
wchar_t*
679824
_PyMem_RawWcsdup(const wchar_t *str)
680825
{

Python/pystate.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "pycore_pyerrors.h"
1313
#include "pycore_pylifecycle.h"
1414
#include "pycore_pymem.h" // _PyMem_DefaultRawFree()
15+
#include "pycore_pyqueue.h" // _Py_queue_init
1516
#include "pycore_pystate.h" // _PyThreadState_GET()
1617
#include "pycore_qsbr.h"
1718
#include "pycore_runtime_init.h" // _PyRuntimeState_INIT
@@ -464,6 +465,7 @@ init_interpreter(PyInterpreterState *interp,
464465
_PyGC_InitState(&interp->gc);
465466
PyConfig_InitPythonConfig(&interp->config);
466467
_PyType_InitCache(interp);
468+
_Py_queue_init(&interp->mem.work);
467469

468470
interp->_initialized = 1;
469471
}
@@ -598,6 +600,9 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
598600
_PyGC_CollectNoFail(tstate);
599601
_PyGC_Fini(interp);
600602

603+
/* Perform any delayed PyMem_Free calls */
604+
_PyMem_QsbrFini(interp);
605+
601606
/* We don't clear sysdict and builtins until the end of this function.
602607
Because clearing other attributes can execute arbitrary Python code
603608
which requires sysdict and builtins. */
@@ -1051,6 +1056,7 @@ init_threadstate(PyThreadState *tstate,
10511056
tstate->daemon = (id > 1);
10521057
tstate->done_event = done_event;
10531058
_PyEventRc_Incref(done_event);
1059+
_Py_queue_init(&tstate->mem_work);
10541060

10551061
if (_PyRuntime.stop_the_world_requested) {
10561062
tstate->status = _Py_THREAD_GC;
@@ -1339,6 +1345,7 @@ PyThreadState_Clear(PyThreadState *tstate)
13391345
}
13401346

13411347
_Py_queue_destroy(tstate);
1348+
_PyMem_AbandonQsbr(tstate);
13421349

13431350
/* Don't clear tstate->pyframe: it is a borrowed reference */
13441351

Python/qsbr.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ _Py_qsbr_alloc(struct qsbr_shared *shared)
5858
}
5959
memset(qsbr, 0, sizeof(*qsbr));
6060
qsbr->t_shared = shared;
61+
qsbr->t_limit = 32;
6162
return qsbr;
6263
}
6364

@@ -95,6 +96,16 @@ _Py_qsbr_advance(struct qsbr_shared *shared)
9596
return _Py_atomic_add_uint64(&shared->s_wr, QSBR_INCR) + QSBR_INCR;
9697
}
9798

99+
uint64_t
100+
_Py_qsbr_deferred_advance(struct qsbr *qsbr)
101+
{
102+
if (++qsbr->t_deferred < qsbr->t_limit) {
103+
return _Py_qsbr_shared_current(qsbr->t_shared) + QSBR_INCR;
104+
}
105+
qsbr->t_deferred = 0;
106+
return _Py_qsbr_advance(qsbr->t_shared);
107+
}
108+
98109
uint64_t
99110
_Py_qsbr_poll_scan(struct qsbr_shared *shared)
100111
{

0 commit comments

Comments
 (0)