From eb895df9cd4522b0a43df3e1c425c12cf56f2daf Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 16 Jul 2024 14:10:20 -0700 Subject: [PATCH 1/2] Invalidate side exits, too --- Include/internal/pycore_optimizer.h | 2 +- Include/internal/pycore_uop_metadata.h | 2 +- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 5 +- Python/optimizer.c | 70 +++++++++++++++----------- 5 files changed, 46 insertions(+), 35 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index bcbb8b73706359..5a0e0da98adf71 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -62,7 +62,7 @@ typedef struct { typedef struct { uint32_t target; _Py_BackoffCounter temperature; - const struct _PyExecutorObject *executor; + struct _PyExecutorObject *executor; } _PyExitData; typedef struct _PyExecutorObject { diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 2a2d6e923b7617..3d8e78f330732c 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -254,7 +254,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, - [_START_EXECUTOR] = HAS_DEOPT_FLAG, + [_START_EXECUTOR] = 0, [_FATAL_ERROR] = 0, [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, [_DEOPT] = 0, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 84241c64ffae88..ecc4442c798a5b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4748,7 +4748,7 @@ dummy_func( #ifndef _Py_JIT current_executor = (_PyExecutorObject*)executor; #endif - DEOPT_IF(!((_PyExecutorObject *)executor)->vm_data.valid); + assert(((_PyExecutorObject *)executor)->vm_data.valid); } tier2 op(_FATAL_ERROR, (--)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8f6bc75b528d9b..12218fc776e72e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5004,10 +5004,7 @@ #ifndef _Py_JIT current_executor = (_PyExecutorObject*)executor; #endif - if (!((_PyExecutorObject *)executor)->vm_data.valid) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } + assert(((_PyExecutorObject *)executor)->vm_data.valid); break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 561ec4efa4ee2a..db0bd984cb48a7 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1581,42 +1581,56 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is _Py_BloomFilter_Add(&obj_filter, obj); /* Walk the list of executors */ /* TO DO -- Use a tree to avoid traversing as many objects */ - bool no_memory = false; - PyObject *invalidate = PyList_New(0); - if (invalidate == NULL) { - PyErr_Clear(); - no_memory = true; - } /* Clearing an executor can deallocate others, so we need to make a list of * executors to invalidate first */ - for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { + PyObject *refs = PyList_New(0); + if (refs == NULL) { + goto error; + } + // First, invalidate all executors that depend on the object and remove them + // from the linked list: + for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL; + exec = exec->vm_data.links.next) + { assert(exec->vm_data.valid); - _PyExecutorObject *next = exec->vm_data.links.next; - if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter)) { - unlink_executor(exec); - if (no_memory) { - exec->vm_data.valid = 0; - } else { - if (PyList_Append(invalidate, (PyObject *)exec) < 0) { - PyErr_Clear(); - no_memory = true; - exec->vm_data.valid = 0; - } - } - if (is_invalidation) { - OPT_STAT_INC(executors_invalidated); - } + if (!bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter)) { + continue; + } + if (PyList_Append(refs, (PyObject *)exec)) { + goto error; } - exec = next; } - if (invalidate != NULL) { - for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) { - _PyExecutorObject *exec = (_PyExecutorObject *)PyList_GET_ITEM(invalidate, i); - executor_clear(exec); + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(refs); i++) { + executor_clear((_PyExecutorObject *)PyList_GET_ITEM(refs, i)); + if (is_invalidation) { + OPT_STAT_INC(executors_invalidated); + } + } + // Then, remove any newly-invalidated executors from side exits: + for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL; + exec = exec->vm_data.links.next) + { + assert(exec->vm_data.valid); + for (uint32_t i = 0; i < exec->exit_count; i++) { + _PyExitData *exit = &exec->exits[i]; + _PyExecutorObject *exit_executor = exit->executor; + if (exit_executor == NULL || exit_executor->vm_data.valid) { + continue; + } + exit->temperature = initial_temperature_backoff_counter(); + exit->executor = NULL; + if (_PyList_AppendTakeRef((PyListObject *)refs, (PyObject *)exit_executor)) { + goto error; + } } - Py_DECREF(invalidate); } + Py_DECREF(refs); return; +error: + PyErr_Clear(); + Py_XDECREF(refs); + // If we're truly out of memory, wiping out everything is a fine fallback: + _Py_Executors_InvalidateAll(interp, is_invalidation); } /* Invalidate all executors */ From 8e00533e4241b7f4efc7ac6a65674dc4e9fd44af Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 18 Jul 2024 16:00:15 -0700 Subject: [PATCH 2/2] Remove invalidated side exits lazily --- Include/internal/pycore_optimizer.h | 2 +- Python/bytecodes.c | 4 +++ Python/executor_cases.c.h | 4 +++ Python/optimizer.c | 50 +++++++++-------------------- 4 files changed, 24 insertions(+), 36 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 5a0e0da98adf71..bcbb8b73706359 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -62,7 +62,7 @@ typedef struct { typedef struct { uint32_t target; _Py_BackoffCounter temperature; - struct _PyExecutorObject *executor; + const struct _PyExecutorObject *executor; } _PyExitData; typedef struct _PyExecutorObject { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 644425290f90b4..9dd7cf37beecf0 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4624,6 +4624,10 @@ dummy_func( _PyOpcode_OpName[target->op.code]); } #endif + if (exit->executor && !exit->executor->vm_data.valid) { + exit->temperature = initial_temperature_backoff_counter(); + Py_CLEAR(exit->executor); + } if (exit->executor == NULL) { _Py_BackoffCounter temperature = exit->temperature; if (!backoff_counter_triggers(temperature)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0d77cde552567c..2a4428e4a52cf0 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4986,6 +4986,10 @@ _PyOpcode_OpName[target->op.code]); } #endif + if (exit->executor && !exit->executor->vm_data.valid) { + exit->temperature = initial_temperature_backoff_counter(); + Py_CLEAR(exit->executor); + } if (exit->executor == NULL) { _Py_BackoffCounter temperature = exit->temperature; if (!backoff_counter_triggers(temperature)) { diff --git a/Python/optimizer.c b/Python/optimizer.c index f0e65db7dca6e9..73316b3587f221 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1587,54 +1587,34 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is _Py_BloomFilter_Add(&obj_filter, obj); /* Walk the list of executors */ /* TO DO -- Use a tree to avoid traversing as many objects */ - /* Clearing an executor can deallocate others, so we need to make a list of - * executors to invalidate first */ - PyObject *refs = PyList_New(0); - if (refs == NULL) { + PyObject *invalidate = PyList_New(0); + if (invalidate == NULL) { goto error; } - // First, invalidate all executors that depend on the object and remove them - // from the linked list: - for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL; - exec = exec->vm_data.links.next) - { + /* Clearing an executor can deallocate others, so we need to make a list of + * executors to invalidate first */ + for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { assert(exec->vm_data.valid); - if (!bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter)) { - continue; - } - if (PyList_Append(refs, (PyObject *)exec)) { + _PyExecutorObject *next = exec->vm_data.links.next; + if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter) && + PyList_Append(invalidate, (PyObject *)exec)) + { goto error; } + exec = next; } - for (Py_ssize_t i = 0; i < PyList_GET_SIZE(refs); i++) { - executor_clear((_PyExecutorObject *)PyList_GET_ITEM(refs, i)); + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) { + _PyExecutorObject *exec = (_PyExecutorObject *)PyList_GET_ITEM(invalidate, i); + executor_clear(exec); if (is_invalidation) { OPT_STAT_INC(executors_invalidated); } } - // Then, remove any newly-invalidated executors from side exits: - for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL; - exec = exec->vm_data.links.next) - { - assert(exec->vm_data.valid); - for (uint32_t i = 0; i < exec->exit_count; i++) { - _PyExitData *exit = &exec->exits[i]; - _PyExecutorObject *exit_executor = exit->executor; - if (exit_executor == NULL || exit_executor->vm_data.valid) { - continue; - } - exit->temperature = initial_temperature_backoff_counter(); - exit->executor = NULL; - if (_PyList_AppendTakeRef((PyListObject *)refs, (PyObject *)exit_executor)) { - goto error; - } - } - } - Py_DECREF(refs); + Py_DECREF(invalidate); return; error: PyErr_Clear(); - Py_XDECREF(refs); + Py_XDECREF(invalidate); // If we're truly out of memory, wiping out everything is a fine fallback: _Py_Executors_InvalidateAll(interp, is_invalidation); }