Skip to content

Commit 4149aea

Browse files
gvanrossum authored and diegorusso committed
pythongh-117045: Add code object to function version cache (python#117028)
Changes to the function version cache:

- In addition to the function object, also store the code object, and allow the latter to be retrieved even if the function has been evicted.
- Stop assigning new function versions after a critical attribute (e.g. `__code__`) has been modified; the version is permanently reset to zero in this case.
- Changes to `__annotations__` are no longer considered critical. (This fixes pythongh-109998.)

Changes to the Tier 2 optimization machinery:

- If we cannot map a function version to a function, but it is still mapped to a code object, we continue projecting the trace. The operand of the `_PUSH_FRAME` and `_POP_FRAME` opcodes can be either NULL, a function object, or a code object with the lowest bit set.

This allows us to trace through code that calls an ephemeral function, i.e., a function that may not be alive when we are constructing the executor, e.g. a generator expression or certain nested functions. We will lose globals removal inside such functions, but we can still do other peephole operations (and even possibly [call inlining](python#116290), if we decide to do it), which only need the code object. As before, if we cannot retrieve the code object from the cache, we stop projecting.
1 parent d50a5bd commit 4149aea

File tree

8 files changed

+209
-96
lines changed

8 files changed

+209
-96
lines changed

Include/internal/pycore_frame.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ enum _frameowner {
5555
};
5656

5757
typedef struct _PyInterpreterFrame {
58-
PyObject *f_executable; /* Strong reference */
58+
PyObject *f_executable; /* Strong reference (code object or None) */
5959
struct _PyInterpreterFrame *previous;
6060
PyObject *f_funcobj; /* Strong reference. Only valid if not on C stack */
6161
PyObject *f_globals; /* Borrowed reference. Only valid if not on C stack */

Include/internal/pycore_function.h

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,20 +17,27 @@ extern PyObject* _PyFunction_Vectorcall(
1717
#define FUNC_MAX_WATCHERS 8
1818

1919
#define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */
20+
21+
struct _func_version_cache_item {
22+
PyFunctionObject *func;
23+
PyObject *code;
24+
};
25+
2026
struct _py_func_state {
2127
uint32_t next_version;
22-
// Borrowed references to function objects whose
28+
// Borrowed references to function and code objects whose
2329
// func_version % FUNC_VERSION_CACHE_SIZE
2430
// once was equal to the index in the table.
25-
// They are cleared when the function is deallocated.
26-
PyFunctionObject *func_version_cache[FUNC_VERSION_CACHE_SIZE];
31+
// They are cleared when the function or code object is deallocated.
32+
struct _func_version_cache_item func_version_cache[FUNC_VERSION_CACHE_SIZE];
2733
};
2834

2935
extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr);
3036

3137
extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func);
3238
PyAPI_FUNC(void) _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version);
33-
PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version);
39+
void _PyFunction_ClearCodeByVersion(uint32_t version);
40+
PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version, PyObject **p_code);
3441

3542
extern PyObject *_Py_set_function_type_params(
3643
PyThreadState* unused, PyObject *func, PyObject *type_params);

Objects/codeobject.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1710,6 +1710,7 @@ code_dealloc(PyCodeObject *co)
17101710
}
17111711
Py_SET_REFCNT(co, 0);
17121712

1713+
_PyFunction_ClearCodeByVersion(co->co_version);
17131714
if (co->co_extra != NULL) {
17141715
PyInterpreterState *interp = _PyInterpreterState_GET();
17151716
_PyCodeObjectExtra *co_extra = co->co_extra;

Objects/funcobject.c

Lines changed: 90 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -218,92 +218,131 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname
218218
}
219219

220220
/*
221-
Function versions
222-
-----------------
221+
(This is purely internal documentation. There are no public APIs here.)
223222
224-
Function versions are used to detect when a function object has been
225-
updated, invalidating inline cache data used by the `CALL` bytecode
226-
(notably `CALL_PY_EXACT_ARGS` and a few other `CALL` specializations).
223+
Function (and code) versions
224+
----------------------------
227225
228-
They are also used by the Tier 2 superblock creation code to find
229-
the function being called (and from there the code object).
226+
The Tier 1 specializer generates CALL variants that can be invalidated
227+
by changes to critical function attributes:
230228
231-
How does a function's `func_version` field get initialized?
229+
- __code__
230+
- __defaults__
231+
- __kwdefaults__
232+
- __closure__
232233
233-
- `PyFunction_New` and friends initialize it to 0.
234-
- The `MAKE_FUNCTION` instruction sets it from the code's `co_version`.
235-
- It is reset to 0 when various attributes like `__code__` are set.
236-
- A new version is allocated by `_PyFunction_GetVersionForCurrentState`
237-
when the specializer needs a version and the version is 0.
234+
For this purpose function objects have a 32-bit func_version member
235+
that the specializer writes to the specialized instruction's inline
236+
cache and which is checked by a guard on the specialized instructions.
238237
239-
The latter allocates versions using a counter in the interpreter state,
240-
`interp->func_state.next_version`.
241-
When the counter wraps around to 0, no more versions are allocated.
242-
There is one other special case: functions with a non-standard
243-
`vectorcall` field are not given a version.
238+
The MAKE_FUNCTION bytecode sets func_version from the code object's
239+
co_version field. The latter is initialized from a counter in the
240+
interpreter state (interp->func_state.next_version) and never changes.
241+
When this counter overflows, it remains zero and the specializer loses
242+
the ability to specialize calls to new functions.
244243
245-
When the function version is 0, the `CALL` bytecode is not specialized.
244+
The func_version is reset to zero when any of the critical attributes
245+
is modified; after this point the specializer will no longer specialize
246+
calls to this function, and the guard will always fail.
246247
247-
Code object versions
248-
--------------------
248+
The function and code version cache
249+
-----------------------------------
249250
250-
So where to code objects get their `co_version`?
251-
They share the same counter, `interp->func_state.next_version`.
251+
The Tier 2 optimizer now has a problem, since it needs to find the
252+
function and code objects given only the version number from the inline
253+
cache. Our solution is to maintain a cache mapping version numbers to
254+
function and code objects. To limit the cache size we could hash
255+
the version number, but for now we simply use it modulo the table size.
256+
257+
There are some corner cases (e.g. generator expressions) where we will
258+
be unable to find the function object in the cache but we can still
259+
find the code object. For this reason the cache stores both the
260+
function object and the code object.
261+
262+
The cache doesn't contain strong references; cache entries are
263+
invalidated whenever the function or code object is deallocated.
264+
265+
Invariants
266+
----------
267+
268+
These should hold at any time except when one of the cache-mutating
269+
functions is running.
270+
271+
- For any slot s at index i:
272+
- s->func == NULL or s->func->func_version % FUNC_VERSION_CACHE_SIZE == i
273+
- s->code == NULL or s->code->co_version % FUNC_VERSION_CACHE_SIZE == i
274+
if s->func != NULL, then s->func->func_code == s->code
252275
253-
Code objects get a new `co_version` allocated from this counter upon
254-
creation. Since code objects are nominally immutable, `co_version` can
255-
not be invalidated. The only way it can be 0 is when 2**32 or more
256-
code objects have been created during the process's lifetime.
257-
(The counter isn't reset by `fork()`, extending the lifetime.)
258276
*/
259277

260278
void
261279
_PyFunction_SetVersion(PyFunctionObject *func, uint32_t version)
262280
{
263281
PyInterpreterState *interp = _PyInterpreterState_GET();
264282
if (func->func_version != 0) {
265-
PyFunctionObject **slot =
283+
struct _func_version_cache_item *slot =
266284
interp->func_state.func_version_cache
267285
+ (func->func_version % FUNC_VERSION_CACHE_SIZE);
268-
if (*slot == func) {
269-
*slot = NULL;
286+
if (slot->func == func) {
287+
slot->func = NULL;
288+
// Leave slot->code alone, there may be use for it.
270289
}
271290
}
272291
func->func_version = version;
273292
if (version != 0) {
274-
interp->func_state.func_version_cache[
275-
version % FUNC_VERSION_CACHE_SIZE] = func;
293+
struct _func_version_cache_item *slot =
294+
interp->func_state.func_version_cache
295+
+ (version % FUNC_VERSION_CACHE_SIZE);
296+
slot->func = func;
297+
slot->code = func->func_code;
298+
}
299+
}
300+
301+
void
302+
_PyFunction_ClearCodeByVersion(uint32_t version)
303+
{
304+
PyInterpreterState *interp = _PyInterpreterState_GET();
305+
struct _func_version_cache_item *slot =
306+
interp->func_state.func_version_cache
307+
+ (version % FUNC_VERSION_CACHE_SIZE);
308+
if (slot->code) {
309+
assert(PyCode_Check(slot->code));
310+
PyCodeObject *code = (PyCodeObject *)slot->code;
311+
if (code->co_version == version) {
312+
slot->code = NULL;
313+
slot->func = NULL;
314+
}
276315
}
277316
}
278317

279318
PyFunctionObject *
280-
_PyFunction_LookupByVersion(uint32_t version)
319+
_PyFunction_LookupByVersion(uint32_t version, PyObject **p_code)
281320
{
282321
PyInterpreterState *interp = _PyInterpreterState_GET();
283-
PyFunctionObject *func = interp->func_state.func_version_cache[
284-
version % FUNC_VERSION_CACHE_SIZE];
285-
if (func != NULL && func->func_version == version) {
286-
return func;
322+
struct _func_version_cache_item *slot =
323+
interp->func_state.func_version_cache
324+
+ (version % FUNC_VERSION_CACHE_SIZE);
325+
if (slot->code) {
326+
assert(PyCode_Check(slot->code));
327+
PyCodeObject *code = (PyCodeObject *)slot->code;
328+
if (code->co_version == version) {
329+
*p_code = slot->code;
330+
}
331+
}
332+
else {
333+
*p_code = NULL;
334+
}
335+
if (slot->func && slot->func->func_version == version) {
336+
assert(slot->func->func_code == slot->code);
337+
return slot->func;
287338
}
288339
return NULL;
289340
}
290341

291342
uint32_t
292343
_PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
293344
{
294-
if (func->func_version != 0) {
295-
return func->func_version;
296-
}
297-
if (func->vectorcall != _PyFunction_Vectorcall) {
298-
return 0;
299-
}
300-
PyInterpreterState *interp = _PyInterpreterState_GET();
301-
if (interp->func_state.next_version == 0) {
302-
return 0;
303-
}
304-
uint32_t v = interp->func_state.next_version++;
305-
_PyFunction_SetVersion(func, v);
306-
return v;
345+
return func->func_version;
307346
}
308347

309348
PyObject *
@@ -507,7 +546,6 @@ PyFunction_SetAnnotations(PyObject *op, PyObject *annotations)
507546
"non-dict annotations");
508547
return -1;
509548
}
510-
_PyFunction_SetVersion((PyFunctionObject *)op, 0);
511549
Py_XSETREF(((PyFunctionObject *)op)->func_annotations, annotations);
512550
return 0;
513551
}
@@ -731,7 +769,6 @@ func_set_annotations(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(igno
731769
"__annotations__ must be set to a dict object");
732770
return -1;
733771
}
734-
_PyFunction_SetVersion(op, 0);
735772
Py_XSETREF(op->func_annotations, Py_XNewRef(value));
736773
return 0;
737774
}

Python/optimizer.c

Lines changed: 45 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -211,7 +211,7 @@ _PyOptimizer_Optimize(
211211
_PyInterpreterFrame *frame, _Py_CODEUNIT *start,
212212
PyObject **stack_pointer, _PyExecutorObject **executor_ptr)
213213
{
214-
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
214+
PyCodeObject *code = _PyFrame_GetCode(frame);
215215
assert(PyCode_Check(code));
216216
PyInterpreterState *interp = _PyInterpreterState_GET();
217217
if (!has_space_for_executor(code, start)) {
@@ -479,8 +479,9 @@ BRANCH_TO_GUARD[4][2] = {
479479
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); \
480480
goto done; \
481481
} \
482-
assert(func->func_code == (PyObject *)code); \
482+
assert(func == NULL || func->func_code == (PyObject *)code); \
483483
trace_stack[trace_stack_depth].func = func; \
484+
trace_stack[trace_stack_depth].code = code; \
484485
trace_stack[trace_stack_depth].instr = instr; \
485486
trace_stack_depth++;
486487
#define TRACE_STACK_POP() \
@@ -489,7 +490,8 @@ BRANCH_TO_GUARD[4][2] = {
489490
} \
490491
trace_stack_depth--; \
491492
func = trace_stack[trace_stack_depth].func; \
492-
code = (PyCodeObject *)trace_stack[trace_stack_depth].func->func_code; \
493+
code = trace_stack[trace_stack_depth].code; \
494+
assert(func == NULL || func->func_code == (PyObject *)code); \
493495
instr = trace_stack[trace_stack_depth].instr;
494496

495497
/* Returns 1 on success,
@@ -505,7 +507,7 @@ translate_bytecode_to_trace(
505507
_PyBloomFilter *dependencies)
506508
{
507509
bool progress_needed = true;
508-
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
510+
PyCodeObject *code = _PyFrame_GetCode(frame);
509511
PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
510512
assert(PyFunction_Check(func));
511513
PyCodeObject *initial_code = code;
@@ -515,6 +517,7 @@ translate_bytecode_to_trace(
515517
int max_length = buffer_size;
516518
struct {
517519
PyFunctionObject *func;
520+
PyCodeObject *code;
518521
_Py_CODEUNIT *instr;
519522
} trace_stack[TRACE_STACK_SIZE];
520523
int trace_stack_depth = 0;
@@ -719,9 +722,19 @@ translate_bytecode_to_trace(
719722

720723
if (uop == _POP_FRAME) {
721724
TRACE_STACK_POP();
722-
/* Set the operand to the function object returned to,
723-
* to assist optimization passes */
724-
ADD_TO_TRACE(uop, oparg, (uintptr_t)func, target);
725+
/* Set the operand to the function or code object returned to,
726+
* to assist optimization passes. (See _PUSH_FRAME below.)
727+
*/
728+
if (func != NULL) {
729+
operand = (uintptr_t)func;
730+
}
731+
else if (code != NULL) {
732+
operand = (uintptr_t)code | 1;
733+
}
734+
else {
735+
operand = 0;
736+
}
737+
ADD_TO_TRACE(uop, oparg, operand, target);
725738
DPRINTF(2,
726739
"Returning to %s (%s:%d) at byte offset %d\n",
727740
PyUnicode_AsUTF8(code->co_qualname),
@@ -738,10 +751,12 @@ translate_bytecode_to_trace(
738751
// Add one to account for the actual opcode/oparg pair:
739752
+ 1;
740753
uint32_t func_version = read_u32(&instr[func_version_offset].cache);
741-
PyFunctionObject *new_func = _PyFunction_LookupByVersion(func_version);
742-
DPRINTF(2, "Function: version=%#x; object=%p\n", (int)func_version, new_func);
743-
if (new_func != NULL) {
744-
PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(new_func);
754+
PyCodeObject *new_code = NULL;
755+
PyFunctionObject *new_func =
756+
_PyFunction_LookupByVersion(func_version, (PyObject **) &new_code);
757+
DPRINTF(2, "Function: version=%#x; new_func=%p, new_code=%p\n",
758+
(int)func_version, new_func, new_code);
759+
if (new_code != NULL) {
745760
if (new_code == code) {
746761
// Recursive call, bail (we could be here forever).
747762
DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n",
@@ -766,9 +781,22 @@ translate_bytecode_to_trace(
766781
instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
767782
TRACE_STACK_PUSH();
768783
_Py_BloomFilter_Add(dependencies, new_code);
769-
/* Set the operand to the callee's function object,
770-
* to assist optimization passes */
771-
ADD_TO_TRACE(uop, oparg, (uintptr_t)new_func, target);
784+
/* Set the operand to the callee's function or code object,
785+
* to assist optimization passes.
786+
* We prefer setting it to the function (for remove_globals())
787+
* but if that's not available but the code is available,
788+
* use the code, setting the low bit so the optimizer knows.
789+
*/
790+
if (new_func != NULL) {
791+
operand = (uintptr_t)new_func;
792+
}
793+
else if (new_code != NULL) {
794+
operand = (uintptr_t)new_code | 1;
795+
}
796+
else {
797+
operand = 0;
798+
}
799+
ADD_TO_TRACE(uop, oparg, operand, target);
772800
code = new_code;
773801
func = new_func;
774802
instr = _PyCode_CODE(code);
@@ -780,8 +808,8 @@ translate_bytecode_to_trace(
780808
2 * INSTR_IP(instr, code));
781809
goto top;
782810
}
783-
DPRINTF(2, "Bail, new_func == NULL\n");
784-
ADD_TO_TRACE(uop, oparg, operand, target);
811+
DPRINTF(2, "Bail, new_code == NULL\n");
812+
ADD_TO_TRACE(uop, oparg, 0, target);
785813
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
786814
goto done;
787815
}
@@ -1116,7 +1144,7 @@ counter_optimize(
11161144
int Py_UNUSED(curr_stackentries)
11171145
)
11181146
{
1119-
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
1147+
PyCodeObject *code = _PyFrame_GetCode(frame);
11201148
int oparg = instr->op.arg;
11211149
while (instr->op.code == EXTENDED_ARG) {
11221150
instr++;

0 commit comments

Comments
 (0)