From f2dbcf9528e83ef322083ed03b01b95ec75b1fa9 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 10 Feb 2023 11:41:52 +0000 Subject: [PATCH 1/6] Leave receiver on stack when exiting SEND loop. --- Lib/importlib/_bootstrap_external.py | 3 ++- Lib/test/test_dis.py | 21 +++++++++++++-------- Objects/frameobject.c | 4 ++-- Python/bytecodes.c | 8 +++----- Python/compile.c | 2 ++ Python/generated_cases.c.h | 10 ++++------ Python/opcode_metadata.h | 4 ++-- 7 files changed, 28 insertions(+), 24 deletions(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 933c8c7d7e0590..38d4a384c2cc95 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -432,6 +432,7 @@ def _write_atomic(path, data, mode=0o666): # Python 3.12a5 3516 (Add COMPARE_AND_BRANCH instruction) # Python 3.12a5 3517 (Change YIELD_VALUE oparg to exception block depth) # Python 3.12a5 3518 (Add RETURN_CONST instruction) +# Python 3.12a5 3519 (Modify SEND instruction) # Python 3.13 will start with 3550 @@ -444,7 +445,7 @@ def _write_atomic(path, data, mode=0o666): # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. -MAGIC_NUMBER = (3518).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3519).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 1050b15e16eaaa..eba03ede8559b0 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -479,7 +479,9 @@ async def _asyncwith(c): YIELD_VALUE 2 RESUME 3 JUMP_BACKWARD_NO_INTERRUPT 4 (to 14) - >> POP_TOP + >> SWAP 2 + POP_TOP + POP_TOP %3d LOAD_CONST 1 (1) STORE_FAST 1 (x) @@ -490,30 +492,33 @@ async def _asyncwith(c): CALL 2 GET_AWAITABLE 2 LOAD_CONST 0 (None) - >> SEND 3 (to 56) + >> SEND 3 (to 60) YIELD_VALUE 2 RESUME 3 - JUMP_BACKWARD_NO_INTERRUPT 4 (to 48) + JUMP_BACKWARD_NO_INTERRUPT 4 (to 52) >> POP_TOP + POP_TOP %3d LOAD_CONST 2 (2) STORE_FAST 2 (y) RETURN_CONST 0 (None) %3d >> CLEANUP_THROW - JUMP_BACKWARD 23 (to 22) + JUMP_BACKWARD 26 (to 22) >> CLEANUP_THROW - JUMP_BACKWARD 8 (to 56) + JUMP_BACKWARD 9 (to 60) >> PUSH_EXC_INFO WITH_EXCEPT_START GET_AWAITABLE 2 LOAD_CONST 0 (None) - >> SEND 4 (to 90) + >> SEND 4 (to 96) YIELD_VALUE 3 RESUME 3 - JUMP_BACKWARD_NO_INTERRUPT 4 (to 80) + JUMP_BACKWARD_NO_INTERRUPT 4 (to 86) >> CLEANUP_THROW - >> POP_JUMP_IF_TRUE 1 (to 94) + >> SWAP 2 + POP_TOP + POP_JUMP_IF_TRUE 1 (to 104) RERAISE 2 >> POP_TOP POP_EXCEPT diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 0e52a3e2399c06..8e5846aa9924c6 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -336,8 +336,8 @@ mark_stacks(PyCodeObject *code_obj, int len) case SEND: j = get_arg(code, i) + i + 1; assert(j < len); - assert(stacks[j] == UNINITIALIZED || stacks[j] == pop_value(next_stack)); - stacks[j] = pop_value(next_stack); + assert(stacks[j] == UNINITIALIZED || stacks[j] == next_stack); + stacks[j] = next_stack; stacks[i+1] = next_stack; break; case JUMP_FORWARD: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2b9f12fefa14e9..f6d872b47da073 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -680,9 +680,8 @@ dummy_func( PREDICT(LOAD_CONST); } - inst(SEND, (receiver, v -- receiver if (!jump), retval)) { + inst(SEND, (receiver, v -- receiver, retval)) { assert(frame != &entry_frame); - bool jump = false; PySendResult gen_status; if (tstate->c_tracefunc == NULL) { gen_status = PyIter_Send(receiver, v, &retval); @@ -715,9 +714,7 @@ dummy_func( Py_DECREF(v); if (gen_status == PYGEN_RETURN) { assert(retval != NULL); - Py_DECREF(receiver); JUMPBY(oparg); - jump = true; } else { assert(gen_status == PYGEN_NEXT); @@ -796,12 +793,13 @@ dummy_func( } } - inst(CLEANUP_THROW, (sub_iter, last_sent_val, exc_value -- value)) { + inst(CLEANUP_THROW, (sub_iter, last_sent_val, exc_value -- none, value)) { assert(throwflag); assert(exc_value && PyExceptionInstance_Check(exc_value)); if (PyErr_GivenExceptionMatches(exc_value, PyExc_StopIteration)) { value = Py_NewRef(((PyStopIterationObject *)exc_value)->value); DECREF_INPUTS(); + none = Py_NewRef(Py_None); } else { _PyErr_SetRaisedException(tstate, Py_NewRef(exc_value)); diff --git a/Python/compile.c b/Python/compile.c index a3c915c3c14a96..b49eda314eeef1 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -1789,6 +1789,8 @@ compiler_add_yield_from(struct compiler *c, location loc, int await) ADDOP(c, loc, CLEANUP_THROW); USE_LABEL(c, exit); + ADDOP_I(c, loc, SWAP, 2); + ADDOP(c, loc, POP_TOP); return SUCCESS; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a224d4eb892785..656bfbf552faa0 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -886,7 +886,6 @@ PyObject *receiver = PEEK(2); PyObject *retval; assert(frame != &entry_frame); - bool jump = false; PySendResult gen_status; if (tstate->c_tracefunc == NULL) { gen_status = PyIter_Send(receiver, v, &retval); @@ -919,16 +918,12 @@ Py_DECREF(v); if (gen_status == PYGEN_RETURN) { assert(retval != NULL); - Py_DECREF(receiver); JUMPBY(oparg); - jump = true; } else { assert(gen_status == PYGEN_NEXT); assert(retval != NULL); } - STACK_SHRINK(1); - STACK_GROW(((!jump) ? 1 : 0)); POKE(1, retval); DISPATCH(); } @@ -1026,6 +1021,7 @@ PyObject *exc_value = PEEK(1); PyObject *last_sent_val = PEEK(2); PyObject *sub_iter = PEEK(3); + PyObject *none; PyObject *value; assert(throwflag); assert(exc_value && PyExceptionInstance_Check(exc_value)); @@ -1034,13 +1030,15 @@ Py_DECREF(sub_iter); Py_DECREF(last_sent_val); Py_DECREF(exc_value); + none = Py_NewRef(Py_None); } else { _PyErr_SetRaisedException(tstate, Py_NewRef(exc_value)); goto exception_unwind; } - STACK_SHRINK(2); + STACK_SHRINK(1); POKE(1, value); + POKE(2, none); DISPATCH(); } diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h index db1dfd37a90132..b037c9a9ec4bcf 100644 --- a/Python/opcode_metadata.h +++ b/Python/opcode_metadata.h @@ -453,7 +453,7 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { case GET_AWAITABLE: return 1; case SEND: - return ((!jump) ? 1 : 0) + 1; + return 2; case YIELD_VALUE: return 1; case POP_EXCEPT: @@ -465,7 +465,7 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { case END_ASYNC_FOR: return 0; case CLEANUP_THROW: - return 1; + return 2; case LOAD_ASSERTION_ERROR: return 1; case LOAD_BUILD_CLASS: From 685d559bfc6efb9e360fffdf7b4775855c6fc95e Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 10 Feb 2023 13:42:11 +0000 Subject: [PATCH 2/6] Specialize SEND for generators and coroutines. --- Include/internal/pycore_code.h | 7 +++ Include/internal/pycore_opcode.h | 5 +- Include/opcode.h | 1 + Lib/dis.py | 9 +-- Lib/importlib/_bootstrap_external.py | 2 +- Lib/opcode.py | 8 ++- Lib/test/test_dis.py | 18 +++--- Objects/frameobject.c | 2 +- Python/bytecodes.c | 82 +++++++++++++++++----------- Python/generated_cases.c.h | 79 ++++++++++++++++----------- Python/opcode_metadata.h | 7 ++- Python/opcode_targets.h | 2 +- Python/specialize.c | 25 ++++++++- 13 files changed, 163 insertions(+), 84 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index a287250acc1912..10f1e320a12ff4 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -92,6 +92,12 @@ typedef struct { #define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache) +typedef struct { + uint16_t counter; +} _PySendCache; + +#define INLINE_CACHE_ENTRIES_SEND CACHE_ENTRIES(_PySendCache) + // Borrowed references to common callables: struct callable_cache { PyObject *isinstance; @@ -233,6 +239,7 @@ extern void _Py_Specialize_CompareAndBranch(PyObject *lhs, PyObject *rhs, extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, int oparg); extern void _Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr, int oparg); +extern void _Py_Specialize_Send(PyObject *receiver, _Py_CODEUNIT *instr); /* Finalizer function for static codeobjects used in deepfreeze.py */ extern void _PyStaticCode_Fini(PyCodeObject *co); diff --git a/Include/internal/pycore_opcode.h b/Include/internal/pycore_opcode.h index 47c84721335196..5e65adee9e00a5 100644 --- a/Include/internal/pycore_opcode.h +++ b/Include/internal/pycore_opcode.h @@ -50,6 +50,7 @@ const uint8_t _PyOpcode_Caches[256] = { [COMPARE_OP] = 1, [LOAD_GLOBAL] = 5, [BINARY_OP] = 1, + [SEND] = 1, [COMPARE_AND_BRANCH] = 1, [CALL] = 4, }; @@ -196,6 +197,7 @@ const uint8_t _PyOpcode_Deopt[256] = { [RETURN_GENERATOR] = RETURN_GENERATOR, [RETURN_VALUE] = RETURN_VALUE, [SEND] = SEND, + [SEND_GEN] = SEND, [SETUP_ANNOTATIONS] = SETUP_ANNOTATIONS, [SET_ADD] = SET_ADD, [SET_UPDATE] = SET_UPDATE, @@ -395,7 +397,7 @@ static const char *const _PyOpcode_OpName[263] = { [SET_UPDATE] = "SET_UPDATE", [DICT_MERGE] = "DICT_MERGE", [DICT_UPDATE] = "DICT_UPDATE", - [166] = "<166>", + [SEND_GEN] = "SEND_GEN", [167] = "<167>", [168] = "<168>", [169] = "<169>", @@ -496,7 +498,6 @@ static const char *const _PyOpcode_OpName[263] = { #endif #define EXTRA_CASES \ - case 166: \ case 167: \ case 168: \ case 169: \ diff --git a/Include/opcode.h b/Include/opcode.h index 77ad7c22440d72..d643741c3c3aa0 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -187,6 +187,7 @@ extern "C" { #define UNPACK_SEQUENCE_LIST 159 #define UNPACK_SEQUENCE_TUPLE 160 #define UNPACK_SEQUENCE_TWO_TUPLE 161 +#define SEND_GEN 166 #define DO_TRACING 255 #define HAS_ARG(op) ((((op) >= HAVE_ARGUMENT) && (!IS_PSEUDO_OPCODE(op)))\ diff --git a/Lib/dis.py b/Lib/dis.py index a6921008d9d0e5..9edde6ae8258da 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -39,6 +39,7 @@ BINARY_OP = opmap['BINARY_OP'] JUMP_BACKWARD = opmap['JUMP_BACKWARD'] FOR_ITER = opmap['FOR_ITER'] +SEND = opmap['SEND'] LOAD_ATTR = opmap['LOAD_ATTR'] CACHE = opmap["CACHE"] @@ -453,6 +454,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None, argrepr = '' positions = Positions(*next(co_positions, ())) deop = _deoptop(op) + caches = _inline_cache_entries[deop] if arg is not None: # Set argval to the dereferenced value of the argument when # available, and argrepr to the string representation of argval. @@ -478,8 +480,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None, elif deop in hasjrel: signed_arg = -arg if _is_backward_jump(deop) else arg argval = offset + 2 + signed_arg*2 - if deop == FOR_ITER: - argval += 2 + argval += 2 * caches argrepr = "to " + repr(argval) elif deop in haslocal or deop in hasfree: argval, argrepr = _get_name_info(arg, varname_from_oparg) @@ -633,12 +634,12 @@ def findlabels(code): for offset, op, arg in _unpack_opargs(code): if arg is not None: deop = _deoptop(op) + caches = _inline_cache_entries[deop] if deop in hasjrel: if _is_backward_jump(deop): arg = -arg label = offset + 2 + arg*2 - if deop == FOR_ITER: - label += 2 + label += 2 * caches elif deop in hasjabs: label = arg*2 else: diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 38d4a384c2cc95..5d582d85688953 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -445,7 +445,7 @@ def _write_atomic(path, data, mode=0o666): # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. -MAGIC_NUMBER = (3519).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3520).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c diff --git a/Lib/opcode.py b/Lib/opcode.py index 5f163d2ccb80df..b69cd1bbdd61ca 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -167,7 +167,7 @@ def pseudo_op(name, op, real_ops): def_op('RETURN_CONST', 121) hasconst.append(121) def_op('BINARY_OP', 122) -jrel_op('SEND', 123) # Number of bytes to skip +jrel_op('SEND', 123) # Number of words to skip def_op('LOAD_FAST', 124) # Local variable number, no null check haslocal.append(124) def_op('STORE_FAST', 125) # Local variable number @@ -370,6 +370,9 @@ def pseudo_op(name, op, real_ops): "UNPACK_SEQUENCE_TUPLE", "UNPACK_SEQUENCE_TWO_TUPLE", ], + "SEND": [ + "SEND_GEN", + ], } _specialized_instructions = [ opcode for family in _specializations.values() for opcode in family @@ -429,6 +432,9 @@ def pseudo_op(name, op, real_ops): "STORE_SUBSCR": { "counter": 1, }, + "SEND": { + "counter": 1, + }, } _inline_cache_entries = [ diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index eba03ede8559b0..9086824dd6f40c 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -475,10 +475,10 @@ async def _asyncwith(c): BEFORE_ASYNC_WITH GET_AWAITABLE 1 LOAD_CONST 0 (None) - >> SEND 3 (to 22) + >> SEND 3 (to 24) YIELD_VALUE 2 RESUME 3 - JUMP_BACKWARD_NO_INTERRUPT 4 (to 14) + JUMP_BACKWARD_NO_INTERRUPT 5 (to 14) >> SWAP 2 POP_TOP POP_TOP @@ -492,10 +492,10 @@ async def _asyncwith(c): CALL 2 GET_AWAITABLE 2 LOAD_CONST 0 (None) - >> SEND 3 (to 60) + >> SEND 3 (to 64) YIELD_VALUE 2 RESUME 3 - JUMP_BACKWARD_NO_INTERRUPT 4 (to 52) + JUMP_BACKWARD_NO_INTERRUPT 5 (to 54) >> POP_TOP POP_TOP @@ -504,21 +504,21 @@ async def _asyncwith(c): RETURN_CONST 0 (None) %3d >> CLEANUP_THROW - JUMP_BACKWARD 26 (to 22) + JUMP_BACKWARD 27 (to 24) >> CLEANUP_THROW - JUMP_BACKWARD 9 (to 60) + JUMP_BACKWARD 9 (to 64) >> PUSH_EXC_INFO WITH_EXCEPT_START GET_AWAITABLE 2 LOAD_CONST 0 (None) - >> SEND 4 (to 96) + >> SEND 4 (to 102) YIELD_VALUE 3 RESUME 3 - JUMP_BACKWARD_NO_INTERRUPT 4 (to 86) + JUMP_BACKWARD_NO_INTERRUPT 5 (to 90) >> CLEANUP_THROW >> SWAP 2 POP_TOP - POP_JUMP_IF_TRUE 1 (to 104) + POP_JUMP_IF_TRUE 1 (to 110) RERAISE 2 >> POP_TOP POP_EXCEPT diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 8e5846aa9924c6..581ed2d214c4d9 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -334,7 +334,7 @@ mark_stacks(PyCodeObject *code_obj, int len) break; } case SEND: - j = get_arg(code, i) + i + 1; + j = get_arg(code, i) + i + INLINE_CACHE_ENTRIES_SEND + 1; assert(j < len); assert(stacks[j] == UNINITIALIZED || stacks[j] == next_stack); stacks[j] = next_stack; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f6d872b47da073..429cd7fdafa168 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -680,48 +680,66 @@ dummy_func( PREDICT(LOAD_CONST); } - inst(SEND, (receiver, v -- receiver, retval)) { + family(for_iter, INLINE_CACHE_ENTRIES_FOR_ITER) = { + SEND, + SEND_GEN, + }; + + inst(SEND, (unused/1, receiver, v -- receiver, retval)) { + #if ENABLE_SPECIALIZATION + _PySendCache *cache = (_PySendCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + next_instr--; + _Py_Specialize_Send(receiver, next_instr); + DISPATCH_SAME_OPARG(); + } + STAT_INC(SEND, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); + #endif /* ENABLE_SPECIALIZATION */ assert(frame != &entry_frame); - PySendResult gen_status; - if (tstate->c_tracefunc == NULL) { - gen_status = PyIter_Send(receiver, v, &retval); - } else { - if (Py_IsNone(v) && PyIter_Check(receiver)) { - retval = Py_TYPE(receiver)->tp_iternext(receiver); - } - else { - retval = PyObject_CallMethodOneArg(receiver, &_Py_ID(send), v); - } - if (retval == NULL) { - if (tstate->c_tracefunc != NULL - && _PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) - call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame); - if (_PyGen_FetchStopIterationValue(&retval) == 0) { - gen_status = PYGEN_RETURN; - } - else { - gen_status = PYGEN_ERROR; - } + if (Py_IsNone(v) && PyIter_Check(receiver)) { + retval = Py_TYPE(receiver)->tp_iternext(receiver); + } + else { + retval = PyObject_CallMethodOneArg(receiver, &_Py_ID(send), v); + } + if (retval == NULL) { + if (tstate->c_tracefunc != NULL + && _PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) + call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame); + if (_PyGen_FetchStopIterationValue(&retval) == 0) { + assert(retval != NULL); + JUMPBY(oparg); } else { - gen_status = PYGEN_NEXT; + assert(retval == NULL); + goto error; } } - if (gen_status == PYGEN_ERROR) { - assert(retval == NULL); - goto error; - } - Py_DECREF(v); - if (gen_status == PYGEN_RETURN) { - assert(retval != NULL); - JUMPBY(oparg); - } else { - assert(gen_status == PYGEN_NEXT); assert(retval != NULL); } } + inst(SEND_GEN, (unused/1, receiver, v -- receiver)) { + assert(cframe.use_tracing == 0); + PyGenObject *gen = (PyGenObject *)receiver; + DEOPT_IF(Py_TYPE(gen) != &PyGen_Type && + Py_TYPE(gen) != &PyCoro_Type, SEND); + DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND); + STAT_INC(SEND, hit); + _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; + frame->yield_offset = oparg; + STACK_SHRINK(1); + _PyFrame_StackPush(gen_frame, v); + gen->gi_frame_state = FRAME_EXECUTING; + gen->gi_exc_state.previous_item = tstate->exc_info; + tstate->exc_info = &gen->gi_exc_state; + JUMPBY(INLINE_CACHE_ENTRIES_SEND + oparg); + DISPATCH_INLINED(gen_frame); + } + inst(YIELD_VALUE, (retval -- unused)) { // NOTE: It's important that YIELD_VALUE never raises an exception! // The compiler treats any exception raised here as a failed close() diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 656bfbf552faa0..093ebff026b509 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -882,52 +882,69 @@ } TARGET(SEND) { + PREDICTED(SEND); PyObject *v = PEEK(1); PyObject *receiver = PEEK(2); PyObject *retval; + #if ENABLE_SPECIALIZATION + _PySendCache *cache = (_PySendCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + next_instr--; + _Py_Specialize_Send(receiver, next_instr); + DISPATCH_SAME_OPARG(); + } + STAT_INC(SEND, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); + #endif /* ENABLE_SPECIALIZATION */ assert(frame != &entry_frame); - PySendResult gen_status; - if (tstate->c_tracefunc == NULL) { - gen_status = PyIter_Send(receiver, v, &retval); - } else { - if (Py_IsNone(v) && PyIter_Check(receiver)) { - retval = Py_TYPE(receiver)->tp_iternext(receiver); - } - else { - retval = PyObject_CallMethodOneArg(receiver, &_Py_ID(send), v); - } - if (retval == NULL) { - if (tstate->c_tracefunc != NULL - && _PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) - call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame); - if (_PyGen_FetchStopIterationValue(&retval) == 0) { - gen_status = PYGEN_RETURN; - } - else { - gen_status = PYGEN_ERROR; - } + if (Py_IsNone(v) && PyIter_Check(receiver)) { + retval = Py_TYPE(receiver)->tp_iternext(receiver); + } + else { + retval = PyObject_CallMethodOneArg(receiver, &_Py_ID(send), v); + } + if (retval == NULL) { + if (tstate->c_tracefunc != NULL + && _PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) + call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame); + if (_PyGen_FetchStopIterationValue(&retval) == 0) { + assert(retval != NULL); + JUMPBY(oparg); } else { - gen_status = PYGEN_NEXT; + assert(retval == NULL); + goto error; } } - if (gen_status == PYGEN_ERROR) { - assert(retval == NULL); - goto error; - } - Py_DECREF(v); - if (gen_status == PYGEN_RETURN) { - assert(retval != NULL); - JUMPBY(oparg); - } else { - assert(gen_status == PYGEN_NEXT); assert(retval != NULL); } POKE(1, retval); + JUMPBY(1); DISPATCH(); } + TARGET(SEND_GEN) { + PyObject *v = PEEK(1); + PyObject *receiver = PEEK(2); + assert(cframe.use_tracing == 0); + PyGenObject *gen = (PyGenObject *)receiver; + DEOPT_IF(Py_TYPE(gen) != &PyGen_Type && + Py_TYPE(gen) != &PyCoro_Type, SEND); + DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND); + STAT_INC(SEND, hit); + _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; + frame->yield_offset = oparg; + STACK_SHRINK(1); + _PyFrame_StackPush(gen_frame, v); + gen->gi_frame_state = FRAME_EXECUTING; + gen->gi_exc_state.previous_item = tstate->exc_info; + tstate->exc_info = &gen->gi_exc_state; + JUMPBY(INLINE_CACHE_ENTRIES_SEND + oparg); + DISPATCH_INLINED(gen_frame); + } + TARGET(YIELD_VALUE) { PyObject *retval = PEEK(1); // NOTE: It's important that YIELD_VALUE never raises an exception! diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h index b037c9a9ec4bcf..d622eb12c8cb2d 100644 --- a/Python/opcode_metadata.h +++ b/Python/opcode_metadata.h @@ -104,6 +104,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case SEND: return 2; + case SEND_GEN: + return 2; case YIELD_VALUE: return 1; case POP_EXCEPT: @@ -454,6 +456,8 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case SEND: return 2; + case SEND_GEN: + return 1; case YIELD_VALUE: return 1; case POP_EXCEPT: @@ -763,7 +767,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = { [GET_AITER] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, [GET_ANEXT] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, [GET_AWAITABLE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB }, - [SEND] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB }, + [SEND] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC }, + [SEND_GEN] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC }, [YIELD_VALUE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, [POP_EXCEPT] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, [RERAISE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB }, diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index eceb246fac4909..301ec6e005dad6 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -165,7 +165,7 @@ static void *opcode_targets[256] = { &&TARGET_SET_UPDATE, &&TARGET_DICT_MERGE, &&TARGET_DICT_UPDATE, - &&_unknown_opcode, + &&TARGET_SEND_GEN, &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index 908ad6dceb57f3..789bc8e9b94e82 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -128,6 +128,7 @@ print_spec_stats(FILE *out, OpcodeStats *stats) fprintf(out, "opcode[%d].specializable : 1\n", BINARY_SLICE); fprintf(out, "opcode[%d].specializable : 1\n", COMPARE_OP); fprintf(out, "opcode[%d].specializable : 1\n", STORE_SLICE); + fprintf(out, "opcode[%d].specializable : 1\n", SEND); for (int i = 0; i < 256; i++) { if (_PyOpcode_Caches[i]) { fprintf(out, "opcode[%d].specializable : 1\n", i); @@ -1084,7 +1085,7 @@ PyObject *descr, DescriptorClassification kind) if (dict) { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_NOT_MANAGED_DICT); return 0; - } + } assert(owner_cls->tp_dictoffset > 0); assert(owner_cls->tp_dictoffset <= INT16_MAX); _py_set_opcode(instr, LOAD_ATTR_METHOD_LAZY_DICT); @@ -2183,3 +2184,25 @@ _Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr, int oparg) STAT_INC(FOR_ITER, success); cache->counter = adaptive_counter_cooldown(); } + +void +_Py_Specialize_Send(PyObject *receiver, _Py_CODEUNIT *instr) +{ + assert(ENABLE_SPECIALIZATION); + assert(_PyOpcode_Caches[SEND] == INLINE_CACHE_ENTRIES_SEND); + _PySendCache *cache = (_PySendCache *)(instr + 1); + PyTypeObject *tp = Py_TYPE(receiver); + if (tp == &PyGen_Type || tp == &PyCoro_Type) { + _py_set_opcode(instr, SEND_GEN); + goto success; + } + SPECIALIZATION_FAIL(SEND, + _PySpecialization_ClassifyIterator(iter)); + STAT_INC(SEND, failure); + _py_set_opcode(instr, SEND); + cache->counter = adaptive_counter_backoff(cache->counter); + return; +success: + STAT_INC(SEND, success); + cache->counter = adaptive_counter_cooldown(); +} From a2c1606d78ec99523e1ad7fb09c53368fa4c40a6 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 10 Feb 2023 14:04:59 +0000 Subject: [PATCH 3/6] Fix stats gathering --- Python/specialize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index 789bc8e9b94e82..4ede3122d38046 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2197,7 +2197,7 @@ _Py_Specialize_Send(PyObject *receiver, _Py_CODEUNIT *instr) goto success; } SPECIALIZATION_FAIL(SEND, - _PySpecialization_ClassifyIterator(iter)); + _PySpecialization_ClassifyIterator(receiver)); STAT_INC(SEND, failure); _py_set_opcode(instr, SEND); cache->counter = adaptive_counter_backoff(cache->counter); From eaa4be163933078087eff9280ef9b98e72e240c1 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 10 Feb 2023 15:55:32 +0000 Subject: [PATCH 4/6] Add news --- .../2023-02-10-15-54-57.gh-issue-87849.IUVvPz.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-02-10-15-54-57.gh-issue-87849.IUVvPz.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-02-10-15-54-57.gh-issue-87849.IUVvPz.rst b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-15-54-57.gh-issue-87849.IUVvPz.rst new file mode 100644 index 00000000000000..0f875c4f097532 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-15-54-57.gh-issue-87849.IUVvPz.rst @@ -0,0 +1,3 @@ +Change the ``SEND`` instruction to leave the receiver on the stack. This +allows the specialized form of ``SEND``to skip the chain of C calls and jump +directly to the ``RESUME`` in the generator or coroutine. From 77dd329c9454ac35e4c3f09f16da7907f03814da Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 10 Feb 2023 16:26:12 +0000 Subject: [PATCH 5/6] Update magic number. --- Lib/importlib/_bootstrap_external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 5d582d85688953..38d4a384c2cc95 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -445,7 +445,7 @@ def _write_atomic(path, data, mode=0o666): # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. -MAGIC_NUMBER = (3520).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3519).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c From 62008eafc6a4d27e4942223ab5adddb1f4b72b83 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 10 Feb 2023 16:43:03 +0000 Subject: [PATCH 6/6] Fix formatting. --- .../2023-02-10-15-54-57.gh-issue-87849.IUVvPz.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-02-10-15-54-57.gh-issue-87849.IUVvPz.rst b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-15-54-57.gh-issue-87849.IUVvPz.rst index 0f875c4f097532..da5f3ff79fd575 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2023-02-10-15-54-57.gh-issue-87849.IUVvPz.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-15-54-57.gh-issue-87849.IUVvPz.rst @@ -1,3 +1,3 @@ Change the ``SEND`` instruction to leave the receiver on the stack. This -allows the specialized form of ``SEND``to skip the chain of C calls and jump +allows the specialized form of ``SEND`` to skip the chain of C calls and jump directly to the ``RESUME`` in the generator or coroutine.