diff --git a/Include/cpython/compile.h b/Include/cpython/compile.h
index cfdb7080d45f2b..3e5ec1c3d89c32 100644
--- a/Include/cpython/compile.h
+++ b/Include/cpython/compile.h
@@ -20,6 +20,7 @@
 #define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000
 #define PyCF_ALLOW_INCOMPLETE_INPUT 0x4000
 #define PyCF_OPTIMIZED_AST (0x8000 | PyCF_ONLY_AST)
+#define PyCF_DONT_OPTIMIZE_AST 0x10000
 #define PyCF_COMPILE_MASK (PyCF_ONLY_AST | PyCF_ALLOW_TOP_LEVEL_AWAIT | \
                            PyCF_TYPE_COMMENTS | PyCF_DONT_IMPLY_DEDENT | \
                            PyCF_ALLOW_INCOMPLETE_INPUT | PyCF_OPTIMIZED_AST)
diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index 90214a314031d1..3498656a3f6d93 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -1124,6 +1124,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(opener));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(operation));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(optimize));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(optimize_ast));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(options));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(order));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(origin));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 97a75d0c46c867..26a357d2fc91ca 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -613,6 +613,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(opener)
         STRUCT_FOR_ID(operation)
         STRUCT_FOR_ID(optimize)
+        STRUCT_FOR_ID(optimize_ast)
         STRUCT_FOR_ID(options)
         STRUCT_FOR_ID(order)
         STRUCT_FOR_ID(origin)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 4f928cc050bf8e..5527dc05e927d1 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -1122,6 +1122,7 @@ extern "C" {
     INIT_ID(opener), \
     INIT_ID(operation), \
     INIT_ID(optimize), \
+    INIT_ID(optimize_ast), \
     INIT_ID(options), \
     INIT_ID(order), \
     INIT_ID(origin), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index 5b78d038fc1192..3fdefd51bff291 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -2248,6 +2248,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(optimize_ast);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(options);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
diff --git a/Lib/test/support/bytecode_helper.py b/Lib/test/support/bytecode_helper.py
index f6426c3e285b2d..5c76b624d00c4a 100644
--- a/Lib/test/support/bytecode_helper.py
+++ b/Lib/test/support/bytecode_helper.py
@@ -89,6 +89,16 @@ def assertInstructionsMatch(self, actual_seq, expected):
             idx = max([p[0] for p in enumerate(exp) if p[1] != -1])
             self.assertEqual(exp[:idx], act[:idx])
 
+    def assertNotInInstructionSequence(self, seq, expected_opcode):
+        # Fail if any instruction returned by seq.get_instructions() uses
+        # the opcode named expected_opcode.
+        self.assertIn(expected_opcode, dis.opmap)
+        unwanted = dis.opmap[expected_opcode]
+        for instr in seq.get_instructions():
+            opcode, *_ = instr
+            if opcode == unwanted:
+                self.fail(f"{expected_opcode} appears in instructions sequence.")
+
     def resolveAndRemoveLabels(self, insts):
         idx = 0
         res = []
@@ -138,8 +148,10 @@ def check_instructions(self, insts):
 @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
 class CodegenTestCase(CompilationStepTestCase):
 
-    def generate_code(self, ast):
-        insts, _ = _testinternalcapi.compiler_codegen(ast, "my_file.py", 0)
+    def generate_code(self, ast, optimize_ast=True):
+        insts, _ = _testinternalcapi.compiler_codegen(
+            ast, "my_file.py", optimize=0, compile_mode=0,
+            optimize_ast=optimize_ast)
         return insts
 
 
diff --git a/Lib/test/test_compiler_codegen.py b/Lib/test/test_compiler_codegen.py
index cf5e2d901db4de..ba7958f389aa7d 100644
--- a/Lib/test/test_compiler_codegen.py
+++ b/Lib/test/test_compiler_codegen.py
@@ -15,10 +15,10 @@ def assertInstructionsMatch_recursive(self, insts, expected_insts):
         for n_insts, n_expected in zip(insts.get_nested(), expected_nested):
             self.assertInstructionsMatch_recursive(n_insts, n_expected)
 
-    def codegen_test(self, snippet, expected_insts):
+    def codegen_test(self, snippet, expected_insts, optimize_ast=True):
         import ast
         a = ast.parse(snippet, "my_file.py", "exec")
-        insts = self.generate_code(a)
+        insts = self.generate_code(a, optimize_ast=optimize_ast)
         self.assertInstructionsMatch_recursive(insts, expected_insts)
 
     def test_if_expression(self):
@@ -157,3 +157,57 @@ def test_syntax_error__return_not_in_function(self):
         self.assertIsNone(cm.exception.text)
         self.assertEqual(cm.exception.offset, 1)
         self.assertEqual(cm.exception.end_offset, 10)
+
+    def test_dont_optimize_ast_before_codegen(self):
+        snippet = "1+2"
+        unoptimized = [
+            ('RESUME', 0, 0),
+            ('LOAD_SMALL_INT', 1, 0),
+            ('LOAD_SMALL_INT', 2, 0),
+            ('BINARY_OP', 0, 0),
+            ('POP_TOP', None, 0),
+            ('LOAD_CONST', 0, 0),
+            ('RETURN_VALUE', None, 0),
+        ]
+        self.codegen_test(snippet, unoptimized, optimize_ast=False)
+
+        optimized = [
+            ('RESUME', 0, 0),
+            ('NOP', None, 0),
+            ('LOAD_CONST', 0, 0),
+            ('RETURN_VALUE', None, 0),
+        ]
+        self.codegen_test(snippet, optimized, optimize_ast=True)
+
+    def test_match_case_fold_codegen(self):
+        snippet = textwrap.dedent("""
+            match 0:
+                case -0: pass                # match unary const int
+                case -0.1: pass              # match unary const float
+                case -0j: pass               # match unary const complex
+                case 1 + 2j: pass            # match const int + const complex
+                case 1 - 2j: pass            # match const int - const complex
+                case 1.1 + 2.1j: pass        # match const float + const complex
+                case 1.1 - 2.1j: pass        # match const float - const complex
+                case -0 + 1j: pass           # match unary const int + complex
+                case -0 - 1j: pass           # match unary const int - complex
+                case -0.1 + 1.1j: pass       # match unary const float + complex
+                case -0.1 - 1.1j: pass       # match unary const float - complex
+
+                case {-0: 0}: pass           # match unary const int
+                case {-0.1: 0}: pass         # match unary const float
+                case {-0j: 0}: pass          # match unary const complex
+                case {1 + 2j: 0}: pass       # match const int + const complex
+                case {1 - 2j: 0}: pass       # match const int - const complex
+                case {1.1 + 2.1j: 0}: pass   # match const float + const complex
+                case {1.1 - 2.1j: 0}: pass   # match const float - const complex
+                case {-0 + 1j: 0}: pass      # match unary const int + complex
+                case {-0 - 1j: 0}: pass      # match unary const int - complex
+                case {-0.1 + 1.1j: 0}: pass  # match unary const float + complex
+                case {-0.1 - 1.1j: 0}: pass  # match unary const float - complex
+        """)
+        import ast
+        a = ast.parse(snippet, "my_file.py", "exec")
+        code = self.generate_code(a, optimize_ast=False)
+        self.assertNotInInstructionSequence(code, 'BINARY_OP')
+        self.assertNotInInstructionSequence(code, 'UNARY_NEGATIVE')
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index e44b629897c58a..85aa6669a1902c 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -711,6 +711,7 @@ _testinternalcapi.compiler_codegen -> object
     filename: object
     optimize: int
     compile_mode: int = 0
+    optimize_ast: int = 1
 
 Apply compiler code generation to an AST.
 [clinic start generated code]*/
@@ -718,11 +719,15 @@ Apply compiler code generation to an AST.
 static PyObject *
 _testinternalcapi_compiler_codegen_impl(PyObject *module, PyObject *ast,
                                         PyObject *filename, int optimize,
-                                        int compile_mode)
-/*[clinic end generated code: output=40a68f6e13951cc8 input=a0e00784f1517cd7]*/
+                                        int compile_mode, int optimize_ast)
+/*[clinic end generated code: output=b4bf87f1213effd1 input=ece7e3ca206d738a]*/
 {
-    PyCompilerFlags *flags = NULL;
-    return _PyCompile_CodeGen(ast, filename, flags, optimize, compile_mode);
+    PyCompilerFlags flags = _PyCompilerFlags_INIT;
+    if (!optimize_ast) {
+        // Ask compiler_setup() to skip _PyAST_Optimize().
+        flags.cf_flags |= PyCF_DONT_OPTIMIZE_AST;
+    }
+    return _PyCompile_CodeGen(ast, filename, &flags, optimize, compile_mode);
 }
 
 
diff --git a/Modules/clinic/_testinternalcapi.c.h b/Modules/clinic/_testinternalcapi.c.h
index d98d69df22f982..2390755b453320 100644
--- a/Modules/clinic/_testinternalcapi.c.h
+++ b/Modules/clinic/_testinternalcapi.c.h
@@ -87,7 +87,8 @@ _testinternalcapi_new_instruction_sequence(PyObject *module, PyObject *Py_UNUSED
 }
 
 PyDoc_STRVAR(_testinternalcapi_compiler_codegen__doc__,
-"compiler_codegen($module, /, ast, filename, optimize, compile_mode=0)\n"
+"compiler_codegen($module, /, ast, filename, optimize, compile_mode=0,\n"
+"                 optimize_ast=1)\n"
 "--\n"
 "\n"
 "Apply compiler code generation to an AST.");
@@ -98,7 +99,7 @@ PyDoc_STRVAR(_testinternalcapi_compiler_codegen__doc__,
 static PyObject *
 _testinternalcapi_compiler_codegen_impl(PyObject *module, PyObject *ast,
                                         PyObject *filename, int optimize,
-                                        int compile_mode);
+                                        int compile_mode, int optimize_ast);
 
 static PyObject *
 _testinternalcapi_compiler_codegen(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
@@ -106,14 +107,14 @@ _testinternalcapi_compiler_codegen(PyObject *module, PyObject *const *args, Py_s
     PyObject *return_value = NULL;
     #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
 
-    #define NUM_KEYWORDS 4
+    #define NUM_KEYWORDS 5
     static struct {
         PyGC_Head _this_is_not_used;
         PyObject_VAR_HEAD
         PyObject *ob_item[NUM_KEYWORDS];
     } _kwtuple = {
         .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
-        .ob_item = { &_Py_ID(ast), &_Py_ID(filename), &_Py_ID(optimize), &_Py_ID(compile_mode), },
+        .ob_item = { &_Py_ID(ast), &_Py_ID(filename), &_Py_ID(optimize), &_Py_ID(compile_mode), &_Py_ID(optimize_ast), },
     };
     #undef NUM_KEYWORDS
     #define KWTUPLE (&_kwtuple.ob_base.ob_base)
@@ -122,22 +123,23 @@ _testinternalcapi_compiler_codegen(PyObject *module, PyObject *const *args, Py_s
     #  define KWTUPLE NULL
     #endif  // !Py_BUILD_CORE
 
-    static const char * const _keywords[] = {"ast", "filename", "optimize", "compile_mode", NULL};
+    static const char * const _keywords[] = {"ast", "filename", "optimize", "compile_mode", "optimize_ast", NULL};
     static _PyArg_Parser _parser = {
         .keywords = _keywords,
         .fname = "compiler_codegen",
         .kwtuple = KWTUPLE,
     };
     #undef KWTUPLE
-    PyObject *argsbuf[4];
+    PyObject *argsbuf[5];
     Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 3;
     PyObject *ast;
     PyObject *filename;
     int optimize;
     int compile_mode = 0;
+    int optimize_ast = 1;
 
     args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
-            /*minpos*/ 3, /*maxpos*/ 4, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+            /*minpos*/ 3, /*maxpos*/ 5, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
     if (!args) {
         goto exit;
     }
@@ -150,12 +152,21 @@ _testinternalcapi_compiler_codegen(PyObject *module, PyObject *const *args, Py_s
     if (!noptargs) {
         goto skip_optional_pos;
     }
-    compile_mode = PyLong_AsInt(args[3]);
-    if (compile_mode == -1 && PyErr_Occurred()) {
+    if (args[3]) {
+        compile_mode = PyLong_AsInt(args[3]);
+        if (compile_mode == -1 && PyErr_Occurred()) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_pos;
+        }
+    }
+    optimize_ast = PyLong_AsInt(args[4]);
+    if (optimize_ast == -1 && PyErr_Occurred()) {
         goto exit;
     }
 skip_optional_pos:
-    return_value = _testinternalcapi_compiler_codegen_impl(module, ast, filename, optimize, compile_mode);
+    return_value = _testinternalcapi_compiler_codegen_impl(module, ast, filename, optimize, compile_mode, optimize_ast);
 
 exit:
     return return_value;
@@ -365,4 +376,4 @@ gh_119213_getargs(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=ec77971c6c2663da input=a9049054013a1b77]*/
+/*[clinic end generated code: output=693e868239038bbd input=a9049054013a1b77]*/
diff --git a/Python/codegen.c b/Python/codegen.c
index cd77b34c06296b..ecec1590812901 100644
--- a/Python/codegen.c
+++ b/Python/codegen.c
@@ -5359,6 +5359,149 @@ codegen_slice(compiler *c, expr_ty s)
 #define MATCH_VALUE_EXPR(N) \
     ((N)->kind == Constant_kind || (N)->kind == Attribute_kind)
 
+/* Helpers for folding the constant expressions allowed in match value
+   patterns and mapping pattern keys: "-<number>" and
+   "<real> +/- <complex>".  Codegen folds them itself so that they do not
+   depend on _PyAST_Optimize(), which PyCF_DONT_OPTIMIZE_AST skips. */
+
+#define IS_CONST_EXPR(N) \
+    ((N)->kind == Constant_kind)
+
+#define CONST_EXPR_VALUE(N) \
+    ((N)->v.Constant.value)
+
+#define IS_COMPLEX_CONST_EXPR(N) \
+    (IS_CONST_EXPR(N) && PyComplex_CheckExact(CONST_EXPR_VALUE(N)))
+
+#define IS_NUMERIC_CONST_EXPR(N) \
+    (IS_CONST_EXPR(N) && (PyLong_CheckExact(CONST_EXPR_VALUE(N)) || PyFloat_CheckExact(CONST_EXPR_VALUE(N))))
+
+#define IS_UNARY_EXPR(N) \
+    ((N)->kind == UnaryOp_kind)
+
+#define UNARY_EXPR_OP(N) \
+    ((N)->v.UnaryOp.op)
+
+#define UNARY_EXPR_OPERAND(N) \
+    ((N)->v.UnaryOp.operand)
+
+#define UNARY_EXPR_OPERAND_CONST_VALUE(N) \
+    (CONST_EXPR_VALUE(UNARY_EXPR_OPERAND(N)))
+
+#define IS_UNARY_SUB_EXPR(N) \
+    (IS_UNARY_EXPR(N) && UNARY_EXPR_OP(N) == USub)
+
+#define IS_NUMERIC_UNARY_CONST_EXPR(N) \
+    (IS_UNARY_SUB_EXPR(N) && IS_NUMERIC_CONST_EXPR(UNARY_EXPR_OPERAND(N)))
+
+#define IS_COMPLEX_UNARY_CONST_EXPR(N) \
+    (IS_UNARY_SUB_EXPR(N) && IS_COMPLEX_CONST_EXPR(UNARY_EXPR_OPERAND(N)))
+
+#define BINARY_EXPR(N) \
+    ((N)->v.BinOp)
+
+#define BINARY_EXPR_OP(N) \
+    (BINARY_EXPR(N).op)
+
+#define BINARY_EXPR_LEFT(N) \
+    (BINARY_EXPR(N).left)
+
+#define BINARY_EXPR_RIGHT(N) \
+    (BINARY_EXPR(N).right)
+
+#define IS_BINARY_EXPR(N) \
+    ((N)->kind == BinOp_kind)
+
+#define IS_BINARY_ADD_EXPR(N) \
+    (IS_BINARY_EXPR(N) && BINARY_EXPR_OP(N) == Add)
+
+#define IS_BINARY_SUB_EXPR(N) \
+    (IS_BINARY_EXPR(N) && BINARY_EXPR_OP(N) == Sub)
+
+#define IS_MATCH_NUMERIC_OR_COMPLEX_UNARY_CONST_EXPR(N) \
+    (IS_NUMERIC_UNARY_CONST_EXPR(N) || IS_COMPLEX_UNARY_CONST_EXPR(N))
+
+/* "<real> + <complex>" or "<real> - <complex>", where <real> is an int or
+   float constant, optionally negated.  The left operand must be numeric:
+   accepting any constant would also fold a complex left operand and would
+   turn e.g. a str operand into a TypeError from PyNumber_Add(). */
+#define IS_MATCH_COMPLEX_BINARY_CONST_EXPR(N) \
+    ( \
+        (IS_BINARY_ADD_EXPR(N) || IS_BINARY_SUB_EXPR(N)) \
+        && (IS_NUMERIC_UNARY_CONST_EXPR(BINARY_EXPR_LEFT(N)) \
+            || IS_NUMERIC_CONST_EXPR(BINARY_EXPR_LEFT(N))) \
+        && IS_COMPLEX_CONST_EXPR(BINARY_EXPR_RIGHT(N)) \
+    )
+
+/* Turn `node` in place into a Constant whose value is `folded`.
+   NOTE(review): `folded` is a new reference and nothing here releases it or
+   hands it to the AST arena; presumably it must be registered the way
+   ast_opt.c does for folded constants -- confirm, otherwise it leaks. */
+static void
+fold_node(expr_ty node, PyObject *folded)
+{
+    assert(!IS_CONST_EXPR(node));
+    node->kind = Constant_kind;
+    node->v.Constant.kind = NULL;
+    node->v.Constant.value = folded;
+}
+
+/* Fold "-<int|float|complex constant>" into a single Constant node.
+   Returns SUCCESS, or ERROR with an exception set. */
+static int
+fold_const_unary_or_complex_expr(expr_ty e)
+{
+    assert(IS_MATCH_NUMERIC_OR_COMPLEX_UNARY_CONST_EXPR(e));
+    PyObject *constant = UNARY_EXPR_OPERAND_CONST_VALUE(e);
+    assert(UNARY_EXPR_OP(e) == USub);
+    PyObject *folded = PyNumber_Negative(constant);
+    if (folded == NULL) {
+        return ERROR;
+    }
+    fold_node(e, folded);
+    return SUCCESS;
+}
+
+/* Fold "<real> +/- <complex>" into a single complex Constant node, folding
+   a negated left operand first.  Returns SUCCESS, or ERROR with an
+   exception set. */
+static int
+fold_const_binary_complex_expr(expr_ty e)
+{
+    assert(IS_MATCH_COMPLEX_BINARY_CONST_EXPR(e));
+    expr_ty left_expr = BINARY_EXPR_LEFT(e);
+    if (IS_NUMERIC_UNARY_CONST_EXPR(left_expr)) {
+        RETURN_IF_ERROR(fold_const_unary_or_complex_expr(left_expr));
+    }
+    assert(IS_CONST_EXPR(BINARY_EXPR_LEFT(e)));
+    operator_ty op = BINARY_EXPR_OP(e);
+    PyObject *left = CONST_EXPR_VALUE(BINARY_EXPR_LEFT(e));
+    PyObject *right = CONST_EXPR_VALUE(BINARY_EXPR_RIGHT(e));
+    assert(op == Add || op == Sub);
+    PyObject *folded = (op == Add) ? PyNumber_Add(left, right)
+                                   : PyNumber_Subtract(left, right);
+    if (folded == NULL) {
+        return ERROR;
+    }
+    fold_node(e, folded);
+    return SUCCESS;
+}
+
+/* Fold `e` in place if it is one of the foldable pattern expressions above;
+   leave any other expression untouched.  Returns ERROR only if folding
+   itself failed. */
+static int
+try_fold_unary_or_binary_complex_const_expr(expr_ty e)
+{
+    if (IS_MATCH_NUMERIC_OR_COMPLEX_UNARY_CONST_EXPR(e)) {
+        return fold_const_unary_or_complex_expr(e);
+    }
+    if (IS_MATCH_COMPLEX_BINARY_CONST_EXPR(e)) {
+        return fold_const_binary_complex_expr(e);
+    }
+    return SUCCESS;
+}
+
 // Allocate or resize pc->fail_pop to allow for n items to be popped on failure.
 static int
 ensure_fail_pop(compiler *c, pattern_context *pc, Py_ssize_t n)
@@ -5688,7 +5831,7 @@ codegen_pattern_mapping_key(compiler *c, PyObject *seen, pattern_ty p, Py_ssize_
         location loc = LOC((pattern_ty) asdl_seq_GET(patterns, i));
         return _PyCompile_Error(c, loc, e);
     }
-
+    RETURN_IF_ERROR(try_fold_unary_or_binary_complex_const_expr(key));
     if (key->kind == Constant_kind) {
         int in_seen = PySet_Contains(seen, key->v.Constant.value);
         RETURN_IF_ERROR(in_seen);
@@ -6022,6 +6165,7 @@ codegen_pattern_value(compiler *c, pattern_ty p, pattern_context *pc)
 {
     assert(p->kind == MatchValue_kind);
     expr_ty value = p->v.MatchValue.value;
+    RETURN_IF_ERROR(try_fold_unary_or_binary_complex_const_expr(value));
     if (!MATCH_VALUE_EXPR(value)) {
         const char *e = "patterns may only match literals and attribute lookups";
         return _PyCompile_Error(c, LOC(p), e);
diff --git a/Python/compile.c b/Python/compile.c
index b58c12d4b881ac..e25b59440373f0 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -126,7 +126,8 @@ compiler_setup(compiler *c, mod_ty mod, PyObject *filename,
     c->c_optimize = (optimize == -1) ? _Py_GetConfig()->optimization_level : optimize;
     c->c_save_nested_seqs = false;
 
-    if (!_PyAST_Optimize(mod, arena, c->c_optimize, merged)) {
+    int ast_opt = !(flags->cf_flags & PyCF_DONT_OPTIMIZE_AST);
+    if (ast_opt && !_PyAST_Optimize(mod, arena, c->c_optimize, merged)) {
         return ERROR;
     }
     c->c_st = _PySymtable_Build(mod, filename, &c->c_future);