From ea726d2e55be3acdae4025b7a7807ad76147657b Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Tue, 25 Jun 2024 14:18:58 +0100 Subject: [PATCH 1/2] gh-119726: emit AArch64 trampolines in the data section Emit AArch64 trampolines in the data section of the stencil instead of the code section. In many cases this allows the branch to the next micro-op at the end of the stencil to be replaced with a fall-through NOP. --- .../2024-06-28-18-34-49.gh-issue-119726.Fjv_Ab.rst | 2 ++ Tools/jit/_stencils.py | 14 ++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-28-18-34-49.gh-issue-119726.Fjv_Ab.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-28-18-34-49.gh-issue-119726.Fjv_Ab.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-28-18-34-49.gh-issue-119726.Fjv_Ab.rst new file mode 100644 index 00000000000000..e3a321cf229ccf --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-28-18-34-49.gh-issue-119726.Fjv_Ab.rst @@ -0,0 +1,2 @@ +Optimise code layout for calls to C functions from the JIT on AArch64. +Patch by Diego Russo. 
diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 68eb1d13394170..1c6a9edb39840d 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -184,7 +184,7 @@ def pad(self, alignment: int) -> None: self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}") self.body.extend([0] * padding) - def emit_aarch64_trampoline(self, hole: Hole, alignment: int) -> None: + def emit_aarch64_trampoline(self, hole: Hole, alignment: int) -> Hole: """Even with the large code model, AArch64 Linux insists on 28-bit jumps.""" assert hole.symbol is not None reuse_trampoline = hole.symbol in self.trampolines @@ -194,14 +194,10 @@ def emit_aarch64_trampoline(self, hole: Hole, alignment: int) -> None: else: self.pad(alignment) base = len(self.body) - where = slice(hole.offset, hole.offset + 4) - instruction = int.from_bytes(self.body[where], sys.byteorder) - instruction &= 0xFC000000 - instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF - self.body[where] = instruction.to_bytes(4, sys.byteorder) + new_hole = hole.replace(addend=base, symbol=None, value=HoleValue.DATA) if reuse_trampoline: - return + return new_hole self.disassembly += [ f"{base + 4 * 0:x}: 58000048 ldr x8, 8", @@ -219,6 +215,7 @@ def emit_aarch64_trampoline(self, hole: Hole, alignment: int) -> None: self.body.extend(code) self.holes.append(hole.replace(offset=base + 8, kind="R_AARCH64_ABS64")) self.trampolines[hole.symbol] = base + return new_hole def remove_jump(self, *, alignment: int = 1) -> None: """Remove a zero-length continuation jump, if it exists.""" @@ -294,8 +291,9 @@ def process_relocations(self, *, alignment: int = 1) -> None: in {"R_AARCH64_CALL26", "R_AARCH64_JUMP26", "ARM64_RELOC_BRANCH26"} and hole.value is HoleValue.ZERO ): - self.code.emit_aarch64_trampoline(hole, alignment) + new_hole = self.data.emit_aarch64_trampoline(hole, alignment) self.code.holes.remove(hole) + self.code.holes.append(new_hole) self.code.remove_jump(alignment=alignment) 
self.code.pad(alignment) self.data.pad(8) From 13e5a2fc2a8e4bfcffcb4387a95b213fd705e985 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Wed, 3 Jul 2024 10:01:11 +0100 Subject: [PATCH 2/2] Update Misc/NEWS.d/next/Core and Builtins/2024-06-28-18-34-49.gh-issue-119726.Fjv_Ab.rst Co-authored-by: Brandt Bucher --- .../2024-06-28-18-34-49.gh-issue-119726.Fjv_Ab.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-28-18-34-49.gh-issue-119726.Fjv_Ab.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-28-18-34-49.gh-issue-119726.Fjv_Ab.rst index e3a321cf229ccf..cf5d61450aa3ae 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-06-28-18-34-49.gh-issue-119726.Fjv_Ab.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-28-18-34-49.gh-issue-119726.Fjv_Ab.rst @@ -1,2 +1,2 @@ -Optimise code layout for calls to C functions from the JIT on AArch64. +Optimize code layout for calls to C functions from the JIT on AArch64. Patch by Diego Russo.