Skip to content

Commit b174fe8

Browse files
committed
Calculate the padding automatically
1 parent 8b228c0 commit b174fe8

File tree

1 file changed

+43
-23
lines changed

1 file changed

+43
-23
lines changed

Python/perf_jit_trampoline.c

Lines changed: 43 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,9 @@
9797
* /tmp/jitted-PID-0.so: [headers][.text][unwind_info][padding]
9898
* /tmp/jitted-PID-1.so: [headers][.text][unwind_info][padding]
9999
*
100-
* The padding size (0x100) is chosen to accommodate typical unwind info sizes
101-
* while maintaining 16-byte alignment requirements.
100+
* The padding size is calculated automatically during initialization
101+
* based on the actual unwind information requirements.
102102
*/
103-
#define PERF_JIT_CODE_PADDING 0x100
104103

105104
/* Convenient access to the global trampoline API state */
106105
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
@@ -646,6 +645,8 @@ static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) {
646645
// DWARF EH FRAME GENERATION
647646
// =============================================================================
648647

648+
static void elf_init_ehframe(ELFObjectContext* ctx);
649+
649650
/*
650651
* Initialize DWARF .eh_frame section for a code region
651652
*
@@ -660,6 +661,23 @@ static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) {
660661
* Args:
661662
* ctx: ELF object context containing code size and buffer pointers
662663
*/
664+
/*
 * Measure how many bytes of .eh_frame data elf_init_ehframe() emits for the
 * trampoline function.
 *
 * The frame is generated into a temporary stack buffer only so that the
 * final write cursor can be compared with the buffer start; the generated
 * bytes themselves are discarded when this function returns.
 *
 * Returns:
 *   Number of bytes written, i.e. ctx.p - ctx.startp after generation.
 *
 * NOTE(review): the comment block immediately preceding this function
 * ("Initialize DWARF .eh_frame section ...", with an "Args: ctx" entry)
 * describes elf_init_ehframe(), not this helper, which takes no arguments.
 */
static size_t calculate_eh_frame_size(void) {
665+
/* Calculate the EH frame size for the trampoline function */
666+
extern void *_Py_trampoline_func_start;
667+
extern void *_Py_trampoline_func_end;
668+
669+
/* Code size is the byte distance between the trampoline's start and end symbols */
size_t code_size = (char*)&_Py_trampoline_func_end - (char*)&_Py_trampoline_func_start;
670+
671+
/* NOTE(review): only code_size, startp, p and fde_p are assigned below; any
 * other ELFObjectContext members stay uninitialized -- confirm that
 * elf_init_ehframe() does not read them. */
ELFObjectContext ctx;
672+
/* NOTE(review): nothing visible here bounds elf_init_ehframe()'s writes to
 * sizeof(buffer) -- confirm the generated frame can never exceed it. */
char buffer[1024]; // Buffer for DWARF data (1KB should be sufficient)
673+
ctx.code_size = code_size;
674+
/* Both the start marker and the write cursor begin at the scratch buffer */
ctx.startp = ctx.p = (uint8_t*)buffer;
675+
/* elf_init_ehframe() only patches the FDE PC-relative offset when fde_p is non-NULL */
ctx.fde_p = NULL;
676+
677+
elf_init_ehframe(&ctx);
678+
/* Bytes emitted = final write cursor minus buffer start */
return ctx.p - ctx.startp;
679+
}
680+
663681
static void elf_init_ehframe(ELFObjectContext* ctx) {
664682
uint8_t* p = ctx->p;
665683
uint8_t* framep = p; // Remember start of frame data
@@ -856,7 +874,7 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
856874
*
857875
* The FDE describes unwinding information specific to this function.
858876
* It references the CIE and provides function-specific CFI instructions.
859-
*
877+
*
860878
* The PC-relative offset is calculated after the entire EH frame is built
861879
* to ensure accurate positioning relative to the synthesized DSO layout.
862880
*/
@@ -881,16 +899,16 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
881899
# endif
882900
DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance past push %rbp (1 byte)
883901
DWRF_U8(DWRF_CFA_def_cfa_offset); // def_cfa_offset 16
884-
DWRF_UV(16);
902+
DWRF_UV(16); // New offset: SP + 16
885903
DWRF_U8(DWRF_CFA_offset | DWRF_REG_BP); // offset r6 at cfa-16
886-
DWRF_UV(2);
904+
DWRF_UV(2); // Offset factor: 2 * 8 = 16 bytes
887905
DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past mov %rsp,%rbp (3 bytes)
888906
DWRF_U8(DWRF_CFA_def_cfa_register); // def_cfa_register r6
889-
DWRF_UV(DWRF_REG_BP);
907+
DWRF_UV(DWRF_REG_BP); // Use base pointer register
890908
DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past call *%rcx (2 bytes) + pop %rbp (1 byte) = 3
891909
DWRF_U8(DWRF_CFA_def_cfa); // def_cfa r7 ofs 8
892-
DWRF_UV(DWRF_REG_SP);
893-
DWRF_UV(8);
910+
DWRF_UV(DWRF_REG_SP); // Use stack pointer register
911+
DWRF_UV(8); // New offset: SP + 8
894912
#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
895913
/* AArch64 calling convention unwinding rules */
896914
DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance location by 1 instruction (stp x29, x30)
@@ -914,11 +932,11 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
914932
)
915933

916934
ctx->p = p; // Update context pointer to end of generated data
917-
935+
918936
/* Calculate and update the PC-relative offset in the FDE
919-
*
937+
*
920938
* When perf processes the jitdump, it creates a synthesized DSO with this layout:
921-
*
939+
*
922940
* Synthesized DSO Memory Layout:
923941
* ┌─────────────────────────────────────────────────────────────┐ < code_start
924942
* │ Code Section │
@@ -936,33 +954,33 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
936954
* │ │ CFI Instructions... │ │
937955
* │ └─────────────────────────────────────────────────────┘ │
938956
* ├─────────────────────────────────────────────────────────────┤ < reference_point
939-
* │ EhFrameHeader │
957+
* │ EhFrameHeader │
940958
* │ (navigation metadata) │
941959
* └─────────────────────────────────────────────────────────────┘
942-
*
960+
*
943961
* The PC offset field in the FDE must contain the distance from itself to code_start:
944-
*
962+
*
945963
* distance = code_start - fde_pc_field
946-
*
964+
*
947965
* Where:
948966
* fde_pc_field_location = reference_point - eh_frame_size + fde_offset_in_frame
949967
* code_start_location = reference_point - eh_frame_size - round_up(code_size, 8)
950-
*
968+
*
951969
* Therefore:
952970
* distance = code_start_location - fde_pc_field_location
953971
* = (ref - eh_frame_size - rounded_code_size) - (ref - eh_frame_size + fde_offset_in_frame)
954972
* = -rounded_code_size - fde_offset_in_frame
955973
* = -(round_up(code_size, 8) + fde_offset_in_frame)
956974
*
957975
* Note: fde_offset_in_frame is the offset from EH frame start to the PC offset field,
958-
*
976+
*
959977
*/
960978
if (ctx->fde_p != NULL) {
961979
int32_t fde_offset_in_frame = (ctx->fde_p - ctx->startp);
962980
int32_t rounded_code_size = round_up(ctx->code_size, 8);
963981
int32_t pc_relative_offset = -(rounded_code_size + fde_offset_in_frame);
964-
965-
982+
983+
966984
// Update the PC-relative offset in the FDE
967985
*(int32_t*)ctx->fde_p = pc_relative_offset;
968986
}
@@ -1066,8 +1084,10 @@ static void* perf_map_jit_init(void) {
10661084
/* Initialize code ID counter */
10671085
perf_jit_map_state.code_id = 0;
10681086

1069-
/* Configure trampoline API with padding information */
1070-
trampoline_api.code_padding = PERF_JIT_CODE_PADDING;
1087+
/* Calculate padding size based on actual unwind info requirements */
1088+
size_t eh_frame_size = calculate_eh_frame_size();
1089+
size_t unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size;
1090+
trampoline_api.code_padding = round_up(unwind_data_size, 16);
10711091

10721092
return &perf_jit_map_state;
10731093
}
@@ -1175,7 +1195,7 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr,
11751195
ev2.unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size;
11761196

11771197
/* Verify we don't exceed our padding budget */
1178-
assert(ev2.unwind_data_size <= PERF_JIT_CODE_PADDING);
1198+
assert(ev2.unwind_data_size <= (uint64_t)trampoline_api.code_padding);
11791199

11801200
ev2.eh_frame_hdr_size = sizeof(EhFrameHeader);
11811201
ev2.mapped_size = round_up(ev2.unwind_data_size, 16); // 16-byte alignment

0 commit comments

Comments
 (0)