From 7547a29863cbe98def5fc592d003251951516e53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Tue, 8 Jul 2025 19:21:56 +0200 Subject: [PATCH 01/11] Add perf trampoline support for macOS --- Python/asm_trampoline.S | 13 +++++++++++++ Python/perf_jit_trampoline.c | 24 +++++++++++++++++++++--- configure | 2 ++ configure.ac | 3 ++- 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/Python/asm_trampoline.S b/Python/asm_trampoline.S index a14e68c0e81932..93adae3d99038f 100644 --- a/Python/asm_trampoline.S +++ b/Python/asm_trampoline.S @@ -1,5 +1,9 @@ .text +#if defined(__APPLE__) + .globl __Py_trampoline_func_start +#else .globl _Py_trampoline_func_start +#endif # The following assembly is equivalent to: # PyObject * # trampoline(PyThreadState *ts, _PyInterpreterFrame *f, @@ -7,7 +11,11 @@ # { # return evaluator(ts, f, throwflag); # } +#if defined(__APPLE__) +__Py_trampoline_func_start: +#else _Py_trampoline_func_start: +#endif #ifdef __x86_64__ #if defined(__CET__) && (__CET__ & 1) endbr64 @@ -35,9 +43,14 @@ _Py_trampoline_func_start: addi sp,sp,16 jr ra #endif +#if defined(__APPLE__) + .globl __Py_trampoline_func_end +__Py_trampoline_func_end: +#else .globl _Py_trampoline_func_end _Py_trampoline_func_end: .section .note.GNU-stack,"",@progbits +#endif # Note for indicating the assembly code supports CET #if defined(__x86_64__) && defined(__CET__) && (__CET__ & 1) .section .note.gnu.property,"a" diff --git a/Python/perf_jit_trampoline.c b/Python/perf_jit_trampoline.c index 469882d9b2f025..e30028360b9996 100644 --- a/Python/perf_jit_trampoline.c +++ b/Python/perf_jit_trampoline.c @@ -66,7 +66,9 @@ #ifdef PY_HAVE_PERF_TRAMPOLINE /* Standard library includes for perf jitdump implementation */ -#include <elf.h> // ELF architecture constants +#if defined(__linux__) +# include <elf.h> // ELF architecture constants +#endif #include <fcntl.h> // File control operations #include <stdio.h> // Standard I/O operations #include <stdlib.h> // Standard library functions @@ -74,7 +76,9
@@ #include <sys/types.h> // System data types #include <unistd.h> // System calls (sysconf, getpid) #include <sys/time.h> // Time functions (gettimeofday) -#include <sys/syscall.h> // System call interface +#if defined(__linux__) +# include <sys/syscall.h> // System call interface +#endif // ============================================================================= // CONSTANTS AND CONFIGURATION @@ -101,6 +105,16 @@ * based on the actual unwind information requirements. */ + +/* These constants are defined inside <elf.h>, which we can't use outside of linux. */ +#if !defined(__linux__) +# define EM_386 3 +# define EM_X86_64 62 +# define EM_ARM 40 +# define EM_AARCH64 183 +# define EM_RISCV 243 +#endif + /* Convenient access to the global trampoline API state */ #define trampoline_api _PyRuntime.ceval.perf.trampoline_api @@ -194,7 +208,7 @@ struct BaseEvent { typedef struct { struct BaseEvent base; // Common event header uint32_t process_id; // Process ID where code was generated - uint32_t thread_id; // Thread ID where code was generated + uint64_t thread_id; // Thread ID where code was generated uint64_t vma; // Virtual memory address where code is loaded uint64_t code_address; // Address of the actual machine code uint64_t code_size; // Size of the machine code in bytes @@ -1263,7 +1277,11 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr, ev.base.size = sizeof(ev) + (name_length+1) + size; ev.base.time_stamp = get_current_monotonic_ticks(); ev.process_id = getpid(); +#if defined(__APPLE__) + pthread_threadid_np(NULL, &ev.thread_id); +#else ev.thread_id = syscall(SYS_gettid); // Get thread ID via system call +#endif ev.vma = base; // Virtual memory address ev.code_address = base; // Same as VMA for our use case ev.code_size = size; diff --git a/configure b/configure index 3570eecdcef94f..b346dbc0e0fb68 100755 --- a/configure +++ b/configure @@ -13816,6 +13816,8 @@ case $PLATFORM_TRIPLET in #( perf_trampoline=yes ;; #( aarch64-linux-gnu) : perf_trampoline=yes ;; #( + darwin) : + perf_trampoline=yes ;; #( *) :
perf_trampoline=no ;; diff --git a/configure.ac b/configure.ac index 3566c4b9038c2b..f142d50af2c164 100644 --- a/configure.ac +++ b/configure.ac @@ -3692,12 +3692,13 @@ case "$ac_sys_system" in esac AC_MSG_RESULT([$SHLIBS]) -dnl perf trampoline is Linux specific and requires an arch-specific +dnl perf trampoline is Linux and macOS specific and requires an arch-specific dnl trampoline in assembly. AC_MSG_CHECKING([perf trampoline]) AS_CASE([$PLATFORM_TRIPLET], [x86_64-linux-gnu], [perf_trampoline=yes], [aarch64-linux-gnu], [perf_trampoline=yes], + [darwin], [perf_trampoline=yes], [perf_trampoline=no] ) AC_MSG_RESULT([$perf_trampoline]) From 17df9c838c1d28b896c31dac5db4c5be3488d9e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Wed, 9 Jul 2025 00:11:37 +0200 Subject: [PATCH 02/11] Make sure that test_perfmaps.py test is not skipped on macOS --- Lib/test/test_perfmaps.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_perfmaps.py b/Lib/test/test_perfmaps.py index d4c6fe0124af18..647c32656abd6d 100644 --- a/Lib/test/test_perfmaps.py +++ b/Lib/test/test_perfmaps.py @@ -1,5 +1,5 @@ import os -import sys +import sysconfig import unittest try: @@ -7,10 +7,14 @@ except ImportError: raise unittest.SkipTest("requires _testinternalcapi") +def supports_trampoline_profiling(): + perf_trampoline = sysconfig.get_config_var("PY_HAVE_PERF_TRAMPOLINE") + if not perf_trampoline: + return False + return int(perf_trampoline) == 1 -if sys.platform != 'linux': - raise unittest.SkipTest('Linux only') - +if not supports_trampoline_profiling(): + raise unittest.SkipTest("perf trampoline profiling not supported") class TestPerfMapWriting(unittest.TestCase): def test_write_perf_map_entry(self): From 3dac7a9d81642134ac3be54f5450fde0a7487725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Wed, 9 Jul 2025 00:13:22 +0200 Subject: [PATCH 03/11] Update the docs for perfmaps to mention 
that macOS is supported --- Doc/c-api/perfmaps.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Doc/c-api/perfmaps.rst b/Doc/c-api/perfmaps.rst index 77b5e3c0876bbb..81fb5673f008aa 100644 --- a/Doc/c-api/perfmaps.rst +++ b/Doc/c-api/perfmaps.rst @@ -5,11 +5,12 @@ Support for Perf Maps ---------------------- -On supported platforms (as of this writing, only Linux), the runtime can take +On supported platforms (as of this writing, Linux and macOS), the runtime can take advantage of *perf map files* to make Python functions visible to an external -profiling tool (such as `perf `_). -A running process may create a file in the ``/tmp`` directory, which contains entries -that can map a section of executable code to a name. This interface is described in the +profiling tool (such as `perf `_ or +`samply `_). A running process may create a +file in the ``/tmp`` directory, which contains entries that can map a section +of executable code to a name. This interface is described in the `documentation of the Linux Perf tool `_. From d428ba489520300f79fd516f4731f53ec23eab4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Wed, 9 Jul 2025 01:06:03 +0200 Subject: [PATCH 04/11] Add myself to Misc/ACKS --- Misc/ACKS | 1 + 1 file changed, 1 insertion(+) diff --git a/Misc/ACKS b/Misc/ACKS index fabd79b9f74210..cf9612e5501da2 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -43,6 +43,7 @@ Ray Allen Billy G. 
Allie Jamiel Almeida Kevin Altis +Nazım Can Altınova Samy Lahfa Skyler Leigh Amador Joe Amenta From 50e80bffec7c6efccc27374ee001b7a2c08bfb5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Wed, 9 Jul 2025 11:15:46 +0200 Subject: [PATCH 05/11] Add a Misc/NEWS.d entry --- .../2025-07-09-11-15-42.gh-issue-136459.m4Udh8.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-07-09-11-15-42.gh-issue-136459.m4Udh8.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-09-11-15-42.gh-issue-136459.m4Udh8.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-09-11-15-42.gh-issue-136459.m4Udh8.rst new file mode 100644 index 00000000000000..b74ff6b3b3347f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-09-11-15-42.gh-issue-136459.m4Udh8.rst @@ -0,0 +1,3 @@ +Add support for perf trampoline on macOS, to allow profilers with JIT map +support to read python calls. While profiling, ``PYTHONPERFSUPPORT=1`` can +be appended to enable the trampoline. From 3f9e24dbd9828fb507637e798f30461c29285f3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Wed, 9 Jul 2025 23:03:55 +0200 Subject: [PATCH 06/11] Define constants per-platform --- Python/perf_jit_trampoline.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/Python/perf_jit_trampoline.c b/Python/perf_jit_trampoline.c index e30028360b9996..999a9b05903470 100644 --- a/Python/perf_jit_trampoline.c +++ b/Python/perf_jit_trampoline.c @@ -108,11 +108,17 @@ /* These constants are defined inside <elf.h>, which we can't use outside of linux.
*/ #if !defined(__linux__) -# define EM_386 3 -# define EM_X86_64 62 -# define EM_ARM 40 -# define EM_AARCH64 183 -# define EM_RISCV 243 +# if defined(__i386__) || defined(_M_IX86) +# define EM_386 3 +# elif defined(__arm__) || defined(_M_ARM) +# define EM_ARM 40 +# elif defined(__x86_64__) || defined(_M_X64) +# define EM_X86_64 62 +# elif defined(__aarch64__) +# define EM_AARCH64 183 +# elif defined(__riscv) +# define EM_RISCV 243 +# endif #endif /* Convenient access to the global trampoline API state */ From dc54659a2fb0eb0ed1883b8a0066834136ccbc97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Wed, 9 Jul 2025 23:25:15 +0200 Subject: [PATCH 07/11] Do not mmap the jitdump file on macOS On macOS, we don't need to call mmap because samply has already detected the file path during the call to `open` before (it interposes `open` with a preloaded library), and because the mmap call can be slow. --- Python/perf_jit_trampoline.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Python/perf_jit_trampoline.c b/Python/perf_jit_trampoline.c index 999a9b05903470..3dcbcd34003102 100644 --- a/Python/perf_jit_trampoline.c +++ b/Python/perf_jit_trampoline.c @@ -1055,6 +1055,10 @@ static void* perf_map_jit_init(void) { return NULL; // Failed to get page size } +#if defined(__APPLE__) + // On macOS, samply uses a preload to find jitdumps and this mmap can be slow. 
+ perf_jit_map_state.mapped_buffer = NULL; +#else /* * Map the first page of the jitdump file * @@ -1077,6 +1081,7 @@ static void* perf_map_jit_init(void) { close(fd); return NULL; // Memory mapping failed } +#endif perf_jit_map_state.mapped_size = page_size; From 8a20a4b860a4f8abe8030569a58b07248f1c5ff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Thu, 10 Jul 2025 15:11:47 +0200 Subject: [PATCH 08/11] Update the perf profiling doc to include samply --- Doc/howto/perf_profiling.rst | 51 +++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/Doc/howto/perf_profiling.rst b/Doc/howto/perf_profiling.rst index 96d757ac452b5e..62f6699612ebeb 100644 --- a/Doc/howto/perf_profiling.rst +++ b/Doc/howto/perf_profiling.rst @@ -2,34 +2,35 @@ .. _perf_profiling: -============================================== -Python support for the Linux ``perf`` profiler -============================================== +======================================================== +Python support for the ``perf map`` compatible profilers +======================================================== :author: Pablo Galindo -`The Linux perf profiler `_ -is a very powerful tool that allows you to profile and obtain -information about the performance of your application. -``perf`` also has a very vibrant ecosystem of tools -that aid with the analysis of the data that it produces. +`The Linux perf profiler `_ and +`samply `_ are powerful tools that allow you to +profile and obtain information about the performance of your application. +Both tools have vibrant ecosystems that aid with the analysis of the data they produce. 
-The main problem with using the ``perf`` profiler with Python applications is that -``perf`` only gets information about native symbols, that is, the names of +The main problem with using these profilers with Python applications is that +they only get information about native symbols, that is, the names of functions and procedures written in C. This means that the names and file names -of Python functions in your code will not appear in the output of ``perf``. +of Python functions in your code will not appear in the profiler output. Since Python 3.12, the interpreter can run in a special mode that allows Python -functions to appear in the output of the ``perf`` profiler. When this mode is +functions to appear in the output of compatible profilers. When this mode is enabled, the interpreter will interpose a small piece of code compiled on the -fly before the execution of every Python function and it will teach ``perf`` the +fly before the execution of every Python function and it will teach the profiler the relationship between this piece of code and the associated Python function using :doc:`perf map files <../c-api/perfmaps>`. .. note:: - Support for the ``perf`` profiler is currently only available for Linux on - select architectures. Check the output of the ``configure`` build step or + Support for profiling is available on Linux and macOS on select architectures. + ``perf`` is available on Linux, while ``samply`` can be used on both Linux and macOS. + ``samply`` support on macOS is available starting from Python 3.14. + Check the output of the ``configure`` build step or check the output of ``python -m sysconfig | grep HAVE_PERF_TRAMPOLINE`` to see if your system is supported. @@ -148,6 +149,26 @@ Instead, if we run the same experiment with ``perf`` support enabled we get: +Using ``samply`` profiler +------------------------- + +``samply`` is a modern profiler that can be used as an alternative to ``perf``. 
+It uses the same perf map files that Python generates, making it compatible +with Python's profiling support. ``samply`` is particularly useful on macOS +where ``perf`` is not available. + +To use ``samply`` with Python, first install it following the instructions at +https://github.com/mstange/samply, then run:: + + $ samply record PYTHONPERFSUPPORT=1 python my_script.py + +This will open a web interface where you can analyze the profiling data +interactively. The advantage of ``samply`` is that it provides a modern +web-based interface for analyzing profiling data and works on both Linux +and macOS. + +On macOS, ``samply`` support requires Python 3.14 or later. + How to enable ``perf`` profiling support ---------------------------------------- From 1432cc8e696db4b73710d59623c021d97eb177a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Thu, 10 Jul 2025 15:57:14 +0200 Subject: [PATCH 09/11] Add some tests for samply profiling --- Lib/test/test_samply_profiler.py | 244 +++++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 Lib/test/test_samply_profiler.py diff --git a/Lib/test/test_samply_profiler.py b/Lib/test/test_samply_profiler.py new file mode 100644 index 00000000000000..ec0ed37ffd047b --- /dev/null +++ b/Lib/test/test_samply_profiler.py @@ -0,0 +1,244 @@ +import unittest +import subprocess +import sys +import sysconfig +import os +import pathlib +from test import support +from test.support.script_helper import ( + make_script, +) +from test.support.os_helper import temp_dir + + +if not support.has_subprocess_support: + raise unittest.SkipTest("test module requires subprocess") + +if support.check_sanitizer(address=True, memory=True, ub=True, function=True): + # gh-109580: Skip the test because it does crash randomly if Python is + # built with ASAN. 
+ raise unittest.SkipTest("test crash randomly on ASAN/MSAN/UBSAN build") + + +def supports_trampoline_profiling(): + perf_trampoline = sysconfig.get_config_var("PY_HAVE_PERF_TRAMPOLINE") + if not perf_trampoline: + return False + return int(perf_trampoline) == 1 + + +if not supports_trampoline_profiling(): + raise unittest.SkipTest("perf trampoline profiling not supported") + + +def samply_command_works(): + try: + cmd = ["samply", "--help"] + except (subprocess.SubprocessError, OSError): + return False + + # Check that we can run a simple samply run + with temp_dir() as script_dir: + try: + output_file = script_dir + "/profile.json.gz" + cmd = ( + "samply", + "record", + "--save-only", + "--output", + output_file, + sys.executable, + "-c", + 'print("hello")', + ) + env = {**os.environ, "PYTHON_JIT": "0"} + stdout = subprocess.check_output( + cmd, cwd=script_dir, text=True, stderr=subprocess.STDOUT, env=env + ) + except (subprocess.SubprocessError, OSError): + return False + + if "hello" not in stdout: + return False + + return True + + +def run_samply(cwd, *args, **env_vars): + env = os.environ.copy() + if env_vars: + env.update(env_vars) + env["PYTHON_JIT"] = "0" + output_file = cwd + "/profile.json.gz" + base_cmd = ( + "samply", + "record", + "--save-only", + "-o", output_file, + ) + proc = subprocess.run( + base_cmd + args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env, + ) + if proc.returncode: + print(proc.stderr, file=sys.stderr) + raise ValueError(f"Samply failed with return code {proc.returncode}") + + import gzip + with gzip.open(output_file, mode="rt", encoding="utf-8") as f: + return f.read() + + +@unittest.skipUnless(samply_command_works(), "samply command doesn't work") +class TestSamplyProfilerMixin: + def run_samply(self, script_dir, perf_mode, script): + raise NotImplementedError() + + def test_python_calls_appear_in_the_stack_if_perf_activated(self): + with temp_dir() as script_dir: + code = """if 1: + def foo(n): + x = 0 + for i 
in range(n): + x += i + + def bar(n): + foo(n) + + def baz(n): + bar(n) + + baz(10000000) + """ + script = make_script(script_dir, "perftest", code) + output = self.run_samply(script_dir, script) + + self.assertIn(f"py::foo:{script}", output) + self.assertIn(f"py::bar:{script}", output) + self.assertIn(f"py::baz:{script}", output) + + def test_python_calls_do_not_appear_in_the_stack_if_perf_deactivated(self): + with temp_dir() as script_dir: + code = """if 1: + def foo(n): + x = 0 + for i in range(n): + x += i + + def bar(n): + foo(n) + + def baz(n): + bar(n) + + baz(10000000) + """ + script = make_script(script_dir, "perftest", code) + output = self.run_samply( + script_dir, script, activate_trampoline=False + ) + + self.assertNotIn(f"py::foo:{script}", output) + self.assertNotIn(f"py::bar:{script}", output) + self.assertNotIn(f"py::baz:{script}", output) + + +@unittest.skipUnless(samply_command_works(), "samply command doesn't work") +class TestSamplyProfiler(unittest.TestCase, TestSamplyProfilerMixin): + def run_samply(self, script_dir, script, activate_trampoline=True): + if activate_trampoline: + return run_samply(script_dir, sys.executable, "-Xperf", script) + return run_samply(script_dir, sys.executable, script) + + def setUp(self): + super().setUp() + self.perf_files = set(pathlib.Path("/tmp/").glob("perf-*.map")) + + def tearDown(self) -> None: + super().tearDown() + files_to_delete = ( + set(pathlib.Path("/tmp/").glob("perf-*.map")) - self.perf_files + ) + for file in files_to_delete: + file.unlink() + + def test_pre_fork_compile(self): + code = """if 1: + import sys + import os + import sysconfig + from _testinternalcapi import ( + compile_perf_trampoline_entry, + perf_trampoline_set_persist_after_fork, + ) + + def foo_fork(): + pass + + def bar_fork(): + foo_fork() + + def foo(): + import time; time.sleep(1) + + def bar(): + foo() + + def compile_trampolines_for_all_functions(): + perf_trampoline_set_persist_after_fork(1) + for _, obj in 
globals().items(): + if callable(obj) and hasattr(obj, '__code__'): + compile_perf_trampoline_entry(obj.__code__) + + if __name__ == "__main__": + compile_trampolines_for_all_functions() + pid = os.fork() + if pid == 0: + print(os.getpid()) + bar_fork() + else: + bar() + """ + + with temp_dir() as script_dir: + script = make_script(script_dir, "perftest", code) + env = {**os.environ, "PYTHON_JIT": "0"} + with subprocess.Popen( + [sys.executable, "-Xperf", script], + universal_newlines=True, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + env=env, + ) as process: + stdout, stderr = process.communicate() + + self.assertEqual(process.returncode, 0) + self.assertNotIn("Error:", stderr) + child_pid = int(stdout.strip()) + perf_file = pathlib.Path(f"/tmp/perf-{process.pid}.map") + perf_child_file = pathlib.Path(f"/tmp/perf-{child_pid}.map") + self.assertTrue(perf_file.exists()) + self.assertTrue(perf_child_file.exists()) + + perf_file_contents = perf_file.read_text() + self.assertIn(f"py::foo:{script}", perf_file_contents) + self.assertIn(f"py::bar:{script}", perf_file_contents) + self.assertIn(f"py::foo_fork:{script}", perf_file_contents) + self.assertIn(f"py::bar_fork:{script}", perf_file_contents) + + child_perf_file_contents = perf_child_file.read_text() + self.assertIn(f"py::foo_fork:{script}", child_perf_file_contents) + self.assertIn(f"py::bar_fork:{script}", child_perf_file_contents) + + # Pre-compiled perf-map entries of a forked process must be + # identical in both the parent and child perf-map files. 
+ perf_file_lines = perf_file_contents.split("\n") + for line in perf_file_lines: + if f"py::foo_fork:{script}" in line or f"py::bar_fork:{script}" in line: + self.assertIn(line, child_perf_file_contents) + + +if __name__ == "__main__": + unittest.main() From a7b043da1e8155073176c85fd0113b6e5fe9b08f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Thu, 17 Jul 2025 11:37:28 +0200 Subject: [PATCH 10/11] Apply documentation suggestions from code review Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/c-api/perfmaps.rst | 2 +- Doc/howto/perf_profiling.rst | 18 +++++++++--------- ...5-07-09-11-15-42.gh-issue-136459.m4Udh8.rst | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Doc/c-api/perfmaps.rst b/Doc/c-api/perfmaps.rst index 81fb5673f008aa..76a1e9f528dc70 100644 --- a/Doc/c-api/perfmaps.rst +++ b/Doc/c-api/perfmaps.rst @@ -5,7 +5,7 @@ Support for Perf Maps ---------------------- -On supported platforms (as of this writing, Linux and macOS), the runtime can take +On supported platforms (Linux and macOS), the runtime can take advantage of *perf map files* to make Python functions visible to an external profiling tool (such as `perf `_ or `samply `_). A running process may create a diff --git a/Doc/howto/perf_profiling.rst b/Doc/howto/perf_profiling.rst index 62f6699612ebeb..431b9501d9e191 100644 --- a/Doc/howto/perf_profiling.rst +++ b/Doc/howto/perf_profiling.rst @@ -28,8 +28,8 @@ relationship between this piece of code and the associated Python function using .. note:: Support for profiling is available on Linux and macOS on select architectures. - ``perf`` is available on Linux, while ``samply`` can be used on both Linux and macOS. - ``samply`` support on macOS is available starting from Python 3.14. + Perf is available on Linux, while samply can be used on both Linux and macOS. + samply support on macOS is available starting from Python 3.14. 
Check the output of the ``configure`` build step or check the output of ``python -m sysconfig | grep HAVE_PERF_TRAMPOLINE`` to see if your system is supported. @@ -149,25 +149,25 @@ Instead, if we run the same experiment with ``perf`` support enabled we get: -Using ``samply`` profiler +Using the samply profiler ------------------------- -``samply`` is a modern profiler that can be used as an alternative to ``perf``. +samply is a modern profiler that can be used as an alternative to perf. It uses the same perf map files that Python generates, making it compatible -with Python's profiling support. ``samply`` is particularly useful on macOS -where ``perf`` is not available. +with Python's profiling support. samply is particularly useful on macOS +where perf is not available. -To use ``samply`` with Python, first install it following the instructions at +To use samply with Python, first install it following the instructions at https://github.com/mstange/samply, then run:: $ samply record PYTHONPERFSUPPORT=1 python my_script.py This will open a web interface where you can analyze the profiling data -interactively. The advantage of ``samply`` is that it provides a modern +interactively. The advantage of samply is that it provides a modern web-based interface for analyzing profiling data and works on both Linux and macOS. -On macOS, ``samply`` support requires Python 3.14 or later. +On macOS, samply support requires Python 3.14 or later. 
How to enable ``perf`` profiling support ---------------------------------------- diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-09-11-15-42.gh-issue-136459.m4Udh8.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-09-11-15-42.gh-issue-136459.m4Udh8.rst index b74ff6b3b3347f..470f3311526e83 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-09-11-15-42.gh-issue-136459.m4Udh8.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-09-11-15-42.gh-issue-136459.m4Udh8.rst @@ -1,3 +1,3 @@ Add support for perf trampoline on macOS, to allow profilers with JIT map -support to read python calls. While profiling, ``PYTHONPERFSUPPORT=1`` can +support to read Python calls. While profiling, ``PYTHONPERFSUPPORT=1`` can be appended to enable the trampoline. From 3ae5cb22bb558bab4d3a1a4b74a6ddec0901cae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Thu, 17 Jul 2025 11:45:29 +0200 Subject: [PATCH 11/11] Change the version number in the docs and mention macOS restrictions --- Doc/howto/perf_profiling.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Doc/howto/perf_profiling.rst b/Doc/howto/perf_profiling.rst index 431b9501d9e191..fc4772bbccab57 100644 --- a/Doc/howto/perf_profiling.rst +++ b/Doc/howto/perf_profiling.rst @@ -29,7 +29,7 @@ relationship between this piece of code and the associated Python function using Support for profiling is available on Linux and macOS on select architectures. Perf is available on Linux, while samply can be used on both Linux and macOS. - samply support on macOS is available starting from Python 3.14. + samply support on macOS is available starting from Python 3.15. Check the output of the ``configure`` build step or check the output of ``python -m sysconfig | grep HAVE_PERF_TRAMPOLINE`` to see if your system is supported. @@ -167,7 +167,12 @@ interactively.
The advantage of samply is that it provides a modern web-based interface for analyzing profiling data and works on both Linux and macOS. -On macOS, samply support requires Python 3.14 or later. +On macOS, samply support requires Python 3.15 or later. Also on macOS, samply +can't profile signed Python executables due to restrictions by macOS. You can +profile with Python binaries that you've compiled yourself, or which are +unsigned or locally-signed (such as anything installed by Homebrew). In +order to attach to running processes on macOS, run ``samply setup`` once (and +every time samply is updated) to self-sign the samply binary. How to enable ``perf`` profiling support ----------------------------------------