From a4db20b970d334f43fac86d1c388d07ab90ba110 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Wed, 5 Nov 2025 02:14:29 +0000 Subject: [PATCH 01/20] add ltoir test support --- cuda_core/tests/test_program.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index adc778973c..5c966296fe 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -387,3 +387,33 @@ def test_nvvm_program_options(init_cuda, nvvm_ir, options): assert ".visible .entry simple(" in ptx_text program.close() + +@nvvm_available +@pytest.mark.parametrize( + "options", + [ + ProgramOptions(name="ltoir_test1", arch="sm_90", device_code_optimize=False), + ProgramOptions(name="ltoir_test2", arch="sm_100", link_time_optimization=True), + ProgramOptions( + name="ltoir_test3", + arch="sm_90", + ftz=True, + prec_sqrt=False, + prec_div=False, + fma=True, + device_code_optimize=True, + link_time_optimization=True, + ), + ], +) +def test_nvvm_program_options_ltoir(init_cuda, nvvm_ir, options): + """Test NVVM programs for LTOIR with different options""" + program = Program(nvvm_ir, "nvvm", options) + assert program.backend == "NVVM" + + ltoir_code = program.compile("ltoir") + assert isinstance(ltoir_code, ObjectCode) + assert ltoir_code.name == options.name + code_content = ltoir_code.code + assert len(ltoir_code.code) > 0 + program.close() From fb6cfb3902b8a22aba92624ea417616033018852 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Tue, 25 Nov 2025 04:10:17 +0000 Subject: [PATCH 02/20] add options for multi-modules --- cuda_core/cuda/core/experimental/_program.py | 30 ++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 1db453fed1..2e0115be48 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -298,6 +298,7 @@ class ProgramOptions: split_compile: int | None = None fdevice_syntax_only: bool | None = None minimal: bool | None = None + extra_sources: Union[str, bytes, list[Union[str, bytes]], tuple[Union[str, bytes]]] | None = None def __post_init__(self): self._name = self.name.encode() @@ -479,6 +480,9 @@ def __init__(self, code, code_type, options: ProgramOptions = None): assert_type(code, str) # TODO: support pre-loaded headers & include names # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved + + if options.extra_sources is not None: + raise ValueError("extra_sources is not supported by the NVRTC backend (C++ code_type)") self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), options._name, 0, [], [])) self._mnff.backend = "NVRTC" @@ -487,6 +491,9 @@ def __init__(self, code, code_type, options: ProgramOptions = None): elif code_type == "ptx": assert_type(code, str) + if options.extra_sources is not None: + raise ValueError("extra_sources is not supported by the PTX backend.") + self._linker = Linker( ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options) ) @@ -502,6 +509,29 @@ def __init__(self, code, code_type, options: ProgramOptions = None): self._mnff.handle = nvvm.create_program() self._mnff.backend = "NVVM" nvvm.add_module_to_program(self._mnff.handle, code, len(code), options._name.decode()) + if options.extra_sources is not None: + if isinstance(options.extra_sources, (str, bytes)): + extra_sources = [options.extra_sources] + elif isinstance(options.extra_sources, (list, tuple)): + extra_sources = options.extra_sources + else: + raise TypeError("extra_sources must be str, bytes, list, or tuple") + + if len(extra_sources) == 0: + raise ValueError("extra_sources cannot be empty if provided") + + for i, extra_source in enumerate(extra_sources): + if isinstance(extra_source, str): + extra_source = extra_source.encode("utf-8") + elif not isinstance(extra_source, (bytes, bytearray)): + raise TypeError(f"Extra source {i} must be provided as str, bytes, or bytearray") + + if len(extra_source) == 0: + raise ValueError(f"Extra source {i} cannot be empty") + + extra_name = f"{options.name}_extra_{i}" + nvvm.add_module_to_program(self._mnff.handle, extra_source, len(extra_source), extra_name) + self._backend = "NVVM" self._linker = None From 7aaed4e4f36b890b73098430ae39384217e3d4d1 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Tue, 25 Nov 2025 11:05:56 +0000 Subject: [PATCH 03/20] add tests --- cuda_core/tests/test_program.py | 119 ++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 5c966296fe..70b131556d 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -417,3 +417,122 @@ def test_nvvm_program_options_ltoir(init_cuda, nvvm_ir, options): code_content = ltoir_code.code assert len(ltoir_code.code) > 0 program.close() + + +@nvvm_available +def test_nvvm_program_with_single_extra_source(nvvm_ir): + """Test NVVM program with a single extra source""" + from cuda.core.experimental._program import _get_nvvm_module + nvvm = _get_nvvm_module() + major, minor, debug_major, debug_minor = nvvm.ir_version() + #helper nvvm ir for multiple module loading + helper_nvvm_ir = f"""target triple = "nvptx64-unknown-cuda" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +define i32 @helper_add(i32 %x) {{ +entry: + %result = add i32 %x, 1 + ret i32 %result +}} + +!nvvmir.version = !{{!0}} +!0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} +""" + + options = ProgramOptions(name="multi_module_test", extra_sources=helper_nvvm_ir) + program = Program(nvvm_ir, "nvvm", options) + + assert program.backend == "NVVM" + + ptx_code = program.compile("ptx") + assert isinstance(ptx_code, ObjectCode) + assert ptx_code.name == "multi_module_test" + + program.close() + +@nvvm_available +def test_nvvm_program_with_multiple_extra_sources(): + """Test NVVM program with multiple extra sources""" + from cuda.core.experimental._program import _get_nvvm_module + nvvm = _get_nvvm_module() + major, minor, debug_major, debug_minor = nvvm.ir_version() + + main_nvvm_ir = f"""target triple = "nvptx64-unknown-cuda" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +declare i32 @helper_add(i32) nounwind readnone +declare i32 @helper_mul(i32) nounwind readnone + +define void @main_kernel(i32* %data) {{ +entry: + %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + %ptr = getelementptr inbounds i32, i32* %data, i32 %tid + %val = load i32, i32* %ptr, align 4 + + %val1 = call i32 @helper_add(i32 %val) + %val2 = call i32 @helper_mul(i32 %val1) + + store i32 %val2, i32* %ptr, align 4 + ret void +}} + +declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone + +!nvvm.annotations = !{{!0}} +!0 = !{{void (i32*)* @main_kernel, !"kernel", i32 1}} + +!nvvmir.version = !{{!1}} +!1 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} +""" + + helper1_ir = f"""target triple = "nvptx64-unknown-cuda" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +define i32 @helper_add(i32 %x) nounwind readnone {{ +entry: + %result = add i32 %x, 1 + ret i32 %result +}} + +!nvvmir.version = !{{!0}} +!0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} +""" + + helper2_ir = f"""target triple = "nvptx64-unknown-cuda" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +define i32 @helper_mul(i32 %x) nounwind readnone {{ +entry: + %result = mul i32 %x, 2 + ret i32 %result +}} + +!nvvmir.version = !{{!0}} +!0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} +""" + + options = ProgramOptions( + name="nvvm_multi_helper_test", + extra_sources=[helper1_ir, helper2_ir] + ) + program = Program(main_nvvm_ir, "nvvm", options) + + assert program.backend == "NVVM" + ptx_code = program.compile("ptx") + assert isinstance(ptx_code, ObjectCode) + assert ptx_code.name == "nvvm_multi_helper_test" + + ltoir_code = program.compile("ltoir") + assert isinstance(ltoir_code, ObjectCode) + assert ltoir_code.name == "nvvm_multi_helper_test" + + program.close() + +def test_cpp_program_with_extra_sources(): + #negative test with NVRTC with multiple sources + code = 'extern "C" __global__ void my_kernel(){}' + helper = 'extern "C" __global__ void helper(){}' + options = ProgramOptions(extra_sources = helper) + with pytest.raises(ValueError, match="extra_sources is not supported by the NVRTC backend"): + Program(code, "c++", options) + \ No newline at end of file From 64c7f7d3d7acc4785bd475530786777e5babe2ab Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Tue, 25 Nov 2025 14:48:43 +0000 Subject: [PATCH 04/20] add bitcode test --- cuda_core/tests/test_program.py | 45 +++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 70b131556d..24ae767220 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -527,6 +527,51 @@ def test_nvvm_program_with_multiple_extra_sources(): assert ltoir_code.name == "nvvm_multi_helper_test" program.close() + +@nvvm_available +def test_bitcode_format(): + import os + from pathlib import Path + bitcode_path = os.environ.get("BITCODE_NVVM_PATH") + if not bitcode_path: + pytest.skip(f"BITCODE_NVVM_PATH environment variable is not set." + "Disabling the test.") + bitcode_file = Path(bitcode_path) + if not bitcode_file.exists(): + pytest.skip(f"Bitcode file not found: {bitcode_path}") + + if not bitcode_file.suffix == '.bc': + pytest.skip(f"Expected .bc file, got: {bitcode_file.suffix}") + + try: + with open(bitcode_file, 'rb') as f: + bitcode_data = f.read() + + if len(bitcode_data) < 4: + pytest.skip("Bitcode file appears to be empty or invalid") + + options = ProgramOptions( + name=f"existing_bitcode_{bitcode_file.stem}", + arch="sm_90" + ) + program = Program(bitcode_data, "nvvm", options) + + assert program.backend == "NVVM" + ptx_result = program.compile("ptx") + assert isinstance(ptx_result, ObjectCode) + assert ptx_result.name.startswith("existing_bitcode_") + assert len(ptx_result.code) > 0 + try: + ltoir_result = program.compile("ltoir") + assert isinstance(ltoir_result, ObjectCode) + assert len(ltoir_result.code) > 0 + print(f"LTOIR size: {len(ltoir_result.code)} bytes") + except Exception as e: + print(f"LTOIR compilation failed : {e}") + program.close() + except Exception as e: + pytest.fail(f"Failed to compile existing bitcode file {bitcode_path}: {str(e)}") + def test_cpp_program_with_extra_sources(): #negative test with NVRTC with multiple sources From 42ba301078bc17c796cddbe99499c957633eb5d8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Nov 2025 14:51:09 +0000 Subject: [PATCH 05/20] [pre-commit.ci] auto code formatting --- cuda_core/cuda/core/experimental/_program.py | 14 ++-- cuda_core/tests/test_program.py | 68 ++++++++++---------- 2 files changed, 40 insertions(+), 42 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 2e0115be48..eadd37ca95 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -480,7 +480,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): assert_type(code, str) # TODO: support pre-loaded headers & include names # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved - + if options.extra_sources is not None: raise ValueError("extra_sources is not supported by the NVRTC backend (C++ code_type)") @@ -493,7 +493,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): assert_type(code, str) if options.extra_sources is not None: raise ValueError("extra_sources is not supported by the PTX backend.") - + self._linker = Linker( ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options) ) @@ -516,22 +516,22 @@ def __init__(self, code, code_type, options: ProgramOptions = None): extra_sources = options.extra_sources else: raise TypeError("extra_sources must be str, bytes, list, or tuple") - + if len(extra_sources) == 0: raise ValueError("extra_sources cannot be empty if provided") - + for i, extra_source in enumerate(extra_sources): if isinstance(extra_source, str): extra_source = extra_source.encode("utf-8") elif not isinstance(extra_source, (bytes, bytearray)): raise TypeError(f"Extra source {i} must be provided as str, bytes, or bytearray") - + if len(extra_source) == 0: raise ValueError(f"Extra source {i} cannot be empty") - + extra_name = f"{options.name}_extra_{i}" nvvm.add_module_to_program(self._mnff.handle, extra_source, len(extra_source), extra_name) - + self._backend = "NVVM" self._linker = None diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 24ae767220..9aca1878d4 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -388,13 +388,14 @@ def test_nvvm_program_options(init_cuda, nvvm_ir, options): program.close() + @nvvm_available @pytest.mark.parametrize( "options", [ - ProgramOptions(name="ltoir_test1", arch="sm_90", device_code_optimize=False), - ProgramOptions(name="ltoir_test2", arch="sm_100", link_time_optimization=True), - ProgramOptions( + ProgramOptions(name="ltoir_test1", arch="sm_90", device_code_optimize=False), + ProgramOptions(name="ltoir_test2", arch="sm_100", link_time_optimization=True), + ProgramOptions( name="ltoir_test3", arch="sm_90", ftz=True, @@ -403,7 +404,7 @@ def test_nvvm_program_options(init_cuda, nvvm_ir, options): fma=True, device_code_optimize=True, link_time_optimization=True, - ), + ), ], ) def test_nvvm_program_options_ltoir(init_cuda, nvvm_ir, options): @@ -423,9 +424,10 @@ def test_nvvm_program_options_ltoir(init_cuda, nvvm_ir, options): def test_nvvm_program_with_single_extra_source(nvvm_ir): """Test NVVM program with a single extra source""" from cuda.core.experimental._program import _get_nvvm_module + nvvm = _get_nvvm_module() major, minor, debug_major, debug_minor = nvvm.ir_version() - #helper nvvm ir for multiple module loading + # helper nvvm ir for multiple module loading helper_nvvm_ir = f"""target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @@ -441,22 +443,24 @@ def test_nvvm_program_with_single_extra_source(nvvm_ir): options = ProgramOptions(name="multi_module_test", extra_sources=helper_nvvm_ir) program = Program(nvvm_ir, "nvvm", options) - + assert program.backend == "NVVM" - + ptx_code = program.compile("ptx") assert isinstance(ptx_code, ObjectCode) assert ptx_code.name == "multi_module_test" - + program.close() + @nvvm_available def test_nvvm_program_with_multiple_extra_sources(): """Test NVVM program with multiple extra sources""" from cuda.core.experimental._program import _get_nvvm_module + nvvm = _get_nvvm_module() major, minor, debug_major, debug_minor = nvvm.ir_version() - + main_nvvm_ir = f"""target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @@ -468,10 +472,10 @@ def test_nvvm_program_with_multiple_extra_sources(): %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() %ptr = getelementptr inbounds i32, i32* %data, i32 %tid %val = load i32, i32* %ptr, align 4 - + %val1 = call i32 @helper_add(i32 %val) %val2 = call i32 @helper_mul(i32 %val1) - + store i32 %val2, i32* %ptr, align 4 ret void }} @@ -511,51 +515,46 @@ def test_nvvm_program_with_multiple_extra_sources(): !0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} """ - options = ProgramOptions( - name="nvvm_multi_helper_test", - extra_sources=[helper1_ir, helper2_ir] - ) + options = ProgramOptions(name="nvvm_multi_helper_test", extra_sources=[helper1_ir, helper2_ir]) program = Program(main_nvvm_ir, "nvvm", options) - + assert program.backend == "NVVM" ptx_code = program.compile("ptx") assert isinstance(ptx_code, ObjectCode) assert ptx_code.name == "nvvm_multi_helper_test" - + ltoir_code = program.compile("ltoir") assert isinstance(ltoir_code, ObjectCode) assert ltoir_code.name == "nvvm_multi_helper_test" - + program.close() + @nvvm_available def test_bitcode_format(): import os from pathlib import Path + bitcode_path = os.environ.get("BITCODE_NVVM_PATH") if not bitcode_path: - pytest.skip(f"BITCODE_NVVM_PATH environment variable is not set." - "Disabling the test.") + pytest.skip("BITCODE_NVVM_PATH environment variable is not set.Disabling the test.") bitcode_file = Path(bitcode_path) if not bitcode_file.exists(): pytest.skip(f"Bitcode file not found: {bitcode_path}") - - if not bitcode_file.suffix == '.bc': + + if not bitcode_file.suffix == ".bc": pytest.skip(f"Expected .bc file, got: {bitcode_file.suffix}") - + try: - with open(bitcode_file, 'rb') as f: + with open(bitcode_file, "rb") as f: bitcode_data = f.read() - + if len(bitcode_data) < 4: pytest.skip("Bitcode file appears to be empty or invalid") - - options = ProgramOptions( - name=f"existing_bitcode_{bitcode_file.stem}", - arch="sm_90" - ) + + options = ProgramOptions(name=f"existing_bitcode_{bitcode_file.stem}", arch="sm_90") program = Program(bitcode_data, "nvvm", options) - + assert program.backend == "NVVM" ptx_result = program.compile("ptx") assert isinstance(ptx_result, ObjectCode) @@ -572,12 +571,11 @@ def test_bitcode_format(): except Exception as e: pytest.fail(f"Failed to compile existing bitcode file {bitcode_path}: {str(e)}") - + def test_cpp_program_with_extra_sources(): - #negative test with NVRTC with multiple sources + # negative test with NVRTC with multiple sources code = 'extern "C" __global__ void my_kernel(){}' helper = 'extern "C" __global__ void helper(){}' - options = ProgramOptions(extra_sources = helper) + options = ProgramOptions(extra_sources=helper) with pytest.raises(ValueError, match="extra_sources is not supported by the NVRTC backend"): Program(code, "c++", options) - \ No newline at end of file From 7ca68992514e63cffbd51642a59a9f277e79540f Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Tue, 25 Nov 2025 18:14:22 +0000 Subject: [PATCH 06/20] fix format --- cuda_core/tests/test_program.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 9aca1878d4..7d3e1c8709 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -416,7 +416,7 @@ def test_nvvm_program_options_ltoir(init_cuda, nvvm_ir, options): assert isinstance(ltoir_code, ObjectCode) assert ltoir_code.name == options.name code_content = ltoir_code.code - assert len(ltoir_code.code) > 0 + assert len(code_content) > 0 program.close() @@ -439,7 +439,7 @@ def test_nvvm_program_with_single_extra_source(nvvm_ir): !nvvmir.version = !{{!0}} !0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} -""" +""" # noqa: E501 options = ProgramOptions(name="multi_module_test", extra_sources=helper_nvvm_ir) program = Program(nvvm_ir, "nvvm", options) @@ -487,7 +487,7 @@ def test_nvvm_program_with_multiple_extra_sources(): !nvvmir.version = !{{!1}} !1 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} -""" +""" # noqa: E501 helper1_ir = f"""target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @@ -500,7 +500,7 @@ def test_nvvm_program_with_multiple_extra_sources(): !nvvmir.version = !{{!0}} !0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} -""" +""" # noqa: E501 helper2_ir = f"""target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @@ -513,7 +513,7 @@ def test_nvvm_program_with_multiple_extra_sources(): !nvvmir.version = !{{!0}} !0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} -""" +""" # noqa: E501 options = ProgramOptions(name="nvvm_multi_helper_test", extra_sources=[helper1_ir, helper2_ir]) program = Program(main_nvvm_ir, "nvvm", options) @@ -542,7 +542,7 @@ def test_bitcode_format(): if not bitcode_file.exists(): pytest.skip(f"Bitcode file not found: {bitcode_path}") - if not bitcode_file.suffix == ".bc": + if bitcode_file.suffix != ".bc": pytest.skip(f"Expected .bc file, got: {bitcode_file.suffix}") try: From 0674ea17dda35f886279bdef2f33a4aa56071562 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Nov 2025 18:15:33 +0000 Subject: [PATCH 07/20] [pre-commit.ci] auto code formatting --- cuda_core/tests/test_program.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 7d3e1c8709..1b96a5f094 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -439,7 +439,7 @@ def test_nvvm_program_with_single_extra_source(nvvm_ir): !nvvmir.version = !{{!0}} !0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} -""" # noqa: E501 +""" # noqa: E501 options = ProgramOptions(name="multi_module_test", extra_sources=helper_nvvm_ir) program = Program(nvvm_ir, "nvvm", options) @@ -487,7 +487,7 @@ def test_nvvm_program_with_multiple_extra_sources(): !nvvmir.version = !{{!1}} !1 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} -""" # noqa: E501 +""" # noqa: E501 helper1_ir = f"""target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @@ -500,7 +500,7 @@ def test_nvvm_program_with_multiple_extra_sources(): !nvvmir.version = !{{!0}} !0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} -""" # noqa: E501 +""" # noqa: E501 helper2_ir = f"""target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @@ -513,7 +513,7 @@ def test_nvvm_program_with_multiple_extra_sources(): !nvvmir.version = !{{!0}} !0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} -""" # noqa: E501 +""" # noqa: E501 options = ProgramOptions(name="nvvm_multi_helper_test", extra_sources=[helper1_ir, helper2_ir]) program = Program(main_nvvm_ir, "nvvm", options) From 03b122466a0eb10b4adb1b692b4a36b017f7241e Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Wed, 26 Nov 2025 10:26:42 +0000 Subject: [PATCH 08/20] refresh --- cuda_core/cuda/core/experimental/_program.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index eadd37ca95..7d3ea15e46 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -479,11 +479,11 @@ def __init__(self, code, code_type, options: ProgramOptions = None): if code_type == "c++": assert_type(code, str) # TODO: support pre-loaded headers & include names - # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved - + if options.extra_sources is not None: raise ValueError("extra_sources is not supported by the NVRTC backend (C++ code_type)") + # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), options._name, 0, [], [])) self._mnff.backend = "NVRTC" self._backend = "NVRTC" From 033f11cacf560de6ccf1bf9605070cc912081ddf Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Mon, 1 Dec 2025 11:45:49 +0000 Subject: [PATCH 09/20] apply bitcode file from cupy_test helpers --- cuda_core/tests/test_program.py | 61 ++++---- .../cuda_python_test_helpers/nvvm_bitcode.py | 136 ++++++++++++++++++ 2 files changed, 163 insertions(+), 34 deletions(-) create mode 100644 cuda_python_test_helpers/cuda_python_test_helpers/nvvm_bitcode.py diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 1b96a5f094..c62e1712f6 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -14,6 +14,13 @@ cuda_driver_version = handle_return(driver.cuDriverGetVersion()) is_culink_backend = _linker._decide_nvjitlink_or_driver() +try: + from cuda_python_test_helpers.nvvm_bitcode import ( + minimal_nvvmir + ) + _test_helpers_available = True +except ImportError: + _test_helpers_available = False def _is_nvvm_available(): """Check if NVVM is available.""" @@ -531,46 +538,32 @@ def test_nvvm_program_with_multiple_extra_sources(): @nvvm_available -def test_bitcode_format(): +@pytest.mark.skipif(not _test_helpers_available, reason = "cuda_python_test_helpers not accessible") +def test_bitcode_format(minimal_nvvmir): import os from pathlib import Path + + if len(minimal_nvvmir) < 4: + pytest.skip("Bitcode file is not valid or empty") - bitcode_path = os.environ.get("BITCODE_NVVM_PATH") - if not bitcode_path: - pytest.skip("BITCODE_NVVM_PATH environment variable is not set.Disabling the test.") - bitcode_file = Path(bitcode_path) - if not bitcode_file.exists(): - pytest.skip(f"Bitcode file not found: {bitcode_path}") - - if bitcode_file.suffix != ".bc": - pytest.skip(f"Expected .bc file, got: {bitcode_file.suffix}") + options = ProgramOptions(name=f"minimal_nvvmir_bitcode_test", arch="sm_90") + program = Program(minimal_nvvmir, "nvvm", options) + assert program.backend == "NVVM" + ptx_result = program.compile("ptx") + assert isinstance(ptx_result, ObjectCode) + assert ptx_result.name == "minimal_nvvmir_bitcode_test" + assert len(ptx_result.code) > 0 + prgram_lto = Program(minimal_nvvmir, "nvvm", options) try: - with open(bitcode_file, "rb") as f: - bitcode_data = f.read() - - if len(bitcode_data) < 4: - pytest.skip("Bitcode file appears to be empty or invalid") - - options = ProgramOptions(name=f"existing_bitcode_{bitcode_file.stem}", arch="sm_90") - program = Program(bitcode_data, "nvvm", options) - - assert program.backend == "NVVM" - ptx_result = program.compile("ptx") - assert isinstance(ptx_result, ObjectCode) - assert ptx_result.name.startswith("existing_bitcode_") - assert len(ptx_result.code) > 0 - try: - ltoir_result = program.compile("ltoir") - assert isinstance(ltoir_result, ObjectCode) - assert len(ltoir_result.code) > 0 - print(f"LTOIR size: {len(ltoir_result.code)} bytes") - except Exception as e: - print(f"LTOIR compilation failed : {e}") - program.close() + ltoir_result = program_lto.compile("ltoir") + assert isinstance(ltoir_result, ObjectCode) + assert len(ltoir_result.code) > 0 + print(f"LTOIR size: {len(ltoir_result.code)} bytes") except Exception as e: - pytest.fail(f"Failed to compile existing bitcode file {bitcode_path}: {str(e)}") - + print(f"LTOIR compilation failed : {e}") + finally: + program.close() def test_cpp_program_with_extra_sources(): # negative test with NVRTC with multiple sources diff --git a/cuda_python_test_helpers/cuda_python_test_helpers/nvvm_bitcode.py b/cuda_python_test_helpers/cuda_python_test_helpers/nvvm_bitcode.py new file mode 100644 index 0000000000..cba22d5425 --- /dev/null +++ b/cuda_python_test_helpers/cuda_python_test_helpers/nvvm_bitcode.py @@ -0,0 +1,136 @@ +import binascii +import re +from contextlib import contextmanager + +import pytest +from cuda.bindings import nvvm + +MINIMAL_NVVMIR_TXT_TEMPLATE = b"""\ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +target triple = "nvptx64-nvidia-cuda" + +define void @kernel() { +entry: + ret void +} + +!nvvm.annotations = !{!0} +!0 = !{void ()* @kernel, !"kernel", i32 1} + +!nvvmir.version = !{!1} +!1 = !{i32 %d, i32 0, i32 %d, i32 0} +""" # noqa: E501 + +MINIMAL_NVVMIR_BITCODE_STATIC = { + (1, 3): # (major, debug_major) + "4243c0de3514000005000000620c30244a59be669dfbb4bf0b51804c01000000210c00007f010000" + "0b02210002000000160000000781239141c80449061032399201840c250508191e048b62800c4502" + "42920b42641032143808184b0a3232884870c421234412878c1041920264c808b1142043468820c9" + "01323284182a282a90317cb05c9120c3c8000000892000000b0000003222c80820624600212b2498" + "0c212524980c19270c85a4906032645c20246382a01801300128030173046000132677b00778a007" + "7cb0033a680377b0877420877408873618877a208770d8e012e5d006f0a0077640077a600774a007" + "7640076d900e71a00778a00778d006e980077a80077a80076d900e7160077a100776a0077160076d" + "900e7320077a300772a0077320076d900e7640077a600774a0077640076d900e71200778a0077120" + "0778a00771200778d006e6300772a0077320077a300772d006e6600774a0077640077a600774d006" + "f6100776a0077160077a100776d006f6300772a0077320077a300772d006f6600774a0077640077a" + "600774d006f610077280077a10077280077a10077280076de00e7160077a300772a0077640071a21" + "4c0e11de9c2e4fbbcfbe211560040000000000000000000000000620b141a0e86000004016080000" + "06000000321e980c19114c908c092647c6044362098c009401000000b1180000ac0000003308801c" + "c4e11c6614013d88433884c38c4280077978077398710ce6000fed100ef4800e330c421ec2c11dce" + "a11c6630053d88433884831bcc033dc8433d8c033dcc788c7470077b08077948877070077a700376" + "788770208719cc110eec900ee1300f6e300fe3f00ef0500e3310c41dde211cd8211dc2611e663089" + "3bbc833bd04339b4033cbc833c84033bccf0147660077b6807376887726807378087709087706007" + "76280776f8057678877780875f08877118877298877998812ceef00eeee00ef5c00eec300362c8a1" + "1ce4a11ccca11ce4a11cdc611cca211cc4811dca6106d6904339c84339984339c84339b8c3389443" + "3888033b94c32fbc833cfc823bd4033bb0c30cc7698770588772708374680778608774188774a087" + "19ce530fee000ff2500ee4900ee3400fe1200eec500e3320281ddcc11ec2411ed2211cdc811edce0" + "1ce4e11dea011e66185138b0433a9c833bcc50247660077b68073760877778077898514cf4900ff0" + "500e331e6a1eca611ce8211ddec11d7e011ee4a11ccc211df0610654858338ccc33bb0433dd04339" + "fcc23ce4433b88c33bb0c38cc50a877998877718877408077a28077298815ce3100eecc00ee5500e" + "f33023c1d2411ee4e117d8e11dde011e6648193bb0833db4831b84c3388c4339ccc33cb8c139c8c3" + "3bd4033ccc48b471080776600771088771588719dbc60eec600fede006f0200fe5300fe5200ff650" + "0e6e100ee3300ee5300ff3e006e9e00ee4500ef83023e2ec611cc2811dd8e117ec211de6211dc421" + "1dd8211de8211f66209d3bbc433db80339948339cc58bc7070077778077a08077a488777708719cb" + "e70eef300fe1e00ee9400fe9a00fe530c3010373a8077718875f988770708774a08774d087729881" + "844139e0c338b0433d904339cc40c4a01dcaa11de0411edec11c662463300ee1c00eec300fe9400f" + "e5000000792000001d000000721e482043880c19097232482023818c9191d144a01028643c313242" + "8e9021a318100a00060000006b65726e656c0000230802308240042308843082400c330c4230cc40" + "0c4441c84860821272b3b36b730973737ba30ba34b7b739b1b2528d271b3b36b4b9373b12b939b4b" + "7b731b2530000000a9180000250000000b0a7228877780077a587098433db8c338b04339d0c382e6" + "1cc6a10de8411ec2c11de6211de8211ddec11d1634e3600ee7500fe1200fe4400fe1200fe7500ef4" + "b08081077928877060077678877108077a28077258709cc338b4013ba4833d94c3026b1cd8211cdc" + "e11cdc201ce4611cdc201ce8811ec2611cd0a11cc8611cc2811dd861c1010ff4200fe1500ff4800e" + "00000000d11000000600000007cc3ca4833b9c033b94033da0833c94433890c30100000061200000" + "06000000130481860301000002000000075010cd14610000000000007120000003000000320e1022" + "8400fb020000000000000000650c00001f000000120394f000000000030000000600000006000000" + "4c000000010000005800000000000000580000000100000070000000000000000c00000013000000" + "1f000000080000000600000000000000700000000000000000000000010000000000000000000000" + "060000000000000006000000ffffffff00240000000000005d0c00000d0000001203946700000000" + "6b65726e656c31352e302e376e7670747836342d6e76696469612d637564613c737472696e673e00" + "00000000", + (2, 3): # (major, debug_major) + "4243c0de3514000005000000620c30244a59be669dfbb4bf0b51804c01000000210c000080010000" + "0b02210002000000160000000781239141c80449061032399201840c250508191e048b62800c4502" + "42920b42641032143808184b0a3232884870c421234412878c1041920264c808b1142043468820c9" + "01323284182a282a90317cb05c9120c3c8000000892000000b0000003222c80820624600212b2498" + "0c212524980c19270c85a4906032645c20246382a01801300128030173046000132677b00778a007" + "7cb0033a680377b0877420877408873618877a208770d8e012e5d006f0a0077640077a600774a007" + "7640076d900e71a00778a00778d006e980077a80077a80076d900e7160077a100776a0077160076d" + "900e7320077a300772a0077320076d900e7640077a600774a0077640076d900e71200778a0077120" + "0778a00771200778d006e6300772a0077320077a300772d006e6600774a0077640077a600774d006" + "f6100776a0077160077a100776d006f6300772a0077320077a300772d006f6600774a0077640077a" + "600774d006f610077280077a10077280077a10077280076de00e7160077a300772a0077640071a21" + "4c0e11de9c2e4fbbcfbe211560040000000000000000000000000620b141a0286100004016080000" + "06000000321e980c19114c908c092647c60443620914c10840190000b1180000ac0000003308801c" + "c4e11c6614013d88433884c38c4280077978077398710ce6000fed100ef4800e330c421ec2c11dce" + "a11c6630053d88433884831bcc033dc8433d8c033dcc788c7470077b08077948877070077a700376" + "788770208719cc110eec900ee1300f6e300fe3f00ef0500e3310c41dde211cd8211dc2611e663089" + "3bbc833bd04339b4033cbc833c84033bccf0147660077b6807376887726807378087709087706007" + "76280776f8057678877780875f08877118877298877998812ceef00eeee00ef5c00eec300362c8a1" + "1ce4a11ccca11ce4a11cdc611cca211cc4811dca6106d6904339c84339984339c84339b8c3389443" + "3888033b94c32fbc833cfc823bd4033bb0c30cc7698770588772708374680778608774188774a087" + "19ce530fee000ff2500ee4900ee3400fe1200eec500e3320281ddcc11ec2411ed2211cdc811edce0" + "1ce4e11dea011e66185138b0433a9c833bcc50247660077b68073760877778077898514cf4900ff0" + "500e331e6a1eca611ce8211ddec11d7e011ee4a11ccc211df0610654858338ccc33bb0433dd04339" + "fcc23ce4433b88c33bb0c38cc50a877998877718877408077a28077298815ce3100eecc00ee5500e" + "f33023c1d2411ee4e117d8e11dde011e6648193bb0833db4831b84c3388c4339ccc33cb8c139c8c3" + "3bd4033ccc48b471080776600771088771588719dbc60eec600fede006f0200fe5300fe5200ff650" + "0e6e100ee3300ee5300ff3e006e9e00ee4500ef83023e2ec611cc2811dd8e117ec211de6211dc421" + "1dd8211de8211f66209d3bbc433db80339948339cc58bc7070077778077a08077a488777708719cb" + "e70eef300fe1e00ee9400fe9a00fe530c3010373a8077718875f988770708774a08774d087729881" + "844139e0c338b0433d904339cc40c4a01dcaa11de0411edec11c662463300ee1c00eec300fe9400f" + "e5000000792000001e000000721e482043880c19097232482023818c9191d144a01028643c313242" + "8e9021a318100a00060000006b65726e656c0000230802308240042308843082400c23080431c320" + "04c30c045118858c04262821373bbb36973037b737ba30bab437b7b95102231d373bbbb6343917bb" + "32b9b9b437b7518203000000a9180000250000000b0a7228877780077a587098433db8c338b04339" + "d0c382e61cc6a10de8411ec2c11de6211de8211ddec11d1634e3600ee7500fe1200fe4400fe1200f" + "e7500ef4b08081077928877060077678877108077a28077258709cc338b4013ba4833d94c3026b1c" + "d8211cdce11cdc201ce4611cdc201ce8811ec2611cd0a11cc8611cc2811dd861c1010ff4200fe150" + "0ff4800e00000000d11000000600000007cc3ca4833b9c033b94033da0833c94433890c301000000" + "6120000006000000130481860301000002000000075010cd14610000000000007120000003000000" + "320e10228400fc020000000000000000650c00001f000000120394f0000000000300000006000000" + "060000004c000000010000005800000000000000580000000100000070000000000000000c000000" + "130000001f0000000800000006000000000000007000000000000000000000000100000000000000" + "00000000060000000000000006000000ffffffff00240000000000005d0c00000d00000012039467" + "000000006b65726e656c31352e302e376e7670747836342d6e76696469612d637564613c73747269" + "6e673e0000000000", +} + +@pytest.fixture(params=("txt", "bitcode_static")) +def minimal_nvvmir(request): + major, minor, debug_major, debug_minor = nvvm.ir_version() + + if request.param == "txt": + return MINIMAL_NVVMIR_TXT_TEMPLATE % (major, debug_major) + + bitcode_static_binascii = MINIMAL_NVVMIR_BITCODE_STATIC.get((major, debug_major)) + if bitcode_static_binascii: + return binascii.unhexlify(bitcode_static_binascii) + raise RuntimeError( + "Static bitcode for NVVM IR version " + f"{major}.{debug_major} is not available in this test.\n" + "Maintainers: Please run the helper script to generate it and add the " + "output to the MINIMAL_NVVMIR_BITCODE_STATIC dict:\n" + " ../../toolshed/build_static_bitcode_input.py" + ) \ No newline at end of file From 6e411ee8619868ec7ac6d4b4304a2e950edf490c Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Mon, 1 Dec 2025 17:44:23 +0000 Subject: [PATCH 10/20] use 2 tuples --- cuda_core/cuda/core/experimental/_program.py | 58 ++++++++++++-------- cuda_core/tests/test_program.py | 11 +++- 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 7d3ea15e46..ba6c4cf0af 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -298,7 +298,10 @@ class ProgramOptions: split_compile: int | None = None fdevice_syntax_only: bool | None = None minimal: bool | None = None - extra_sources: Union[str, bytes, list[Union[str, bytes]], tuple[Union[str, bytes]]] | None = None + # Creating as 2 tuples ((names, source), (names,source)) + extra_sources: ( + Union[List[Tuple[str, Union[str, bytes, bytearray]]], Tuple[Tuple[str, Union[str, bytes, bytearray]]]] | None + ) = None def __post_init__(self): self._name = self.name.encode() @@ -468,13 +471,14 @@ def close(self): nvvm.destroy_program(self.handle) self.handle = None - __slots__ = ("__weakref__", "_mnff", "_backend", "_linker", "_options") + __slots__ = ("__weakref__", "_mnff", "_backend", "_linker", "_options", "_module_count") def __init__(self, code, code_type, options: ProgramOptions = None): self._mnff = Program._MembersNeededForFinalize(self, None, None) self._options = options = check_or_create_options(ProgramOptions, options, "Program options") code_type = code_type.lower() + self._module_count = 0 if code_type == "c++": assert_type(code, str) @@ -509,28 +513,36 @@ def __init__(self, code, code_type, options: ProgramOptions = None): self._mnff.handle = nvvm.create_program() self._mnff.backend = "NVVM" nvvm.add_module_to_program(self._mnff.handle, code, len(code), options._name.decode()) + self._module_count = 1 + # Add extra modules if provided if options.extra_sources is not None: - if isinstance(options.extra_sources, (str, bytes)): - extra_sources = [options.extra_sources] - elif isinstance(options.extra_sources, (list, tuple)): - extra_sources = options.extra_sources - else: - raise TypeError("extra_sources must be str, bytes, list, or tuple") - - if len(extra_sources) == 0: - raise ValueError("extra_sources cannot be empty if provided") - - for i, extra_source in enumerate(extra_sources): - if isinstance(extra_source, str): - extra_source = extra_source.encode("utf-8") - elif not isinstance(extra_source, (bytes, bytearray)): - raise TypeError(f"Extra source {i} must be provided as str, bytes, or bytearray") - - if len(extra_source) == 0: - raise ValueError(f"Extra source {i} cannot be empty") - - extra_name = f"{options.name}_extra_{i}" - nvvm.add_module_to_program(self._mnff.handle, extra_source, len(extra_source), extra_name) + if not is_sequence(options.extra_sources): + raise TypeError("extra_modules must be a sequence of 2-tuples: ((name1, source1), (name2, source2), ...)") + for i, module in enumerate(options.extra_sources): + if not isinstance(module, tuple) or len(module) != 2: + raise TypeError( + f"Each extra module must be a 2-tuple (name, source), got {type(module).__name__} at index {i}" + ) + + module_name, module_source = module + + if not isinstance(module_name, str): + raise TypeError(f"Module name at index {i} must be a string, got {type(module_name).__name__}") + + if isinstance(module_source, str): + # Textual LLVM IR - encode to UTF-8 bytes + module_source = module_source.encode("utf-8") + elif not isinstance(module_source, (bytes, bytearray)): + raise TypeError( + f"Module source at index {i} must be str (textual LLVM IR), bytes (textual LLVM IR or bitcode), " + f"or bytearray, got {type(module_source).__name__}" + ) + + if len(module_source) == 0: + raise ValueError(f"Module source for '{module_name}' (index {i}) cannot be empty") + + nvvm.add_module_to_program(self._mnff.handle, module_source, len(module_source), module_name) + self._module_count += 1 self._backend = "NVVM" self._linker = None diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index c62e1712f6..aec975e84e 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -435,7 +435,7 @@ def test_nvvm_program_with_single_extra_source(nvvm_ir): nvvm = _get_nvvm_module() major, minor, debug_major, debug_minor = nvvm.ir_version() # helper nvvm ir for multiple module loading - helper_nvvm_ir = f"""target triple = "nvptx64-unknown-cuda" + helper_nvvmir = f"""target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define i32 @helper_add(i32 %x) {{ @@ -448,7 +448,12 @@ def test_nvvm_program_with_single_extra_source(nvvm_ir): !0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} """ # noqa: E501 - options = ProgramOptions(name="multi_module_test", extra_sources=helper_nvvm_ir) + options = ProgramOptions( + name="multi_module_test", + extra_sources=[ + ("helper", helper_nvvmir), + ] + ) program = Program(nvvm_ir, "nvvm", options) assert program.backend == "NVVM" @@ -522,7 +527,7 @@ def test_nvvm_program_with_multiple_extra_sources(): !0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} """ # noqa: E501 - options = ProgramOptions(name="nvvm_multi_helper_test", extra_sources=[helper1_ir, helper2_ir]) + options = ProgramOptions(name="nvvm_multi_helper_test", extra_sources=[("helper1", helper1_ir), ("helper2", helper2_ir),]) program = Program(main_nvvm_ir, "nvvm", options) assert program.backend == "NVVM" From aeb26aa1f8dcdaf8a81f6715faa47c40703ced4e Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Wed, 3 Dec 2025 04:16:32 +0000 Subject: [PATCH 11/20] refresh --- cuda_core/cuda/core/experimental/_program.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 9f74c3b10a..3aab5de3ae 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -303,7 +303,7 @@ class ProgramOptions: Union[List[Tuple[str, Union[str, bytes, bytearray]]], Tuple[Tuple[str, Union[str, bytes, bytearray]]]] | None ) = None numba_debug: bool | None = None # Custom option for Numba debugging - + def __post_init__(self): self._name = self.name.encode() @@ -423,8 +423,6 @@ def __post_init__(self): self._formatted_options.append("--fdevice-syntax-only") if self.minimal is not None and self.minimal: self._formatted_options.append("--minimal") - if self.numba_debug: - self._formatted_options.append("--numba-debug") def _as_bytes(self): # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved From b3d6d960faa8f9ff130cd558e91b56213d1b448f Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Wed, 3 Dec 2025 07:54:25 +0000 Subject: [PATCH 12/20] format --- cuda_core/cuda/core/experimental/_program.py | 27 +++++----- cuda_core/tests/test_program.py | 57 +++++++------------- 2 files changed, 35 insertions(+), 49 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 3aab5de3ae..efdb39807a 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -7,7 +7,7 @@ import weakref from contextlib import contextmanager from dataclasses import dataclass -from typing import TYPE_CHECKING, Union +from typing import TYPE_CHECKING, List, Tuple, Union from warnings import warn if TYPE_CHECKING: @@ -303,7 +303,7 @@ class ProgramOptions: Union[List[Tuple[str, Union[str, bytes, bytearray]]], Tuple[Tuple[str, Union[str, bytes, bytearray]]]] | None ) = None numba_debug: bool | None = None # Custom option for Numba debugging - + def __post_init__(self): self._name = self.name.encode() @@ -484,7 +484,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): if code_type == "c++": assert_type(code, str) # TODO: support pre-loaded headers & include names - + if options.extra_sources is not None: raise ValueError("extra_sources is not supported by the NVRTC backend (C++ code_type)") @@ -515,21 +515,24 @@ def __init__(self, code, code_type, options: ProgramOptions = None): self._mnff.backend = "NVVM" nvvm.add_module_to_program(self._mnff.handle, code, len(code), options._name.decode()) self._module_count = 1 - # Add extra modules if provided + # Add extra modules if provided if options.extra_sources is not None: if not is_sequence(options.extra_sources): - raise TypeError("extra_modules must be a sequence of 2-tuples: ((name1, source1), (name2, source2), ...)") + raise TypeError( + "extra_modules must be a sequence of 2-tuples:((name1, source1), (name2, source2), ...)" + ) for i, module in enumerate(options.extra_sources): if not isinstance(module, tuple) or len(module) != 2: raise TypeError( - f"Each extra module must be a 2-tuple (name, source), got {type(module).__name__} at index {i}" + f"Each extra module must be a 2-tuple (name, source)" + f", got {type(module).__name__} at index {i}" ) - + module_name, module_source = module - + if not isinstance(module_name, str): - raise TypeError(f"Module name at index {i} must be a string, got {type(module_name).__name__}") - + raise TypeError(f"Module name at index {i} must be a string,got {type(module_name).__name__}") + if isinstance(module_source, str): # Textual LLVM IR - encode to UTF-8 bytes module_source = module_source.encode("utf-8") @@ -538,10 +541,10 @@ def __init__(self, code, code_type, options: ProgramOptions = None): f"Module source at index {i} must be str (textual LLVM IR), bytes (textual LLVM IR or bitcode), " f"or bytearray, got {type(module_source).__name__}" ) - + if len(module_source) == 0: raise ValueError(f"Module source for '{module_name}' (index {i}) cannot be empty") - + nvvm.add_module_to_program(self._mnff.handle, module_source, len(module_source), module_name) self._module_count += 1 diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index e0b9a61023..f9cb8dc5f2 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -15,13 +15,13 @@ is_culink_backend = _linker._decide_nvjitlink_or_driver() try: - from cuda_python_test_helpers.nvvm_bitcode import ( - minimal_nvvmir - ) + from cuda_python_test_helpers.nvvm_bitcode import minimal_nvvmir + _test_helpers_available = True except ImportError: _test_helpers_available = False + def _is_nvvm_available(): """Check if NVVM is available.""" try: @@ -38,29 +38,12 @@ def _is_nvvm_available(): ) try: - from cuda.core.experimental._utils.cuda_utils import driver, handle_return, nvrtc + from cuda.core.experimental._utils.cuda_utils import driver, handle_return _cuda_driver_version = handle_return(driver.cuDriverGetVersion()) except Exception: _cuda_driver_version = 0 - -def _get_nvrtc_version_for_tests(): - """ - Get NVRTC version. - - Returns: - int: Version in format major * 1000 + minor * 100 (e.g., 13200 for CUDA 13.2) - None: If NVRTC is not available - """ - try: - nvrtc_major, nvrtc_minor = handle_return(nvrtc.nvrtcVersion()) - version = nvrtc_major * 1000 + nvrtc_minor * 100 - return version - except Exception: - return None - - _libnvvm_version = None _libnvvm_version_attempted = False @@ -200,13 +183,6 @@ def ptx_code_object(): [ ProgramOptions(name="abc"), ProgramOptions(device_code_optimize=True, debug=True), - pytest.param( - ProgramOptions(debug=True, numba_debug=True), - marks=pytest.mark.skipif( - (_get_nvrtc_version_for_tests() or 0) < 13200, - reason="numba_debug requires NVRTC >= 13.2", - ), - ), ProgramOptions(relocatable_device_code=True, max_register_count=32), ProgramOptions(ftz=True, prec_sqrt=False, prec_div=False), ProgramOptions(fma=False, use_fast_math=True), @@ -473,11 +449,11 @@ def test_nvvm_program_with_single_extra_source(nvvm_ir): """ # noqa: E501 options = ProgramOptions( - name="multi_module_test", - extra_sources=[ - ("helper", helper_nvvmir), - ] - ) + name="multi_module_test", + extra_sources=[ + ("helper", helper_nvvmir), + ], + ) program = Program(nvvm_ir, "nvvm", options) assert program.backend == "NVVM" @@ -551,7 +527,13 @@ def test_nvvm_program_with_multiple_extra_sources(): !0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} """ # noqa: E501 - options = ProgramOptions(name="nvvm_multi_helper_test", extra_sources=[("helper1", helper1_ir), ("helper2", helper2_ir),]) + options = ProgramOptions( + name="nvvm_multi_helper_test", + extra_sources=[ + ("helper1", helper1_ir), + ("helper2", helper2_ir), + ], + ) program = Program(main_nvvm_ir, "nvvm", options) assert program.backend == "NVVM" @@ -567,11 +549,11 @@ def test_nvvm_program_with_multiple_extra_sources(): @nvvm_available -@pytest.mark.skipif(not _test_helpers_available, reason = "cuda_python_test_helpers not accessible") +@pytest.mark.skipif(not _test_helpers_available, reason="cuda_python_test_helpers not accessible") def test_bitcode_format(minimal_nvvmir): import os from pathlib import Path - + if len(minimal_nvvmir) < 4: pytest.skip("Bitcode file is not valid or empty") @@ -583,7 +565,7 @@ def test_bitcode_format(minimal_nvvmir): assert isinstance(ptx_result, ObjectCode) assert ptx_result.name == "minimal_nvvmir_bitcode_test" assert len(ptx_result.code) > 0 - prgram_lto = Program(minimal_nvvmir, "nvvm", options) + program_lto = Program(minimal_nvvmir, "nvvm", options) try: ltoir_result = program_lto.compile("ltoir") assert isinstance(ltoir_result, ObjectCode) @@ -594,6 +576,7 @@ def test_bitcode_format(minimal_nvvmir): finally: program.close() + def test_cpp_program_with_extra_sources(): # negative test with NVRTC with multiple sources code = 'extern "C" __global__ void my_kernel(){}' From edd6401cc5300c03adb583c021da60b6da18bfe6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 Dec 2025 07:55:54 +0000 Subject: [PATCH 13/20] [pre-commit.ci] auto code formatting --- cuda_core/tests/test_program.py | 5 +---- .../cuda_python_test_helpers/nvvm_bitcode.py | 5 ++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index f9cb8dc5f2..d0de56a41c 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -551,13 +551,10 @@ def test_nvvm_program_with_multiple_extra_sources(): @nvvm_available @pytest.mark.skipif(not _test_helpers_available, reason="cuda_python_test_helpers not accessible") def test_bitcode_format(minimal_nvvmir): - import os - from pathlib import Path - if len(minimal_nvvmir) < 4: pytest.skip("Bitcode file is not valid or empty") - options = ProgramOptions(name=f"minimal_nvvmir_bitcode_test", arch="sm_90") + options = ProgramOptions(name="minimal_nvvmir_bitcode_test", arch="sm_90") program = Program(minimal_nvvmir, "nvvm", options) assert program.backend == "NVVM" diff --git a/cuda_python_test_helpers/cuda_python_test_helpers/nvvm_bitcode.py b/cuda_python_test_helpers/cuda_python_test_helpers/nvvm_bitcode.py index cba22d5425..e42dbff085 100644 --- a/cuda_python_test_helpers/cuda_python_test_helpers/nvvm_bitcode.py +++ b/cuda_python_test_helpers/cuda_python_test_helpers/nvvm_bitcode.py @@ -1,6 +1,4 @@ import binascii -import re -from contextlib import contextmanager import pytest from cuda.bindings import nvvm @@ -117,6 +115,7 @@ "6e673e0000000000", } + @pytest.fixture(params=("txt", "bitcode_static")) def minimal_nvvmir(request): major, minor, debug_major, debug_minor = nvvm.ir_version() @@ -133,4 +132,4 @@ def minimal_nvvmir(request): "Maintainers: Please run the helper script to generate it and add the " "output to the MINIMAL_NVVMIR_BITCODE_STATIC dict:\n" " ../../toolshed/build_static_bitcode_input.py" - ) \ No newline at end of file + ) From 8dbbafece0fc088fa8fbb7d042347f6ce905f9b5 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Mon, 15 Dec 2025 17:59:25 +0000 Subject: [PATCH 14/20] fix from upstream --- cuda_core/cuda/core/experimental/_program.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index efdb39807a..2610bc1232 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -423,6 +423,8 @@ def __post_init__(self): self._formatted_options.append("--fdevice-syntax-only") if self.minimal is not None and self.minimal: self._formatted_options.append("--minimal") + if self.numba_debug: + self._formatted_options.append("--numba-debug") def _as_bytes(self): # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved From b78f0c3e820bce5b6f70903d3d83e10821b02fba Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Wed, 17 Dec 2025 13:22:24 +0000 Subject: [PATCH 15/20] refresh from upstream --- cuda_core/tests/test_program.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 91567fdced..02b9899eea 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -184,6 +184,13 @@ def ptx_code_object(): [ ProgramOptions(name="abc"), ProgramOptions(device_code_optimize=True, debug=True), + pytest.param( + ProgramOptions(debug=True, numba_debug=True), + marks=pytest.mark.skipif( + (_get_nvrtc_version_for_tests() or 0) < 13200, + reason="numba_debug requires NVRTC >= 13.2", + ), + ), ProgramOptions(relocatable_device_code=True, max_register_count=32), ProgramOptions(ftz=True, prec_sqrt=False, prec_div=False), ProgramOptions(fma=False, use_fast_math=True), @@ -714,4 +721,4 @@ def test_program_options_as_bytes_nvvm_unsupported_option(): """Test that unsupported options raise CUDAError for NVVM backend""" options = ProgramOptions(arch="sm_80", lineinfo=True) with pytest.raises(CUDAError, match="not supported by NVVM backend"): - options.as_bytes("nvvm") + options.as_bytes("nvvm") \ No newline at end of file From 99a5593d0d82d5de399228eadf4fc6b6ed825ff6 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Wed, 17 Dec 2025 13:27:55 +0000 Subject: [PATCH 16/20] fix tests --- cuda_core/tests/test_program.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 02b9899eea..00d0709870 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -45,6 +45,21 @@ def _is_nvvm_available(): except Exception: _cuda_driver_version = 0 + +def _get_nvrtc_version_for_tests(): + """ + Get NVRTC version. + Returns: + int: Version in format major * 1000 + minor * 100 (e.g., 13200 for CUDA 13.2) + None: If NVRTC is not available + """ + try: + nvrtc_major, nvrtc_minor = handle_return(nvrtc.nvrtcVersion()) + version = nvrtc_major * 1000 + nvrtc_minor * 100 + return version + except Exception: + return None + _libnvvm_version = None _libnvvm_version_attempted = False From 783f6e5a419b74757c7620824a7a23ec847321fd Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Wed, 17 Dec 2025 13:29:35 +0000 Subject: [PATCH 17/20] take path_finder from PR 447 --- .../bindings/_internal/nvjitlink_linux.pyx | 1 + .../cuda/bindings/_path_finder/cuda_paths.py | 400 ++++++++++++++++++ .../find_nvidia_dynamic_library.py | 139 ++++++ .../cuda/bindings/_path_finder/findlib.py | 69 +++ .../load_nvidia_dynamic_library.py | 92 ++++ .../_path_finder/sys_path_find_sub_dirs.py | 40 ++ cuda_bindings/cuda/bindings/path_finder.py | 37 ++ 7 files changed, 778 insertions(+) create mode 100644 cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py create mode 100644 cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py create mode 100644 cuda_bindings/cuda/bindings/_path_finder/findlib.py create mode 100644 cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py create mode 100644 cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py create mode 100644 cuda_bindings/cuda/bindings/path_finder.py diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 8c96b6d640..d095b3f524 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -12,6 +12,7 @@ from .utils import FunctionNotFoundError, NotSupportedError from cuda.pathfinder import load_nvidia_dynamic_lib + ############################################################################### # Extern ############################################################################### diff --git a/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py b/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py new file mode 100644 index 0000000000..c827699b3a --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py @@ -0,0 +1,400 @@ +import os +import platform +import re +import site +import sys +import traceback +import warnings +from collections import namedtuple +from pathlib import Path + +from .findlib import find_file, find_lib + +IS_WIN32 = sys.platform.startswith("win32") + +_env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"]) + + +def _get_numba_CUDA_INCLUDE_PATH(): + # From numba/numba/core/config.py + + def _readenv(name, ctor, default): + value = os.environ.get(name) + if value is None: + return default() if callable(default) else default + try: + return ctor(value) + except Exception: + warnings.warn( # noqa: B028 + f"Environment variable '{name}' is defined but " + f"its associated value '{value}' could not be " + "parsed.\nThe parse failed with exception:\n" + f"{traceback.format_exc()}", + RuntimeWarning, + ) + return default + + if IS_WIN32: + cuda_path = os.environ.get("CUDA_PATH") + if cuda_path: # noqa: SIM108 + default_cuda_include_path = os.path.join(cuda_path, "include") + else: + default_cuda_include_path = "cuda_include_not_found" + else: + default_cuda_include_path = os.path.join(os.sep, "usr", "local", "cuda", "include") + CUDA_INCLUDE_PATH = _readenv("NUMBA_CUDA_INCLUDE_PATH", str, default_cuda_include_path) + return CUDA_INCLUDE_PATH + + +config_CUDA_INCLUDE_PATH = _get_numba_CUDA_INCLUDE_PATH() + + +def _find_valid_path(options): + """Find valid path from *options*, which is a list of 2-tuple of + (name, path). Return first pair where *path* is not None. + If no valid path is found, return ('', None) + """ + for by, data in options: + if data is not None: + return by, data + else: + return "", None + + +def _get_libdevice_path_decision(): + options = [ + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk()), + ("CUDA_HOME", get_cuda_home("nvvm", "libdevice")), + ("Debian package", get_debian_pkg_libdevice()), + ("NVIDIA NVCC Wheel", get_libdevice_wheel()), + ] + libdevice_ctk_dir = get_system_ctk("nvvm", "libdevice") + if libdevice_ctk_dir and os.path.exists(libdevice_ctk_dir): + options.append(("System", libdevice_ctk_dir)) + + by, libdir = _find_valid_path(options) + return by, libdir + + +def _nvvm_lib_dir(): + if IS_WIN32: + return "nvvm", "bin" + else: + return "nvvm", "lib64" + + +def _get_nvvm_path_decision(): + options = [ + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk()), + ("CUDA_HOME", get_cuda_home(*_nvvm_lib_dir())), + ("NVIDIA NVCC Wheel", _get_nvvm_wheel()), + ] + # need to ensure nvvm dir actually exists + nvvm_ctk_dir = get_system_ctk(*_nvvm_lib_dir()) + if nvvm_ctk_dir and os.path.exists(nvvm_ctk_dir): + options.append(("System", nvvm_ctk_dir)) + + by, path = _find_valid_path(options) + return by, path + + +def _get_nvvm_wheel(): + site_paths = [site.getusersitepackages()] + site.getsitepackages() + ["conda", None] + for sp in site_paths: + # The SONAME is taken based on public CTK 12.x releases + if sys.platform.startswith("linux"): + dso_dir = "lib64" + # Hack: libnvvm from Linux wheel + # does not have any soname (CUDAINST-3183) + dso_path = "libnvvm.so" + elif sys.platform.startswith("win32"): + dso_dir = "bin" + dso_path = "nvvm64_40_0.dll" + else: + raise AssertionError() + + if sp is not None: + dso_dir = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", dso_dir) + dso_path = os.path.join(dso_dir, dso_path) + if os.path.exists(dso_path): + return str(Path(dso_path).parent) + + +def _get_libdevice_paths(): + by, libdir = _get_libdevice_path_decision() + if by == "NVIDIA NVCC Wheel": + # The NVVM path is a directory, not a file + out = os.path.join(libdir, "libdevice.10.bc") + else: + # Search for pattern + pat = r"libdevice(\.\d+)*\.bc$" + candidates = find_file(re.compile(pat), libdir) + # Keep only the max (most recent version) of the bitcode files. + out = max(candidates, default=None) + return _env_path_tuple(by, out) + + +def _cudalib_path(): + if IS_WIN32: + return "bin" + else: + return "lib64" + + +def _cuda_home_static_cudalib_path(): + if IS_WIN32: + return ("lib", "x64") + else: + return ("lib64",) + + +def _get_cudalib_dir_path_decision(): + options = [ + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk()), + ("CUDA_HOME", get_cuda_home(_cudalib_path())), + ("System", get_system_ctk(_cudalib_path())), + ] + by, libdir = _find_valid_path(options) + return by, libdir + + +def _get_static_cudalib_dir_path_decision(): + options = [ + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_static_cudalib_ctk()), + ("CUDA_HOME", get_cuda_home(*_cuda_home_static_cudalib_path())), + ("System", get_system_ctk(_cudalib_path())), + ] + by, libdir = _find_valid_path(options) + return by, libdir + + +def _get_cudalib_dir(): + by, libdir = _get_cudalib_dir_path_decision() + return _env_path_tuple(by, libdir) + + +def _get_static_cudalib_dir(): + by, libdir = _get_static_cudalib_dir_path_decision() + return _env_path_tuple(by, libdir) + + +def get_system_ctk(*subdirs): + """Return path to system-wide cudatoolkit; or, None if it doesn't exist.""" + # Linux? + if sys.platform.startswith("linux"): + # Is cuda alias to /usr/local/cuda? + # We are intentionally not getting versioned cuda installation. + base = "/usr/local/cuda" + if os.path.exists(base): + return os.path.join(base, *subdirs) + + +def get_conda_ctk(): + """Return path to directory containing the shared libraries of cudatoolkit.""" + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) + if not is_conda_env: + return + # Assume the existence of NVVM to imply cudatoolkit installed + paths = find_lib("nvvm") + if not paths: + return + # Use the directory name of the max path + return os.path.dirname(max(paths)) + + +def get_nvidia_nvvm_ctk(): + """Return path to directory containing the NVVM shared library.""" + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) + if not is_conda_env: + return + + # Assume the existence of NVVM in the conda env implies that a CUDA toolkit + # conda package is installed. + + # First, try the location used on Linux and the Windows 11.x packages + libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path()) + if not os.path.exists(libdir) or not os.path.isdir(libdir): + # If that fails, try the location used for Windows 12.x packages + libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path()) + if not os.path.exists(libdir) or not os.path.isdir(libdir): + # If that doesn't exist either, assume we don't have the NVIDIA + # conda package + return + + paths = find_lib("nvvm", libdir=libdir) + if not paths: + return + # Use the directory name of the max path + return os.path.dirname(max(paths)) + + +def get_nvidia_libdevice_ctk(): + """Return path to directory containing the libdevice library.""" + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + nvvm_dir = os.path.dirname(nvvm_ctk) + return os.path.join(nvvm_dir, "libdevice") + + +def get_nvidia_cudalib_ctk(): + """Return path to directory containing the shared libraries of cudatoolkit.""" + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) + subdir = "bin" if IS_WIN32 else "lib" + return os.path.join(env_dir, subdir) + + +def get_nvidia_static_cudalib_ctk(): + """Return path to directory containing the static libraries of cudatoolkit.""" + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + + if IS_WIN32 and ("Library" not in nvvm_ctk): # noqa: SIM108 + # Location specific to CUDA 11.x packages on Windows + dirs = ("Lib", "x64") + else: + # Linux, or Windows with CUDA 12.x packages + dirs = ("lib",) + + env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) + return os.path.join(env_dir, *dirs) + + +def get_cuda_home(*subdirs): + """Get paths of CUDA_HOME. + If *subdirs* are the subdirectory name to be appended in the resulting + path. + """ + cuda_home = os.environ.get("CUDA_HOME") + if cuda_home is None: + # Try Windows CUDA installation without Anaconda + cuda_home = os.environ.get("CUDA_PATH") + if cuda_home is not None: + return os.path.join(cuda_home, *subdirs) + + +def _get_nvvm_path(): + by, path = _get_nvvm_path_decision() + if by == "NVIDIA NVCC Wheel": + # The NVVM path is a directory, not a file + path = os.path.join(path, "libnvvm.so") + else: + candidates = find_lib("nvvm", path) + path = max(candidates) if candidates else None + return _env_path_tuple(by, path) + + +def get_cuda_paths(): + """Returns a dictionary mapping component names to a 2-tuple + of (source_variable, info). + The returned dictionary will have the following keys and infos: + - "nvvm": file_path + - "libdevice": List[Tuple[arch, file_path]] + - "cudalib_dir": directory_path + Note: The result of the function is cached. + """ + # Check cache + if hasattr(get_cuda_paths, "_cached_result"): + return get_cuda_paths._cached_result + else: + # Not in cache + d = { + "nvvm": _get_nvvm_path(), + "libdevice": _get_libdevice_paths(), + "cudalib_dir": _get_cudalib_dir(), + "static_cudalib_dir": _get_static_cudalib_dir(), + "include_dir": _get_include_dir(), + } + # Cache result + get_cuda_paths._cached_result = d + return d + + +def get_debian_pkg_libdevice(): + """ + Return the Debian NVIDIA Maintainers-packaged libdevice location, if it + exists. + """ + pkg_libdevice_location = "/usr/lib/nvidia-cuda-toolkit/libdevice" + if not os.path.exists(pkg_libdevice_location): + return None + return pkg_libdevice_location + + +def get_libdevice_wheel(): + nvvm_path = _get_nvvm_wheel() + if nvvm_path is None: + return None + nvvm_path = Path(nvvm_path) + libdevice_path = nvvm_path.parent / "libdevice" + + return str(libdevice_path) + + +def get_current_cuda_target_name(): + """Determine conda's CTK target folder based on system and machine arch. + CTK's conda package delivers headers based on its architecture type. For example, + `x86_64` machine places header under `$CONDA_PREFIX/targets/x86_64-linux`, and + `aarch64` places under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the + nuances at cudart's conda feedstock: + https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501 + """ + system = platform.system() + machine = platform.machine() + + if system == "Linux": + arch_to_targets = {"x86_64": "x86_64-linux", "aarch64": "sbsa-linux"} + elif system == "Windows": + arch_to_targets = { + "AMD64": "x64", + } + else: + arch_to_targets = {} + + return arch_to_targets.get(machine, None) + + +def get_conda_include_dir(): + """ + Return the include directory in the current conda environment, if one + is active and it exists. + """ + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) + if not is_conda_env: + return + + if platform.system() == "Windows": + include_dir = os.path.join(sys.prefix, "Library", "include") + elif target_name := get_current_cuda_target_name(): + include_dir = os.path.join(sys.prefix, "targets", target_name, "include") + else: + # A fallback when target cannot determined + # though usually it shouldn't. + include_dir = os.path.join(sys.prefix, "include") + + if ( + os.path.exists(include_dir) + and os.path.isdir(include_dir) + and os.path.exists(os.path.join(include_dir, "cuda_device_runtime_api.h")) + ): + return include_dir + return + + +def _get_include_dir(): + """Find the root include directory.""" + options = [ + ("Conda environment (NVIDIA package)", get_conda_include_dir()), + ("CUDA_INCLUDE_PATH Config Entry", config_CUDA_INCLUDE_PATH), + # TODO: add others + ] + by, include_dir = _find_valid_path(options) + return _env_path_tuple(by, include_dir) \ No newline at end of file diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py new file mode 100644 index 0000000000..d8413282e6 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -0,0 +1,139 @@ +# Copyright 2024-2025 NVIDIA Corporation. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import functools +import glob +import os + +from .cuda_paths import IS_WIN32, get_cuda_paths +from .sys_path_find_sub_dirs import sys_path_find_sub_dirs + + +def _no_such_file_in_sub_dirs(sub_dirs, file_wild, error_messages, attachments): + error_messages.append(f"No such file: {file_wild}") + for sub_dir in sys_path_find_sub_dirs(sub_dirs): + attachments.append(f' listdir("{sub_dir}"):') + for node in sorted(os.listdir(sub_dir)): + attachments.append(f" {node}") + + +def _find_so_using_nvidia_lib_dirs(libname, so_basename, error_messages, attachments): + if libname == "nvvm": # noqa: SIM108 + nvidia_sub_dirs = ("nvidia", "*", "nvvm", "lib64") + else: + nvidia_sub_dirs = ("nvidia", "*", "lib") + file_wild = so_basename + "*" + for lib_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): + # First look for an exact match + so_name = os.path.join(lib_dir, so_basename) + if os.path.isfile(so_name): + return so_name + # Look for a versioned library + # Using sort here mainly to make the result deterministic. + for node in sorted(glob.glob(os.path.join(lib_dir, file_wild))): + so_name = os.path.join(lib_dir, node) + if os.path.isfile(so_name): + return so_name + _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) + return None + + +def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments): + if libname == "nvvm": # noqa: SIM108 + nvidia_sub_dirs = ("nvidia", "*", "nvvm", "bin") + else: + nvidia_sub_dirs = ("nvidia", "*", "bin") + file_wild = libname + "*.dll" + for bin_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): + for node in sorted(glob.glob(os.path.join(bin_dir, file_wild))): + dll_name = os.path.join(bin_dir, node) + if os.path.isfile(dll_name): + return dll_name + _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) + return None + + +def _get_cuda_paths_info(key, error_messages): + env_path_tuple = get_cuda_paths()[key] + if not env_path_tuple: + error_messages.append(f'Failure obtaining get_cuda_paths()["{key}"]') + return None + if not env_path_tuple.info: + error_messages.append(f'Failure obtaining get_cuda_paths()["{key}"].info') + return None + return env_path_tuple.info + + +def _find_so_using_cudalib_dir(so_basename, error_messages, attachments): + cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) + if cudalib_dir is None: + return None + primary_so_dir = cudalib_dir + "/" + candidate_so_dirs = [primary_so_dir] + libs = ["/lib/", "/lib64/"] + for _ in range(2): + alt_dir = libs[0].join(primary_so_dir.rsplit(libs[1], 1)) + if alt_dir not in candidate_so_dirs: + candidate_so_dirs.append(alt_dir) + libs.reverse() + candidate_so_names = [so_dirname + so_basename for so_dirname in candidate_so_dirs] + error_messages = [] + for so_name in candidate_so_names: + if os.path.isfile(so_name): + return so_name + error_messages.append(f"No such file: {so_name}") + for so_dirname in candidate_so_dirs: + attachments.append(f' listdir("{so_dirname}"):') + if not os.path.isdir(so_dirname): + attachments.append(" DIRECTORY DOES NOT EXIST") + else: + for node in sorted(os.listdir(so_dirname)): + attachments.append(f" {node}") + return None + + +def _find_dll_using_cudalib_dir(libname, error_messages, attachments): + cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) + if cudalib_dir is None: + return None + file_wild = libname + "*.dll" + for node in sorted(glob.glob(os.path.join(cudalib_dir, file_wild))): + dll_name = os.path.join(cudalib_dir, node) + if os.path.isfile(dll_name): + return dll_name + error_messages.append(f"No such file: {file_wild}") + attachments.append(f' listdir("{cudalib_dir}"):') + for node in sorted(os.listdir(cudalib_dir)): + attachments.append(f" {node}") + return None + + +@functools.cache +def find_nvidia_dynamic_library(name: str) -> str: + error_messages = [] + attachments = [] + + if IS_WIN32: + dll_name = _find_dll_using_nvidia_bin_dirs(name, error_messages, attachments) + if dll_name is None: + if name == "nvvm": + dll_name = _get_cuda_paths_info("nvvm", error_messages) + else: + dll_name = _find_dll_using_cudalib_dir(name, error_messages, attachments) + if dll_name is None: + attachments = "\n".join(attachments) + raise RuntimeError(f"Failure finding {name}*.dll: {', '.join(error_messages)}\n{attachments}") + return dll_name + + so_basename = f"lib{name}.so" + so_name = _find_so_using_nvidia_lib_dirs(name, so_basename, error_messages, attachments) + if so_name is None: + if name == "nvvm": + so_name = _get_cuda_paths_info("nvvm", error_messages) + else: + so_name = _find_so_using_cudalib_dir(so_basename, error_messages, attachments) + if so_name is None: + attachments = "\n".join(attachments) + raise RuntimeError(f"Failure finding {so_basename}: {', '.join(error_messages)}\n{attachments}") + return so_name \ No newline at end of file diff --git a/cuda_bindings/cuda/bindings/_path_finder/findlib.py b/cuda_bindings/cuda/bindings/_path_finder/findlib.py new file mode 100644 index 0000000000..c64c3c9577 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/findlib.py @@ -0,0 +1,69 @@ +# Forked from: +# https://github.com/numba/numba/blob/f0d24824fcd6a454827e3c108882395d00befc04/numba/misc/findlib.py + +import os +import re +import sys + + +def get_lib_dirs(): + """ + Anaconda specific + """ + if sys.platform == "win32": + # on windows, historically `DLLs` has been used for CUDA libraries, + # since approximately CUDA 9.2, `Library\bin` has been used. + dirnames = ["DLLs", os.path.join("Library", "bin")] + else: + dirnames = [ + "lib", + ] + libdirs = [os.path.join(sys.prefix, x) for x in dirnames] + return libdirs + + +DLLNAMEMAP = { + "linux": r"lib%(name)s\.so\.%(ver)s$", + "linux2": r"lib%(name)s\.so\.%(ver)s$", + "linux-static": r"lib%(name)s\.a$", + "darwin": r"lib%(name)s\.%(ver)s\.dylib$", + "win32": r"%(name)s%(ver)s\.dll$", + "win32-static": r"%(name)s\.lib$", + "bsd": r"lib%(name)s\.so\.%(ver)s$", +} + +RE_VER = r"[0-9]*([_\.][0-9]+)*" + + +def find_lib(libname, libdir=None, platform=None, static=False): + platform = platform or sys.platform + platform = "bsd" if "bsd" in platform else platform + if static: + platform = f"{platform}-static" + if platform not in DLLNAMEMAP: + # Return empty list if platform name is undefined. + # Not all platforms define their static library paths. + return [] + pat = DLLNAMEMAP[platform] % {"name": libname, "ver": RE_VER} + regex = re.compile(pat) + return find_file(regex, libdir) + + +def find_file(pat, libdir=None): + if libdir is None: + libdirs = get_lib_dirs() + elif isinstance(libdir, str): + libdirs = [ + libdir, + ] + else: + libdirs = list(libdir) + files = [] + for ldir in libdirs: + try: + entries = os.listdir(ldir) + except FileNotFoundError: + continue + candidates = [os.path.join(ldir, ent) for ent in entries if pat.match(ent)] + files.extend([c for c in candidates if os.path.isfile(c)]) + return files \ No newline at end of file diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py new file mode 100644 index 0000000000..69aadabcbf --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -0,0 +1,92 @@ +import functools +import sys + +if sys.platform == "win32": + import ctypes.wintypes + + import pywintypes + import win32api + + # Mirrors WinBase.h (unfortunately not defined already elsewhere) + _WINBASE_LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 + +else: + import ctypes + import os + + _LINUX_CDLL_MODE = os.RTLD_NOW | os.RTLD_GLOBAL + +from .find_nvidia_dynamic_library import find_nvidia_dynamic_library + + +@functools.cache +def _windows_cuDriverGetVersion() -> int: + handle = win32api.LoadLibrary("nvcuda.dll") + + kernel32 = ctypes.WinDLL("kernel32", use_last_error=True) + GetProcAddress = kernel32.GetProcAddress + GetProcAddress.argtypes = [ctypes.wintypes.HMODULE, ctypes.wintypes.LPCSTR] + GetProcAddress.restype = ctypes.c_void_p + cuDriverGetVersion = GetProcAddress(handle, b"cuDriverGetVersion") + assert cuDriverGetVersion + + FUNC_TYPE = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.POINTER(ctypes.c_int)) + cuDriverGetVersion_fn = FUNC_TYPE(cuDriverGetVersion) + driver_ver = ctypes.c_int() + err = cuDriverGetVersion_fn(ctypes.byref(driver_ver)) + assert err == 0 + return driver_ver.value + + +@functools.cache +def _windows_load_with_dll_basename(name: str) -> int: + driver_ver = _windows_cuDriverGetVersion() + del driver_ver # Keeping this here because it will probably be needed in the future. + + if name == "nvJitLink": + dll_name = "nvJitLink_120_0.dll" + elif name == "nvrtc": + dll_name = "nvrtc64_120_0.dll" + elif name == "nvvm": + dll_name = "nvvm64_40_0.dll" + + try: + return win32api.LoadLibrary(dll_name) + except pywintypes.error: + pass + + return None + + +@functools.cache +def load_nvidia_dynamic_library(name: str) -> int: + # First try using the platform-specific dynamic loader search mechanisms + if sys.platform == "win32": + handle = _windows_load_with_dll_basename(name) + if handle: + return handle + else: + dl_path = f"lib{name}.so" # Version intentionally no specified. + try: + handle = ctypes.CDLL(dl_path, _LINUX_CDLL_MODE) + except OSError: + pass + else: + # Use `cdef void* ptr = ` in cython to convert back to void* + return handle._handle # C unsigned int + + dl_path = find_nvidia_dynamic_library(name) + if sys.platform == "win32": + try: + handle = win32api.LoadLibrary(dl_path) + except pywintypes.error as e: + raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e + # Use `cdef void* ptr = ` in cython to convert back to void* + return handle # C signed int, matches win32api.GetProcAddress + else: + try: + handle = ctypes.CDLL(dl_path, _LINUX_CDLL_MODE) + except OSError as e: + raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e + # Use `cdef void* ptr = ` in cython to convert back to void* + return handle._handle # C unsigned int \ No newline at end of file diff --git a/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py b/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py new file mode 100644 index 0000000000..324cdeec30 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py @@ -0,0 +1,40 @@ +# Copyright 2024-2025 NVIDIA Corporation. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import functools +import os +import sys + + +@functools.cache +def _impl(sys_path, sub_dirs): + results = [] + for base in sys_path: + stack = [(base, 0)] # (current_path, index into sub_dirs) + while stack: + current_path, idx = stack.pop() + if idx == len(sub_dirs): + if os.path.isdir(current_path): + results.append(current_path) + continue + + sub = sub_dirs[idx] + if sub == "*": + try: + entries = sorted(os.listdir(current_path)) + except OSError: + continue + for entry in entries: + entry_path = os.path.join(current_path, entry) + if os.path.isdir(entry_path): + stack.append((entry_path, idx + 1)) + else: + next_path = os.path.join(current_path, sub) + if os.path.isdir(next_path): + stack.append((next_path, idx + 1)) + return results + + +def sys_path_find_sub_dirs(sub_dirs): + return _impl(tuple(sys.path), tuple(sub_dirs)) \ No newline at end of file diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py new file mode 100644 index 0000000000..12c02b930b --- /dev/null +++ b/cuda_bindings/cuda/bindings/path_finder.py @@ -0,0 +1,37 @@ +# Copyright 2024-2025 NVIDIA Corporation. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +from cuda.bindings._path_finder.cuda_paths import ( + get_conda_ctk, + get_conda_include_dir, + get_cuda_home, + get_cuda_paths, + get_current_cuda_target_name, + get_debian_pkg_libdevice, + get_libdevice_wheel, + get_nvidia_cudalib_ctk, + get_nvidia_libdevice_ctk, + get_nvidia_nvvm_ctk, + get_nvidia_static_cudalib_ctk, + get_system_ctk, +) +from cuda.bindings._path_finder.find_nvidia_dynamic_library import find_nvidia_dynamic_library +from cuda.bindings._path_finder.load_nvidia_dynamic_library import load_nvidia_dynamic_library + +__all__ = [ + "find_nvidia_dynamic_library", + "load_nvidia_dynamic_library", + "get_conda_ctk", + "get_conda_include_dir", + "get_cuda_home", + "get_cuda_paths", + "get_current_cuda_target_name", + "get_debian_pkg_libdevice", + "get_libdevice_wheel", + "get_nvidia_cudalib_ctk", + "get_nvidia_libdevice_ctk", + "get_nvidia_nvvm_ctk", + "get_nvidia_static_cudalib_ctk", + "get_system_ctk", +] \ No newline at end of file From 5dbfb2d0061d6ca9a656e151d922f4fd9ec255b5 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Wed, 17 Dec 2025 13:54:03 +0000 Subject: [PATCH 18/20] add builder files --- cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx | 4 ++-- cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index d095b3f524..785f587951 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -9,7 +9,7 @@ from libc.stdint cimport intptr_t, uintptr_t import threading from .utils import FunctionNotFoundError, NotSupportedError -from cuda.pathfinder import load_nvidia_dynamic_lib +from cuda.bindings import path_finder @@ -77,7 +77,7 @@ cdef void* __nvJitLinkVersion = NULL cdef void* load_library() except* with gil: - cdef uintptr_t handle = load_nvidia_dynamic_lib("nvJitLink")._handle_uint + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink") return handle diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 408a2cb592..15845e3228 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -10,6 +10,7 @@ import threading from .utils import FunctionNotFoundError, NotSupportedError from cuda.pathfinder import load_nvidia_dynamic_lib +from cuda.bindings import path_finder ############################################################################### @@ -75,7 +76,7 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library() except* with gil: - cdef uintptr_t handle = load_nvidia_dynamic_lib("nvvm")._handle_uint + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm") return handle From 0a9eea9967e017bb20eea4801fdbba16e078ce50 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Wed, 17 Dec 2025 14:06:11 +0000 Subject: [PATCH 19/20] use python lists/tuples --- cuda_core/cuda/core/experimental/_program.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 4da8191f26..1422ceda5f 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -7,7 +7,7 @@ import weakref from contextlib import contextmanager from dataclasses import dataclass -from typing import TYPE_CHECKING, List, Tuple, Union +from typing import TYPE_CHECKING, Union from warnings import warn if TYPE_CHECKING: From 79138c0950aaee94426e5858df48010bb6db4530 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Thu, 18 Dec 2025 20:01:25 +0000 Subject: [PATCH 20/20] libdevice integration --- cuda_core/cuda/core/experimental/_program.py | 58 +++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 1422ceda5f..a5ad2a3be2 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -102,6 +102,47 @@ def _get_nvvm_module(): _nvvm_module = None raise e +def _find_libdevice_path(): + """ + Find libdevice.10.bc using cuda.bindings.path_finder. + + Returns: + str: Path to libdevice.10.bc, or None if not found + """ + try: + from cuda.bindings.path_finder import ( + get_nvidia_libdevice_ctk, + get_libdevice_wheel, + get_debian_pkg_libdevice, + ) + + for getter in [get_nvidia_libdevice_ctk, get_libdevice_wheel, get_debian_pkg_libdevice]: + try: + result = getter() + if result is not None and result.info is not None: + return result.info + except Exception: + continue + + return None + except ImportError: + import os + + # CUDA_HOME + cuda_home = os.environ.get("CUDA_HOME") or os.environ.get("CUDA_PATH") + if cuda_home: + libdevice_path = os.path.join(cuda_home, "nvvm", "libdevice", "libdevice.10.bc") + if os.path.isfile(libdevice_path): + return libdevice_path + + # Linux paths + for base in ["/usr/local/cuda", "/opt/cuda"]: + libdevice_path = os.path.join(base, "nvvm", "libdevice", "libdevice.10.bc") + if os.path.isfile(libdevice_path): + return libdevice_path + + return None + def _process_define_macro_inner(formatted_options, macro): if isinstance(macro, str): @@ -352,6 +393,7 @@ class ProgramOptions: pch_messages: bool | None = None instantiate_templates_in_pch: bool | None = None numba_debug: bool | None = None # Custom option for Numba debugging + use_libdevice: bool | None = None # Use libdevice def __post_init__(self): self._name = self.name.encode() @@ -748,7 +790,8 @@ def __init__(self, code, code_type, options: ProgramOptions = None): nvvm.add_module_to_program(self._mnff.handle, module_source, len(module_source), module_name) self._module_count += 1 - + + self._use_libdevice = options.use_libdevice self._backend = "NVVM" self._linker = None @@ -866,6 +909,19 @@ def compile(self, target_type, name_expressions=(), logs=None): nvvm = _get_nvvm_module() with _nvvm_exception_manager(self): nvvm.verify_program(self._mnff.handle, len(nvvm_options), nvvm_options) + # Invoke libdevice + if getattr(self, '_use_libdevice', False): + libdevice_path = _find_libdevice_path() + if libdevice_path is None: + raise RuntimeError( + "use_libdevice=True but could not find libdevice.10.bc. " + "Ensure CUDA toolkit is installed." + ) + with open(libdevice_path, "rb") as f: + libdevice_bc = f.read() + # Use lazy_add_module for libdevice bitcode only following numba-cuda + nvvm.lazy_add_module_to_program(self._mnff.handle, libdevice_bc, len(libdevice_bc), None) + nvvm.compile_program(self._mnff.handle, len(nvvm_options), nvvm_options) size = nvvm.get_compiled_result_size(self._mnff.handle)