diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 1b901a27fd19d..42c4a7c4d4bd1 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -391,6 +391,9 @@ RISC-V Support CUDA/HIP Language Changes ^^^^^^^^^^^^^^^^^^^^^^^^^ +- PTX is no longer included by default when compiling CUDA with the new driver. + Using ``--cuda-include-ptx=all`` restores the old behavior. + CUDA Support ^^^^^^^^^^^^ diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index cecd34acbc92c..96e6ad77f5e50 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4625,7 +4625,15 @@ Action *Driver::BuildOffloadingActions(Compilation &C, DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); OffloadAction::DeviceDependences DDep; DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); + + // Compiling CUDA in non-RDC mode uses the PTX output if available. + for (Action *Input : A->getInputs()) + if (Kind == Action::OFK_Cuda && A->getType() == types::TY_Object && + !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, + false)) + DDep.add(*Input, *TCAndArch->first, TCAndArch->second.data(), Kind); OffloadActions.push_back(C.MakeAction(DDep, A->getType())); + ++TCAndArch; } } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 177fd6310e7ee..c6007d3cfab86 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -503,18 +503,20 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, Exec, CmdArgs, Inputs, Output)); } -static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) { - bool includePTX = true; - for (Arg *A : Args) { - if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) || - A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ))) - continue; +static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) { + // The new driver does not include PTX by 
default to avoid overhead. + bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver, + options::OPT_no_offload_new_driver, false); + for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ, + options::OPT_no_cuda_include_ptx_EQ)) { A->claim(); const StringRef ArchStr = A->getValue(); - if (ArchStr == "all" || ArchStr == gpu_arch) { - includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ); - continue; - } + if (A->getOption().matches(options::OPT_cuda_include_ptx_EQ) && + (ArchStr == "all" || ArchStr == InputArch)) + includePTX = true; + else if (A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ) && + (ArchStr == "all" || ArchStr == InputArch)) + includePTX = false; } return includePTX; } diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu index 9a231091de2bd..a1c3c9b51b1e4 100644 --- a/clang/test/Driver/cuda-phases.cu +++ b/clang/test/Driver/cuda-phases.cu @@ -244,31 +244,32 @@ // NEW-DRIVER-RDC-NEXT: 18: assembler, {17}, object, (host-cuda) // NEW-DRIVER-RDC-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda) -// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver -fgpu-rdc \ +// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \ // RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW-DRIVER %s -// NEW-DRIVER: 0: input, "[[INPUT:.+]]", cuda -// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output -// NEW-DRIVER-NEXT: 2: compiler, {1}, ir -// NEW-DRIVER-NEXT: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52) +// NEW-DRIVER: 0: input, "[[CUDA:.+]]", cuda, (host-cuda) +// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) +// NEW-DRIVER-NEXT: 2: compiler, {1}, ir, (host-cuda) +// NEW-DRIVER-NEXT: 3: input, "[[CUDA]]", cuda, (device-cuda, sm_52) // NEW-DRIVER-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52) // NEW-DRIVER-NEXT: 5: compiler, {4}, ir, 
(device-cuda, sm_52) // NEW-DRIVER-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52) // NEW-DRIVER-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52) -// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object -// NEW-DRIVER-NEXT: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70) +// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object +// NEW-DRIVER-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70) -// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object -// NEW-DRIVER-NEXT: 15: clang-offload-packager, {8, 14}, image -// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {15}, ir +// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object +// NEW-DRIVER-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda) +// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir // NEW-DRIVER-NEXT: 17: backend, {16}, assembler, (host-cuda) // NEW-DRIVER-NEXT: 18: assembler, {17}, object, (host-cuda) // NEW-DRIVER-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda) // RUN: %clang -### --target=powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \ // RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s %S/Inputs/empty.cpp 2>&1 | FileCheck --check-prefix=NON-CUDA-INPUT %s + // NON-CUDA-INPUT: 0: input, "[[CUDA:.+]]", cuda, (host-cuda) // NON-CUDA-INPUT-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) // NON-CUDA-INPUT-NEXT: 2: 
compiler, {1}, ir, (host-cuda) @@ -277,13 +278,13 @@ // NON-CUDA-INPUT-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52) // NON-CUDA-INPUT-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52) // NON-CUDA-INPUT-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52) -// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object +// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object // NON-CUDA-INPUT-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70) -// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object +// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object // NON-CUDA-INPUT-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda) // NON-CUDA-INPUT-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir // NON-CUDA-INPUT-NEXT: 17: backend, {16}, assembler, (host-cuda)