Skip to content

Commit 6d1b325

Browse files
committed
[CUDA] Include PTX in non-RDC mode using the new driver
Summary: The old driver embeds PTX in rdc-mode, and so does the `nvcc` compiler. The new driver currently does not do this, so we should keep it consistent in this case. This simply requires adding the assembler output as an input to the offloading action that gets fed to fatbin.
1 parent e9901d8 commit 6d1b325

File tree

3 files changed

+24
-13
lines changed

3 files changed

+24
-13
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4625,7 +4625,15 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
46254625
DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
46264626
OffloadAction::DeviceDependences DDep;
46274627
DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
4628+
4629+
// Compiling CUDA in non-RDC mode uses the PTX output if available.
4630+
for (Action *Input : A->getInputs())
4631+
if (Kind == Action::OFK_Cuda && A->getType() == types::TY_Object &&
4632+
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
4633+
false))
4634+
DDep.add(*Input, *TCAndArch->first, TCAndArch->second.data(), Kind);
46284635
OffloadActions.push_back(C.MakeAction<OffloadAction>(DDep, A->getType()));
4636+
46294637
++TCAndArch;
46304638
}
46314639
}

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,9 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
504504
}
505505

506506
static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
507-
bool includePTX = true;
507+
// The new driver does not include PTX by default.
508+
bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver,
509+
options::OPT_no_offload_new_driver, false);
508510
for (Arg *A : Args) {
509511
if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
510512
A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))

clang/test/Driver/cuda-phases.cu

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -244,31 +244,32 @@
244244
// NEW-DRIVER-RDC-NEXT: 18: assembler, {17}, object, (host-cuda)
245245
// NEW-DRIVER-RDC-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda)
246246

247-
// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver -fgpu-rdc \
247+
// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \
248248
// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW-DRIVER %s
249-
// NEW-DRIVER: 0: input, "[[INPUT:.+]]", cuda
250-
// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output
251-
// NEW-DRIVER-NEXT: 2: compiler, {1}, ir
252-
// NEW-DRIVER-NEXT: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
249+
// NEW-DRIVER: 0: input, "[[CUDA:.+]]", cuda, (host-cuda)
250+
// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
251+
// NEW-DRIVER-NEXT: 2: compiler, {1}, ir, (host-cuda)
252+
// NEW-DRIVER-NEXT: 3: input, "[[CUDA]]", cuda, (device-cuda, sm_52)
253253
// NEW-DRIVER-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
254254
// NEW-DRIVER-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
255255
// NEW-DRIVER-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52)
256256
// NEW-DRIVER-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52)
257-
// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
258-
// NEW-DRIVER-NEXT: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
257+
// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object
258+
// NEW-DRIVER-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70)
259259
// NEW-DRIVER-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
260260
// NEW-DRIVER-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70)
261261
// NEW-DRIVER-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70)
262262
// NEW-DRIVER-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70)
263-
// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
264-
// NEW-DRIVER-NEXT: 15: clang-offload-packager, {8, 14}, image
265-
// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {15}, ir
263+
// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object
264+
// NEW-DRIVER-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda)
265+
// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir
266266
// NEW-DRIVER-NEXT: 17: backend, {16}, assembler, (host-cuda)
267267
// NEW-DRIVER-NEXT: 18: assembler, {17}, object, (host-cuda)
268268
// NEW-DRIVER-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda)
269269

270270
// RUN: %clang -### --target=powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \
271271
// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s %S/Inputs/empty.cpp 2>&1 | FileCheck --check-prefix=NON-CUDA-INPUT %s
272+
272273
// NON-CUDA-INPUT: 0: input, "[[CUDA:.+]]", cuda, (host-cuda)
273274
// NON-CUDA-INPUT-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
274275
// NON-CUDA-INPUT-NEXT: 2: compiler, {1}, ir, (host-cuda)
@@ -277,13 +278,13 @@
277278
// NON-CUDA-INPUT-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
278279
// NON-CUDA-INPUT-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52)
279280
// NON-CUDA-INPUT-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52)
280-
// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
281+
// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object
281282
// NON-CUDA-INPUT-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70)
282283
// NON-CUDA-INPUT-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
283284
// NON-CUDA-INPUT-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70)
284285
// NON-CUDA-INPUT-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70)
285286
// NON-CUDA-INPUT-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70)
286-
// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
287+
// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object
287288
// NON-CUDA-INPUT-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda)
288289
// NON-CUDA-INPUT-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir
289290
// NON-CUDA-INPUT-NEXT: 17: backend, {16}, assembler, (host-cuda)

0 commit comments

Comments
 (0)