diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst index 84cee45e83ba3..803dd5ab2b854 100644 --- a/clang/docs/HIPSupport.rst +++ b/clang/docs/HIPSupport.rst @@ -266,3 +266,31 @@ Example Usage Base* basePtr = &obj; basePtr->virtualFunction(); // Allowed since obj is constructed in device code } + +SPIRV Support on HIPAMD ToolChain +================================= + +SPIRV is a target-neutral device executable format. The support for SPIRV in the ROCm and HIPAMD toolchain is under active development. + +Compilation Process +------------------- + +When compiling HIP programs with the intent of utilizing SPIRV, the process diverges from the traditional compilation flow: + +Using ``--offload-arch=generic`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- **Target Triple**: The ``--offload-arch=generic`` flag instructs the compiler to use the target triple ``spirv64-unknown-unknown``. This approach does not generate ISA (Instruction Set Architecture) for a specific GPU architecture. + +- **LLVM IR Translation**: The program is compiled to LLVM Intermediate Representation (IR), which is subsequently translated into SPIRV. + +- **Clang Offload Bundler**: The resulting SPIRV is bundled by the Clang offload bundler under the bundle ID ``hipv4-amdgcn-amd-amdhsa--generic``. + +Mixed with Normal ``--offload-arch`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- **ISA Generation**: Alongside SPIRV, the compiler can also generate ISA for specific GPU architectures when normal ``--offload-arch`` options are used. + +- **Runtime Behavior**: The HIP runtime prioritizes the use of ISA for a specific GPU if available. In its absence, and if SPIRV is available, the runtime will JIT (Just-In-Time) compile SPIRV into ISA. + +This approach allows for greater flexibility and portability in HIP programming, particularly in environments where the specific GPU architecture may vary or be unknown at compile time. 
The ability to mix SPIRV with specific ISA generation also provides a balanced solution for optimizing performance while maintaining portability. diff --git a/clang/lib/Basic/TargetID.cpp b/clang/lib/Basic/TargetID.cpp index 3c06d9bad1dc0..06c8830de57be 100644 --- a/clang/lib/Basic/TargetID.cpp +++ b/clang/lib/Basic/TargetID.cpp @@ -46,8 +46,11 @@ getAllPossibleTargetIDFeatures(const llvm::Triple &T, /// Returns canonical processor name or empty string if \p Processor is invalid. static llvm::StringRef getCanonicalProcessorName(const llvm::Triple &T, llvm::StringRef Processor) { - if (T.isAMDGPU()) + if (T.isAMDGPU()) { + if (Processor == "generic") + return Processor; return llvm::AMDGPU::getCanonicalArchName(T, Processor); + } return Processor; } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index e241706b9082e..538820246cb3c 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3438,7 +3438,8 @@ class OffloadingActionBuilder final { // compiler phases, including backend and assemble phases. ActionList AL; Action *BackendAction = nullptr; - if (ToolChains.front()->getTriple().isSPIRV()) { + if (ToolChains.front()->getTriple().isSPIRV() || + StringRef(GpuArchList[I]) == "generic") { // Emit LLVM bitcode for SPIR-V targets. SPIR-V device tool chain // (HIPSPVToolChain) runs post-link LLVM IR passes. 
types::ID Output = Args.hasArg(options::OPT_S) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index ab19166f18c2d..6613a843c4ad3 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1008,6 +1008,10 @@ std::string ToolChain::ComputeLLVMTriple(const ArgList &Args, tools::arm::setFloatABIInTriple(getDriver(), Args, Triple); return Triple.getTriple(); } + case llvm::Triple::amdgcn: + if (Args.getLastArgValue(options::OPT_mcpu_EQ) == "generic") + return "spirv64-unknown-unknown"; + return getTripleString(); } } diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index cad206ea4df1b..5e83136d402f8 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -930,7 +930,7 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs( D.Diag(diag::err_drv_no_rocm_device_lib) << 0; return false; } - if (LibDeviceFile.empty()) { + if (!GPUArch.empty() && LibDeviceFile.empty()) { D.Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch; return false; } @@ -958,7 +958,8 @@ RocmInstallationDetector::getCommonBitcodeLibs( AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)); AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt)); AddBCLib(getWavefrontSize64Path(Wave64)); - AddBCLib(LibDeviceFile); + if (!LibDeviceFile.empty()) + AddBCLib(LibDeviceFile); auto ABIVerPath = getABIVersionPath(ABIVer); if (!ABIVerPath.empty()) AddBCLib(ABIVerPath); diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index ccb36a6c846c8..95c820715a762 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -10,6 +10,7 @@ #include "AMDGPU.h" #include "CommonArgs.h" #include "HIPUtility.h" +#include "SPIRV.h" #include "clang/Basic/Cuda.h" #include "clang/Basic/TargetID.h" #include "clang/Driver/Compilation.h" @@ -209,6 +210,13 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, 
const JobAction &JA, if (JA.getType() == types::TY_LLVM_BC) return constructLlvmLinkCommand(C, JA, Inputs, Output, Args); + if (Args.getLastArgValue(options::OPT_mcpu_EQ) == "generic") { + llvm::opt::ArgStringList TrArgs{"--spirv-max-version=1.1", + "--spirv-ext=+all"}; + return SPIRV::constructTranslateCommand(C, *this, JA, Output, Inputs[0], + TrArgs); + } + return constructLldCommand(C, JA, Inputs, Output, Args); } diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip index e976583820ccf..b7660652937b2 100644 --- a/clang/test/Driver/hip-phases.hip +++ b/clang/test/Driver/hip-phases.hip @@ -11,10 +11,13 @@ // // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN,NRD,OLD %s +// RUN: | FileCheck -check-prefixes=BIN,NRD,OLD,GFX803 %s // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN,NRD,NEW %s +// RUN: | FileCheck -check-prefixes=BIN,NRD,NEW,GFX803 %s +// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --offload-arch=generic %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=BIN,NRD,OLD,GENERIC %s // // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \ @@ -26,11 +29,14 @@ // RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) // RDC-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]]) -// BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]]) +// GFX803-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]]) +// RDC-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]]) +// GENERIC-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:generic]]) // 
BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]]) -// NRD-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]]) -// NRD-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]]) +// GFX803-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]]) +// GFX803-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]]) +// GENERIC-DAG: [[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]]) // RDC-DAG: [[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]]) // BIN-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]]) // BIN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip index e72df739b64b1..dbd65f21f6119 100644 --- a/clang/test/Driver/hip-toolchain-no-rdc.hip +++ b/clang/test/Driver/hip-toolchain-no-rdc.hip @@ -39,6 +39,11 @@ // RUN: %t/a.o %t/b.o \ // RUN: 2>&1 | FileCheck -check-prefixes=LKONLY %s +// RUN: %clang -### --target=x86_64-linux-gnu \ +// RUN: --offload-arch=generic --offload-arch=gfx900 \ +// RUN: %s -nogpuinc -nogpulib \ +// RUN: 2>&1 | FileCheck -check-prefixes=GENERIC %s + // // Compile device code in a.cu to code object for gfx803. // @@ -180,3 +185,15 @@ // LKONLY-NOT: {{".*/llc"}} // LKONLY: [[LD:".*ld.*"]] {{.*}} "{{.*/a.o}}" "{{.*/b.o}}" // LKONLY-NOT: "-T" "{{.*}}.lk" + +// +// Check mixed SPIRV and GPU arch. 
+// + +// GENERIC: "-cc1" "-triple" "spirv64-unknown-unknown" {{.*}}"-emit-llvm-bc" {{.*}} "-o" "[[GEN_BC:.*bc]]" +// GENERIC: {{".*llvm-spirv"}} "--spirv-max-version=1.1" "--spirv-ext=+all" "[[GEN_BC]]" "-o" "[[GEN_SPV:.*out]]" +// GENERIC: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}}"-emit-obj" {{.*}}"-target-cpu" "gfx900"{{.*}} "-o" "[[GFX900_OBJ:.*o]]" +// GENERIC: {{".*lld"}} {{.*}}"-plugin-opt=mcpu=gfx900" {{.*}} "-o" "[[GFX900_CO:.*out]]" {{.*}}"[[GFX900_OBJ]]" +// GENERIC: {{".*clang-offload-bundler"}} "-type=o" +// GENERIC-SAME: "-targets={{.*}}hipv4-amdgcn-amd-amdhsa--generic,hipv4-amdgcn-amd-amdhsa--gfx900" +// GENERIC-SAME: "-input=[[GEN_SPV]]" "-input=[[GFX900_CO]]"