Skip to content

Commit 8d5c50f

Browse files
committed
[NVPTX] Switch front-ends and tests to ptx_kernel cc
1 parent 94c0db0 commit 8d5c50f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+305
-477
lines changed

clang/lib/CodeGen/Targets/NVPTX.cpp

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "ABIInfoImpl.h"
1010
#include "TargetInfo.h"
1111
#include "llvm/ADT/STLExtras.h"
12+
#include "llvm/IR/CallingConv.h"
1213
#include "llvm/IR/IntrinsicsNVPTX.h"
1314

1415
using namespace clang;
@@ -79,13 +80,11 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
7980
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
8081
// resulting MDNode to the nvvm.annotations MDNode.
8182
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
82-
int Operand,
83-
const SmallVectorImpl<int> &GridConstantArgs);
83+
int Operand);
8484

85-
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
86-
int Operand) {
87-
addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0));
88-
}
85+
static void
86+
addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
87+
const SmallVectorImpl<int> &GridConstantArgs);
8988

9089
private:
9190
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
@@ -259,7 +258,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
259258
if (FD->hasAttr<OpenCLKernelAttr>()) {
260259
// OpenCL __kernel functions get kernel metadata
261260
// Create !{<func-ref>, metadata !"kernel", i32 1} node
262-
addNVVMMetadata(F, "kernel", 1);
261+
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
263262
// And kernel functions are not subject to inlining
264263
F->addFnAttr(llvm::Attribute::NoInline);
265264
}
@@ -277,21 +276,21 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
277276
// For some reason arg indices are 1-based in NVVM
278277
GCI.push_back(IV.index() + 1);
279278
// Create !{<func-ref>, metadata !"kernel", i32 1} node
280-
addNVVMMetadata(F, "kernel", 1, GCI);
279+
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
280+
addGridConstantNVVMMetadata(F, GCI);
281281
}
282282
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
283283
M.handleCUDALaunchBoundsAttr(F, Attr);
284284
}
285285

286286
// Attach kernel metadata directly if compiling for NVPTX.
287287
if (FD->hasAttr<NVPTXKernelAttr>()) {
288-
addNVVMMetadata(F, "kernel", 1);
288+
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
289289
}
290290
}
291291

292-
void NVPTXTargetCodeGenInfo::addNVVMMetadata(
293-
llvm::GlobalValue *GV, StringRef Name, int Operand,
294-
const SmallVectorImpl<int> &GridConstantArgs) {
292+
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
293+
StringRef Name, int Operand) {
295294
llvm::Module *M = GV->getParent();
296295
llvm::LLVMContext &Ctx = M->getContext();
297296

@@ -302,6 +301,21 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(
302301
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
303302
llvm::ConstantAsMetadata::get(
304303
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
304+
305+
// Append metadata to nvvm.annotations
306+
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
307+
}
308+
309+
void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
310+
llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {
311+
312+
llvm::Module *M = GV->getParent();
313+
llvm::LLVMContext &Ctx = M->getContext();
314+
315+
// Get "nvvm.annotations" metadata node
316+
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
317+
318+
SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
305319
if (!GridConstantArgs.empty()) {
306320
SmallVector<llvm::Metadata *, 10> GCM;
307321
for (int I : GridConstantArgs)
@@ -310,6 +324,7 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(
310324
MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
311325
llvm::MDNode::get(Ctx, GCM)});
312326
}
327+
313328
// Append metadata to nvvm.annotations
314329
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
315330
}

clang/test/CodeGen/nvptx_attributes.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,14 @@
1010
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
1111
// CHECK-NEXT: store i32 1, ptr [[TMP0]], align 4
1212
// CHECK-NEXT: ret void
13+
//
1314
__attribute__((nvptx_kernel)) void foo(int *ret) {
1415
*ret = 1;
1516
}
1617

17-
// CHECK: !0 = !{ptr @foo, !"kernel", i32 1}
18+
//.
19+
// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx32,+sm_61" }
20+
//.
21+
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
22+
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
23+
//.

clang/test/CodeGenCUDA/device-fun-linkage.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ template __device__ void func<int>();
1717
// RDC: define weak_odr void @_Z4funcIiEvv()
1818

1919
template __global__ void kernel<int>();
20-
// NORDC: define void @_Z6kernelIiEvv()
21-
// RDC: define weak_odr void @_Z6kernelIiEvv()
20+
// NORDC: define ptx_kernel void @_Z6kernelIiEvv()
21+
// RDC: define weak_odr ptx_kernel void @_Z6kernelIiEvv()
2222

2323
// Ensure that unused static device function is eliminated
2424
static __device__ void static_func() {}
@@ -28,5 +28,5 @@ static __device__ void static_func() {}
2828
// Ensure that kernel function has external or weak_odr
2929
// linkage regardless static specifier
3030
static __global__ void static_kernel() {}
31-
// NORDC: define void @_ZL13static_kernelv()
32-
// RDC: define weak_odr void @_ZL13static_kernelv[[FILEID:.*]]()
31+
// NORDC: define ptx_kernel void @_ZL13static_kernelv()
32+
// RDC: define weak_odr ptx_kernel void @_ZL13static_kernelv[[FILEID:.*]]()

clang/test/CodeGenCUDA/grid-constant.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ void foo() {
2121
}
2222
//.
2323
//.
24-
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]}
24+
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"grid_constant", [[META1:![0-9]+]]}
2525
// CHECK: [[META1]] = !{i32 1, i32 3}
26-
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]}
26+
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"grid_constant", [[META3:![0-9]+]]}
2727
// CHECK: [[META3]] = !{i32 1}
28-
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]}
29-
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]}
28+
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"grid_constant", [[META3]]}
29+
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"grid_constant", [[META6:![0-9]+]]}
3030
// CHECK: [[META6]] = !{i32 2}
3131
//.

clang/test/CodeGenCUDA/offload_via_llvm.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#define __OFFLOAD_VIA_LLVM__ 1
88
#include "Inputs/cuda.h"
99

10-
// HST-LABEL: define dso_local void @_Z18__device_stub__fooisPvS_(
10+
// HST-LABEL: define dso_local ptx_kernel void @_Z18__device_stub__fooisPvS_(
1111
// HST-SAME: i32 noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], ptr noundef [[TMP2:%.*]], ptr noundef [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] {
1212
// HST-NEXT: [[ENTRY:.*:]]
1313
// HST-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
@@ -50,7 +50,7 @@
5050
// HST: [[SETUP_END]]:
5151
// HST-NEXT: ret void
5252
//
53-
// DEV-LABEL: define dso_local void @_Z3fooisPvS_(
53+
// DEV-LABEL: define dso_local ptx_kernel void @_Z3fooisPvS_(
5454
// DEV-SAME: i32 noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], ptr noundef [[TMP2:%.*]], ptr noundef [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] {
5555
// DEV-NEXT: [[ENTRY:.*:]]
5656
// DEV-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4

clang/test/CodeGenCUDA/ptx-kernels.cu

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
extern "C"
1111
__device__ void device_function() {}
1212

13-
// CHECK-LABEL: define{{.*}} void @global_function
13+
// CHECK-LABEL: define{{.*}} ptx_kernel void @global_function
1414
extern "C"
1515
__global__ void global_function() {
1616
// CHECK: call void @device_function
@@ -19,7 +19,7 @@ __global__ void global_function() {
1919

2020
// Make sure host-instantiated kernels are preserved on device side.
2121
template <typename T> __global__ void templated_kernel(T param) {}
22-
// CHECK-DAG: define{{.*}} void @_Z16templated_kernelIiEvT_(
22+
// CHECK-DAG: define{{.*}} ptx_kernel void @_Z16templated_kernelIiEvT_(
2323

2424
namespace {
2525
__global__ void anonymous_ns_kernel() {}
@@ -30,6 +30,3 @@ void host_function() {
3030
templated_kernel<<<0, 0>>>(0);
3131
anonymous_ns_kernel<<<0,0>>>();
3232
}
33-
34-
// CHECK: !{{[0-9]+}} = !{ptr @global_function, !"kernel", i32 1}
35-
// CHECK: !{{[0-9]+}} = !{ptr @_Z16templated_kernelIiEvT_, !"kernel", i32 1}

clang/test/CodeGenCUDA/usual-deallocators.cu

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ __host__ __device__ void tests_hd(void *t) {
109109
}
110110

111111
// Make sure that we've generated the kernel used by A::~A.
112-
// DEVICE-LABEL: define void @_Z1fIiEvT_
112+
// DEVICE-LABEL: define ptx_kernel void @_Z1fIiEvT_
113113

114114
// Make sure we've picked deallocator for the correct side of compilation.
115115

@@ -147,5 +147,3 @@ __host__ __device__ void tests_hd(void *t) {
147147
// COMMON-LABEL: define linkonce_odr void @_ZN8H1H2D1D2dlEPv(ptr noundef %0)
148148
// DEVICE: call void @dev_fn()
149149
// HOST: call void @host_fn()
150-
151-
// DEVICE: !0 = !{ptr @_Z1fIiEvT_, !"kernel", i32 1}

clang/test/CodeGenOpenCL/ptx-calls.cl

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,5 @@ void device_function() {
77
__kernel void kernel_function() {
88
device_function();
99
}
10-
// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function()
10+
// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function()
1111
// CHECK: call void @device_function()
12-
// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1}
13-

clang/test/CodeGenOpenCL/ptx-kernels.cl

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,4 @@ void device_function() {
66

77
__kernel void kernel_function() {
88
}
9-
// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function()
10-
11-
// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1}
9+
// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function()

clang/test/CodeGenOpenCL/reflect.cl

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ bool device_function() {
1212
return __nvvm_reflect("__CUDA_ARCH") >= 700;
1313
}
1414

15-
// CHECK-LABEL: define dso_local spir_kernel void @kernel_function(
16-
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
15+
// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function(
16+
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
1717
// CHECK-NEXT: entry:
1818
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
1919
// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
@@ -26,3 +26,9 @@ bool device_function() {
2626
__kernel void kernel_function(__global int *i) {
2727
*i = device_function();
2828
}
29+
//.
30+
// CHECK: [[META3]] = !{i32 1}
31+
// CHECK: [[META4]] = !{!"none"}
32+
// CHECK: [[META5]] = !{!"int*"}
33+
// CHECK: [[META6]] = !{!""}
34+
//.

0 commit comments

Comments
 (0)