diff --git a/clang/include/clang/AST/SYCLKernelInfo.h b/clang/include/clang/AST/SYCLKernelInfo.h index 4a4827e601053..3825af86c14e3 100644 --- a/clang/include/clang/AST/SYCLKernelInfo.h +++ b/clang/include/clang/AST/SYCLKernelInfo.h @@ -22,9 +22,10 @@ namespace clang { class SYCLKernelInfo { public: SYCLKernelInfo(CanQualType KernelNameType, - const FunctionDecl *KernelEntryPointDecl) + const FunctionDecl *KernelEntryPointDecl, + const std::string &KernelName) : KernelNameType(KernelNameType), - KernelEntryPointDecl(KernelEntryPointDecl) {} + KernelEntryPointDecl(KernelEntryPointDecl), KernelName(KernelName) {} CanQualType getKernelNameType() const { return KernelNameType; } @@ -32,9 +33,12 @@ class SYCLKernelInfo { return KernelEntryPointDecl; } + const std::string &GetKernelName() const { return KernelName; } + private: CanQualType KernelNameType; const FunctionDecl *KernelEntryPointDecl; + std::string KernelName; }; } // namespace clang diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index bf24704e48eaa..860e6ec0fb47e 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12825,6 +12825,15 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) { if (!FD->doesThisDeclarationHaveABody()) return FD->doesDeclarationForceExternallyVisibleDefinition(); + // Function definitions with the sycl_kernel_entry_point attribute are + // required during device compilation so that SYCL kernel caller offload + // entry points are emitted. + if (LangOpts.SYCLIsDevice && FD->hasAttr()) + return true; + + // FIXME: Functions declared with SYCL_EXTERNAL are required during + // device compilation. + // Constructors and destructors are required. 
if (FD->hasAttr() || FD->hasAttr()) return true; @@ -14832,9 +14841,36 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap &FeatureMap, } } -static SYCLKernelInfo BuildSYCLKernelInfo(CanQualType KernelNameType, +static SYCLKernelInfo BuildSYCLKernelInfo(ASTContext &Context, + CanQualType KernelNameType, const FunctionDecl *FD) { - return {KernelNameType, FD}; + // Host and device compilation may use different ABIs and different ABIs + // may allocate name mangling discriminators differently. A discriminator + // override is used to ensure consistent discriminator allocation across + // host and device compilation. + auto DeviceDiscriminatorOverrider = + [](ASTContext &Ctx, const NamedDecl *ND) -> UnsignedOrNone { + if (const auto *RD = dyn_cast(ND)) + if (RD->isLambda()) + return RD->getDeviceLambdaManglingNumber(); + return std::nullopt; + }; + std::unique_ptr MC{ItaniumMangleContext::create( + Context, Context.getDiagnostics(), DeviceDiscriminatorOverrider)}; + + // Construct a mangled name for the SYCL kernel caller offload entry point. + // FIXME: The Itanium typeinfo mangling (_ZTS) is currently used to + // name the SYCL kernel caller offload entry point function. This mangling + // does not suffice to clearly identify symbols that correspond to SYCL + // kernel caller functions, nor is this mangling natural for targets that + // use a non-Itanium ABI. 
+ std::string Buffer; + Buffer.reserve(128); + llvm::raw_string_ostream Out(Buffer); + MC->mangleCanonicalTypeName(KernelNameType, Out); + std::string KernelName = Out.str(); + + return {KernelNameType, FD, KernelName}; } void ASTContext::registerSYCLEntryPointFunction(FunctionDecl *FD) { @@ -14855,8 +14891,8 @@ void ASTContext::registerSYCLEntryPointFunction(FunctionDecl *FD) { declaresSameEntity(FD, IT->second.getKernelEntryPointDecl())) && "SYCL kernel name conflict"); (void)IT; - SYCLKernels.insert( - std::make_pair(KernelNameType, BuildSYCLKernelInfo(KernelNameType, FD))); + SYCLKernels.insert(std::make_pair( + KernelNameType, BuildSYCLKernelInfo(*this, KernelNameType, FD))); } const SYCLKernelInfo &ASTContext::getSYCLKernelInfo(QualType T) const { diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index bc1035163a8eb..8cb27420dd911 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -739,6 +739,17 @@ CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType, RequiredArgs::All); } +const CGFunctionInfo & +CodeGenTypes::arrangeSYCLKernelCallerDeclaration(QualType resultType, + const FunctionArgList &args) { + CanQualTypeList argTypes = getArgTypesForDeclaration(Context, args); + + return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None, + argTypes, + FunctionType::ExtInfo(CC_OpenCLKernel), + /*paramInfos=*/{}, RequiredArgs::All); +} + /// Arrange a call to a C++ method, passing the given arguments. /// /// numPrefixArgs is the number of ABI-specific prefix arguments we have. 
It diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt index dc5b2a35583b4..c377ac0786747 100644 --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -102,6 +102,7 @@ add_clang_library(clangCodeGen CodeGenFunction.cpp CodeGenModule.cpp CodeGenPGO.cpp + CodeGenSYCL.cpp CodeGenTBAA.cpp CodeGenTypes.cpp ConstantInitBuilder.cpp diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 26e09fe239242..83d8d4f758195 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -3309,6 +3309,27 @@ void CodeGenModule::EmitDeferred() { CurDeclsToEmit.swap(DeferredDeclsToEmit); for (GlobalDecl &D : CurDeclsToEmit) { + // Functions declared with the sycl_kernel_entry_point attribute are + // emitted normally during host compilation. During device compilation, + // a SYCL kernel caller offload entry point function is generated and + // emitted in place of each of these functions. + if (const auto *FD = D.getDecl()->getAsFunction()) { + if (LangOpts.SYCLIsDevice && FD->hasAttr() && + FD->isDefined()) { + // Functions with an invalid sycl_kernel_entry_point attribute are + // ignored during device compilation. + if (!FD->getAttr()->isInvalidAttr()) { + // Generate and emit the SYCL kernel caller function. + EmitSYCLKernelCaller(FD, getContext()); + // Recurse to emit any symbols directly or indirectly referenced + // by the SYCL kernel caller function. + EmitDeferred(); + } + // Do not emit the sycl_kernel_entry_point attributed function. + continue; + } + } + // We should call GetAddrOfGlobal with IsForDefinition set to true in order // to get GlobalValue with exactly the type we need, not something that // might had been created for another decl with the same mangled name but @@ -3644,6 +3665,10 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // Defer until all versions have been semantically checked. 
if (FD->hasAttr() && !FD->isMultiVersion()) return false; + // Defer emission of SYCL kernel entry point functions during device + // compilation. + if (LangOpts.SYCLIsDevice && FD->hasAttr()) + return false; } if (const auto *VD = dyn_cast(Global)) { if (Context.getInlineVariableDefinitionKind(VD) == diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 46de3d868f901..9a0bc675e0baa 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1972,6 +1972,11 @@ class CodeGenModule : public CodeGenTypeCache { /// .gcda files in a way that persists in .bc files. void EmitCoverageFile(); + /// Given a sycl_kernel_entry_point attributed function, emit the + /// corresponding SYCL kernel caller offload entry point function. + void EmitSYCLKernelCaller(const FunctionDecl *KernelEntryPointFn, + ASTContext &Ctx); + /// Determine whether the definition must be emitted; if this returns \c /// false, the definition can be emitted lazily if it's used. bool MustBeEmitted(const ValueDecl *D); diff --git a/clang/lib/CodeGen/CodeGenSYCL.cpp b/clang/lib/CodeGen/CodeGenSYCL.cpp new file mode 100644 index 0000000000000..b9a96fe8ab838 --- /dev/null +++ b/clang/lib/CodeGen/CodeGenSYCL.cpp @@ -0,0 +1,72 @@ +//===--------- CodeGenSYCL.cpp - Code for SYCL kernel generation ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code required for generation of SYCL kernel caller offload +// entry point functions. 
+// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" + +using namespace clang; +using namespace CodeGen; + +static void SetSYCLKernelAttributes(llvm::Function *Fn, CodeGenFunction &CGF) { + // SYCL 2020 device language restrictions require forward progress and + // disallow recursion. + Fn->setDoesNotRecurse(); + if (CGF.checkIfFunctionMustProgress()) + Fn->addFnAttr(llvm::Attribute::MustProgress); +} + +void CodeGenModule::EmitSYCLKernelCaller(const FunctionDecl *KernelEntryPointFn, + ASTContext &Ctx) { + assert(Ctx.getLangOpts().SYCLIsDevice && + "SYCL kernel caller offload entry point functions can only be emitted" + " during device compilation"); + + const auto *KernelEntryPointAttr = + KernelEntryPointFn->getAttr(); + assert(KernelEntryPointAttr && "Missing sycl_kernel_entry_point attribute"); + assert(!KernelEntryPointAttr->isInvalidAttr() && + "sycl_kernel_entry_point attribute is invalid"); + + // Find the SYCLKernelCallStmt. + SYCLKernelCallStmt *KernelCallStmt = + cast(KernelEntryPointFn->getBody()); + + // Retrieve the SYCL kernel caller parameters from the OutlinedFunctionDecl. + FunctionArgList Args; + const OutlinedFunctionDecl *OutlinedFnDecl = + KernelCallStmt->getOutlinedFunctionDecl(); + Args.append(OutlinedFnDecl->param_begin(), OutlinedFnDecl->param_end()); + + // Compute the function info and LLVM function type. + const CGFunctionInfo &FnInfo = + getTypes().arrangeSYCLKernelCallerDeclaration(Ctx.VoidTy, Args); + llvm::FunctionType *FnTy = getTypes().GetFunctionType(FnInfo); + + // Retrieve the generated name for the SYCL kernel caller function. 
+ CanQualType KernelNameType = + Ctx.getCanonicalType(KernelEntryPointAttr->getKernelName()); + const SYCLKernelInfo &KernelInfo = Ctx.getSYCLKernelInfo(KernelNameType); + auto *Fn = llvm::Function::Create(FnTy, llvm::Function::ExternalLinkage, + KernelInfo.GetKernelName(), &getModule()); + + // Emit the SYCL kernel caller function. + CodeGenFunction CGF(*this); + SetLLVMFunctionAttributes(GlobalDecl(), FnInfo, Fn, false); + SetSYCLKernelAttributes(Fn, CGF); + CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, FnInfo, Args, + SourceLocation(), SourceLocation()); + CGF.EmitFunctionBody(OutlinedFnDecl->getBody()); + setDSOLocal(Fn); + SetLLVMFunctionAttributesForDefinition(cast(OutlinedFnDecl), Fn); + CGF.FinishFunction(); +} diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h index 307048bcc510d..29f6f1ec80bc3 100644 --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -229,6 +229,13 @@ class CodeGenTypes { const CGFunctionInfo &arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args); + /// A SYCL kernel caller function is an offload device entry point function + /// with a target device dependent calling convention such as amdgpu_kernel, + /// ptx_kernel, or spir_kernel. + const CGFunctionInfo & + arrangeSYCLKernelCallerDeclaration(QualType resultType, + const FunctionArgList &args); + /// Objective-C methods are C functions with some implicit parameters. 
const CGFunctionInfo &arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD); const CGFunctionInfo &arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index f617e645a9eaf..25ab28c54b659 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -77,6 +77,10 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { return true; } + unsigned getOpenCLKernelCallingConv() const override { + return llvm::CallingConv::PTX_Kernel; + } + // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, diff --git a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp new file mode 100644 index 0000000000000..195f1d9d26d7d --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp @@ -0,0 +1,184 @@ +// RUN: %clang_cc1 -fsycl-is-host -emit-llvm -triple x86_64-unknown-linux-gnu -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-HOST,CHECK-HOST-LINUX %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-unknown-linux-gnu -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s +// RUN: %clang_cc1 -fsycl-is-host -emit-llvm -triple x86_64-pc-windows-msvc -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-HOST,CHECK-HOST-WINDOWS %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s + +// Test 
the generation of SYCL kernel caller functions. These functions are +// generated from functions declared with the sycl_kernel_entry_point attribute +// and emitted during device compilation. They are not emitted during host +// compilation. + +struct single_purpose_kernel_name; +struct single_purpose_kernel { + void operator()() const {} +}; + +[[clang::sycl_kernel_entry_point(single_purpose_kernel_name)]] +void single_purpose_kernel_task(single_purpose_kernel kernelFunc) { + kernelFunc(); +} + +template +[[clang::sycl_kernel_entry_point(KernelName)]] +void kernel_single_task(KernelType kernelFunc) { + kernelFunc(42); +} + +int main() { + single_purpose_kernel obj; + single_purpose_kernel_task(obj); + int capture; + auto lambda = [=](auto) { (void) capture; }; + kernel_single_task(lambda); +} + +// Verify that SYCL kernel caller functions are not emitted during host +// compilation. +// +// CHECK-HOST-NOT: _ZTS26single_purpose_kernel_name +// CHECK-HOST-NOT: _ZTSZ4mainE18lambda_kernel_name + +// Verify that sycl_kernel_entry_point attributed functions are not emitted +// during device compilation. +// +// CHECK-DEVICE-NOT: single_purpose_kernel_task +// CHECK-DEVICE-NOT: kernel_single_task + +// Verify that no code is generated for the bodies of sycl_kernel_entry_point +// attributed functions during host compilation. ODR-use of these functions may +// require them to be emitted, but they have no effect if called. 
+// +// CHECK-HOST-LINUX: define dso_local void @_Z26single_purpose_kernel_task21single_purpose_kernel() #{{[0-9]+}} { +// CHECK-HOST-LINUX-NEXT: entry: +// CHECK-HOST-LINUX-NEXT: %kernelFunc = alloca %struct.single_purpose_kernel, align 1 +// CHECK-HOST-LINUX-NEXT: ret void +// CHECK-HOST-LINUX-NEXT: } +// +// CHECK-HOST-LINUX: define internal void @_Z18kernel_single_taskIZ4mainEUlT_E_S1_EvT0_(i32 %kernelFunc.coerce) #{{[0-9]+}} { +// CHECK-HOST-LINUX-NEXT: entry: +// CHECK-HOST-LINUX-NEXT: %kernelFunc = alloca %class.anon, align 4 +// CHECK-HOST-LINUX-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %kernelFunc, i32 0, i32 0 +// CHECK-HOST-LINUX-NEXT: store i32 %kernelFunc.coerce, ptr %coerce.dive, align 4 +// CHECK-HOST-LINUX-NEXT: ret void +// CHECK-HOST-LINUX-NEXT: } +// +// CHECK-HOST-WINDOWS: define dso_local void @"?single_purpose_kernel_task@@YAXUsingle_purpose_kernel@@@Z"(i8 %kernelFunc.coerce) #{{[0-9]+}} { +// CHECK-HOST-WINDOWS-NEXT: entry: +// CHECK-HOST-WINDOWS-NEXT: %kernelFunc = alloca %struct.single_purpose_kernel, align 1 +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %struct.single_purpose_kernel, ptr %kernelFunc, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: store i8 %kernelFunc.coerce, ptr %coerce.dive, align 1 +// CHECK-HOST-WINDOWS-NEXT: ret void +// CHECK-HOST-WINDOWS-NEXT: } +// +// CHECK-HOST-WINDOWS: define internal void @"??$kernel_single_task@V@?0??main@@9@V1?0??2@9@@@YAXV@?0??main@@9@@Z"(i32 %kernelFunc.coerce) #{{[0-9]+}} { +// CHECK-HOST-WINDOWS-NEXT: entry: +// CHECK-HOST-WINDOWS-NEXT: %kernelFunc = alloca %class.anon, align 4 +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %kernelFunc, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: store i32 %kernelFunc.coerce, ptr %coerce.dive, align 4 +// CHECK-HOST-WINDOWS-NEXT: ret void +// CHECK-HOST-WINDOWS-NEXT: } + +// Verify that SYCL kernel caller functions are emitted for each device target. 
+// +// FIXME: The following set of matches are used to skip over the declaration of +// main(). main() shouldn't be emitted in device code, but that pruning isn't +// performed yet. +// CHECK-DEVICE: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-DEVICE-NEXT: define {{[a-z_ ]*}}noundef i32 @main() #0 + +// IR for the SYCL kernel caller function generated for +// single_purpose_kernel_task with single_purpose_kernel_name as the SYCL kernel +// name type. +// +// CHECK-AMDGCN: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-AMDGCN-NEXT: define dso_local amdgpu_kernel void @_ZTS26single_purpose_kernel_name +// CHECK-AMDGCN-SAME: (ptr addrspace(4) noundef byref(%struct.single_purpose_kernel) align 1 %0) #[[AMDGCN_ATTR0:[0-9]+]] { +// CHECK-AMDGCN-NEXT: entry: +// CHECK-AMDGCN-NEXT: %coerce = alloca %struct.single_purpose_kernel, align 1, addrspace(5) +// CHECK-AMDGCN-NEXT: %kernelFunc = addrspacecast ptr addrspace(5) %coerce to ptr +// CHECK-AMDGCN-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 1 %kernelFunc, ptr addrspace(4) align 1 %0, i64 1, i1 false) +// CHECK-AMDGCN-NEXT: call void @_ZNK21single_purpose_kernelclEv +// CHECK-AMDGCN-SAME: (ptr noundef nonnull align 1 dereferenceable(1) %kernelFunc) #[[AMDGCN_ATTR1:[0-9]+]] +// CHECK-AMDGCN-NEXT: ret void +// CHECK-AMDGCN-NEXT: } +// CHECK-AMDGCN: define linkonce_odr void @_ZNK21single_purpose_kernelclEv +// +// CHECK-NVPTX: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-NVPTX-NEXT: define dso_local ptx_kernel void @_ZTS26single_purpose_kernel_name +// CHECK-NVPTX-SAME: (ptr noundef byval(%struct.single_purpose_kernel) align 1 %kernelFunc) #[[NVPTX_ATTR0:[0-9]+]] { +// CHECK-NVPTX-NEXT: entry: +// CHECK-NVPTX-NEXT: call void @_ZNK21single_purpose_kernelclEv +// CHECK-NVPTX-SAME: (ptr noundef nonnull align 1 dereferenceable(1) %kernelFunc) #[[NVPTX_ATTR1:[0-9]+]] +// CHECK-NVPTX-NEXT: ret void +// 
CHECK-NVPTX-NEXT: } +// CHECK-NVPTX: define linkonce_odr void @_ZNK21single_purpose_kernelclEv +// +// CHECK-SPIR: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-SPIR-NEXT: define {{[a-z_ ]*}}spir_kernel void @_ZTS26single_purpose_kernel_name +// CHECK-SPIR-SAME: (ptr noundef byval(%struct.single_purpose_kernel) align 1 %kernelFunc) #[[SPIR_ATTR0:[0-9]+]] { +// CHECK-SPIR-NEXT: entry: +// CHECK-SPIR-NEXT: %kernelFunc.ascast = addrspacecast ptr %kernelFunc to ptr addrspace(4) +// CHECK-SPIR-NEXT: call spir_func void @_ZNK21single_purpose_kernelclEv +// CHECK-SPIR-SAME: (ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %kernelFunc.ascast) #[[SPIR_ATTR1:[0-9]+]] +// CHECK-SPIR-NEXT: ret void +// CHECK-SPIR-NEXT: } +// CHECK-SPIR: define linkonce_odr spir_func void @_ZNK21single_purpose_kernelclEv + +// IR for the SYCL kernel caller function generated for kernel_single_task with +// lambda_kernel_name as the SYCL kernel name type. +// +// CHECK-AMDGCN: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-AMDGCN-NEXT: define dso_local amdgpu_kernel void @_ZTSZ4mainEUlT_E_ +// CHECK-AMDGCN-SAME: (i32 %kernelFunc.coerce) #[[AMDGCN_ATTR0]] { +// CHECK-AMDGCN-NEXT: entry: +// CHECK-AMDGCN-NEXT: %kernelFunc = alloca %class.anon, align 4, addrspace(5) +// CHECK-AMDGCN-NEXT: %kernelFunc1 = addrspacecast ptr addrspace(5) %kernelFunc to ptr +// CHECK-AMDGCN-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %kernelFunc1, i32 0, i32 0 +// CHECK-AMDGCN-NEXT: store i32 %kernelFunc.coerce, ptr %coerce.dive, align 4 +// CHECK-AMDGCN-NEXT: call void @_ZZ4mainENKUlT_E_clIiEEDaS_ +// CHECK-AMDGCN-SAME: (ptr noundef nonnull align 4 dereferenceable(4) %kernelFunc1, i32 noundef 42) #[[AMDGCN_ATTR1]] +// CHECK-AMDGCN-NEXT: ret void +// CHECK-AMDGCN-NEXT: } +// CHECK-AMDGCN: define internal void @_ZZ4mainENKUlT_E_clIiEEDaS_ +// +// CHECK-NVPTX: Function Attrs: convergent mustprogress noinline 
norecurse nounwind optnone +// CHECK-NVPTX-NEXT: define dso_local ptx_kernel void @_ZTSZ4mainEUlT_E_ +// CHECK-NVPTX-SAME: (ptr noundef byval(%class.anon) align 4 %kernelFunc) #[[NVPTX_ATTR0]] { +// CHECK-NVPTX-NEXT: entry: +// CHECK-NVPTX-NEXT: call void @_ZZ4mainENKUlT_E_clIiEEDaS_ +// CHECK-NVPTX-SAME: (ptr noundef nonnull align 4 dereferenceable(4) %kernelFunc, i32 noundef 42) #[[NVPTX_ATTR1]] +// CHECK-NVPTX-NEXT: ret void +// CHECK-NVPTX-NEXT: } +// CHECK-NVPTX: define internal void @_ZZ4mainENKUlT_E_clIiEEDaS_ +// +// CHECK-SPIR: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-SPIR-NEXT: define {{[a-z_ ]*}}spir_kernel void @_ZTSZ4mainEUlT_E_ +// CHECK-SPIR-SAME: (ptr noundef byval(%class.anon) align 4 %kernelFunc) #[[SPIR_ATTR0]] { +// CHECK-SPIR-NEXT: entry: +// CHECK-SPIR-NEXT: %kernelFunc.ascast = addrspacecast ptr %kernelFunc to ptr addrspace(4) +// CHECK-SPIR-NEXT: call spir_func void @_ZZ4mainENKUlT_E_clIiEEDaS_ +// CHECK-SPIR-SAME: (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %kernelFunc.ascast, i32 noundef 42) #[[SPIR_ATTR1]] +// CHECK-SPIR-NEXT: ret void +// CHECK-SPIR-NEXT: } +// CHECK-SPIR: define internal spir_func void @_ZZ4mainENKUlT_E_clIiEEDaS_ + +// CHECK-AMDGCN: #[[AMDGCN_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK-AMDGCN: #[[AMDGCN_ATTR1]] = { convergent nounwind } +// +// CHECK-NVPTX: #[[NVPTX_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" } +// CHECK-NVPTX: #[[NVPTX_ATTR1]] = { convergent nounwind } +// +// CHECK-SPIR: #[[SPIR_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK-SPIR: #[[SPIR_ATTR1]] = { convergent nounwind }