Skip to content

Commit ac2fcbe

Browse files
Address Sergio's comments
1 parent 8e45bd4 commit ac2fcbe

File tree

2 files changed

+82
-78
lines changed

2 files changed

+82
-78
lines changed

llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2496,7 +2496,7 @@ class OpenMPIRBuilder {
24962496
TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc,
24972497
OpenMPIRBuilder::InsertPointTy AllocaIP,
24982498
const SmallVector<llvm::OpenMPIRBuilder::DependData> &Dependencies,
2499-
TargetDataRTArgs &RTArgs, bool HasNoWait);
2499+
const TargetDataRTArgs &RTArgs, bool HasNoWait);
25002500

25012501
/// Emit the arguments to be passed to the runtime library based on the
25022502
/// arrays of base pointers, pointers, sizes, map types, and mappers. If

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 81 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -7150,27 +7150,52 @@ static Expected<Function *> createOutlinedFunction(
71507150
ValueReplacementMap);
71517151
return Func;
71527152
}
7153+
/// Given a task descriptor, TaskWithPrivates, return the pointer to the block
7154+
/// of pointers contain shared data between the parent task and the created
7155+
/// task.
7156+
///
7157+
static LoadInst *loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder,
7158+
IRBuilderBase &Builder,
7159+
Value *TaskWithPrivates,
7160+
Type *TaskWithPrivatesTy) {
71537161

7162+
Type *TaskTy = OMPIRBuilder.Task;
7163+
LLVMContext &Ctx = Builder.getContext();
7164+
Value *TaskT =
7165+
Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
7166+
Value *Shareds = TaskT;
7167+
// TaskWithPrivatesTy can be one of the following
7168+
// 1. %struct.task_with_privates = type { %struct.kmp_task_ompbuilder_t,
7169+
// %struct.privates }
7170+
// 2. %struct.kmp_task_ompbuilder_t ;; This is simply TaskTy
7171+
//
7172+
// In the former case, that is when TaskWithPrivatesTy != TaskTy,
7173+
// its first member has to be the task descriptor. TaskTy is the type of the
7174+
// task descriptor. TaskT is the pointer to the task descriptor. Loading the
7175+
// first member of TaskT, gives us the pointer to shared data.
7176+
if (TaskWithPrivatesTy != TaskTy)
7177+
Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
7178+
return Builder.CreateLoad(PointerType::getUnqual(Ctx), Shareds);
7179+
}
71547180
/// Create an entry point for a target task with the following.
71557181
/// It'll have the following signature
71567182
/// void @.omp_target_task_proxy_func(i32 %thread.id, ptr %task)
71577183
/// This function is called from emitTargetTask once the
71587184
/// code to launch the target kernel has been outlined already.
7185+
/// NumOffloadingArrays is the number of offloading arrays that we need to copy
7186+
/// into the task structure so that the deferred target task can access this
7187+
/// data even after the stack frame of the generating task has been rolled
7188+
/// back. Offloading arrays contain base pointers, pointers, sizes etc
7189+
/// of the data that the target kernel will access. These in effect are the
7190+
/// non-empty arrays of pointers held by OpenMPIRBuilder::TargetDataRTArgs.
71597191
static Function *emitTargetTaskProxyFunction(
71607192
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI,
71617193
StructType *PrivatesTy, StructType *TaskWithPrivatesTy,
71627194
const size_t NumOffloadingArrays, const int SharedArgsOperandNo) {
71637195

7164-
// NumOffloadingArrays is the number of offloading arrays that we need to copy
7165-
// into the task structure so that the deferred target task can access this
7166-
// data even after the stack frame of the generating task has been rolled
7167-
// back. Offloading arrays contain base pointers, pointers, sizes etc
7168-
// of the data that the target kernel will access. In other words, the
7169-
// arrays of pointers held by OpenMPIRBuilder::TargetDataRTArgs
7170-
// The number of arrays and the size of each array depends on the specifics of
7171-
// the target call. These arrays are copied into a struct whose type is
7172-
// PrivatesTy. So, if NumOffloadingArrays is non-zero, PrivatesTy better
7173-
// not be nullptr
7196+
// if NumOffloadingArrays is non-zero, PrivatesTy better not be nullptr
7197+
// This is because PrivatesTy is the type of the structure in which
7198+
// we pass the offloading arrays to the deferred target task.
71747199
assert((!NumOffloadingArrays || PrivatesTy) &&
71757200
"PrivatesTy cannot be nullptr when there are offloadingArrays"
71767201
"to privatize");
@@ -7214,18 +7239,17 @@ static Function *emitTargetTaskProxyFunction(
72147239
auto ProxyFn = Function::Create(ProxyFnTy, GlobalValue::InternalLinkage,
72157240
".omp_target_task_proxy_func",
72167241
Builder.GetInsertBlock()->getModule());
7217-
ProxyFn->getArg(0)->setName("thread.id");
7218-
ProxyFn->getArg(1)->setName("task");
7242+
Value *ThreadId = ProxyFn->getArg(0);
7243+
Value *TaskWithPrivates = ProxyFn->getArg(1);
7244+
ThreadId->setName("thread.id");
7245+
TaskWithPrivates->setName("task");
72197246

72207247
bool HasShareds = SharedArgsOperandNo > 0;
72217248
bool HasOffloadingArrays = NumOffloadingArrays > 0;
72227249
BasicBlock *EntryBB =
72237250
BasicBlock::Create(Builder.getContext(), "entry", ProxyFn);
72247251
Builder.SetInsertPoint(EntryBB);
72257252

7226-
Value *ThreadId = ProxyFn->getArg(0);
7227-
Value *TaskWithPrivates = ProxyFn->getArg(1);
7228-
72297253
SmallVector<Value *> KernelLaunchArgs;
72307254
KernelLaunchArgs.reserve(StaleCI->arg_size());
72317255
KernelLaunchArgs.push_back(ThreadId);
@@ -7255,23 +7279,8 @@ static Function *emitTargetTaskProxyFunction(
72557279
Value *SharedsSize =
72567280
Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
72577281

7258-
Value *TaskT =
7259-
Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
7260-
Value *Shareds = TaskT;
7261-
// TaskWithPrivatesTy can be
7262-
// %struct.task_with_privates = type { %struct.kmp_task_ompbuilder_t,
7263-
// %struct.privates }
7264-
// OR
7265-
// %struct.kmp_task_ompbuilder_t ;; This is simply TaskTy
7266-
// In the former case, that is when TaskWithPrivatesTy is not the same as
7267-
// TaskTy, then its first member has to be the task descriptor. TaskTy is
7268-
// the type of the task descriptor. TaskT is the pointer to the task
7269-
// descriptor. Loading the first member of TaskT, gives us the pointer to
7270-
// shared data.
7271-
if (TaskWithPrivatesTy != TaskTy)
7272-
Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
7273-
LoadInst *LoadShared =
7274-
Builder.CreateLoad(PointerType::getUnqual(Ctx), Shareds);
7282+
LoadInst *LoadShared = loadSharedDataFromTaskDescriptor(
7283+
OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
72757284

72767285
Builder.CreateMemCpy(
72777286
NewArgStructAlloca, NewArgStructAlloca->getAlign(), LoadShared,
@@ -7282,7 +7291,16 @@ static Function *emitTargetTaskProxyFunction(
72827291
Builder.CreateRetVoid();
72837292
return ProxyFn;
72847293
}
7294+
static Type *getOffloadingArrayType(Value *V) {
7295+
7296+
if (auto *GEP = dyn_cast<GetElementPtrInst>(V))
7297+
return GEP->getSourceElementType();
7298+
if (auto *Alloca = dyn_cast<AllocaInst>(V))
7299+
return Alloca->getAllocatedType();
72857300

7301+
llvm_unreachable("Unhandled Instruction type");
7302+
return nullptr;
7303+
}
72867304
// This function returns a struct that has at most two members.
72877305
// The first member is always %struct.kmp_task_ompbuilder_t, that is the task
72887306
// descriptor. The second member, if needed, is a struct containing arrays
@@ -7298,27 +7316,24 @@ static Function *emitTargetTaskProxyFunction(
72987316
// If there aren't any offloading arrays to pass to the target kernel,
72997317
// %struct.kmp_task_ompbuilder_t is returned.
73007318
static StructType *
7301-
createTaskWithPrivatesTy(Type *Task,
7319+
createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder,
73027320
ArrayRef<Value *> OffloadingArraysToPrivatize) {
73037321

73047322
if (OffloadingArraysToPrivatize.empty())
7305-
return static_cast<StructType *>(Task);
7323+
return OMPIRBuilder.Task;
73067324

73077325
SmallVector<Type *, 4> StructFieldTypes;
7308-
for (auto &V : OffloadingArraysToPrivatize) {
7326+
for (Value *V : OffloadingArraysToPrivatize) {
73097327
assert(V->getType()->isPointerTy() &&
73107328
"Expected pointer to array to privatize. Got a non-pointer value "
73117329
"instead");
7312-
if (auto *GEP = dyn_cast<GetElementPtrInst>(V))
7313-
StructFieldTypes.push_back(GEP->getSourceElementType());
7314-
else if (auto *Alloca = dyn_cast<AllocaInst>(V))
7315-
StructFieldTypes.push_back(Alloca->getAllocatedType());
7316-
else
7317-
llvm_unreachable("Unhandled Instruction type");
7330+
Type *ArrayTy = getOffloadingArrayType(V);
7331+
assert(ArrayTy && "ArrayType cannot be nullptr");
7332+
StructFieldTypes.push_back(ArrayTy);
73187333
}
73197334
StructType *PrivatesStructTy =
73207335
StructType::create(StructFieldTypes, "struct.privates");
7321-
return StructType::create({Task, PrivatesStructTy},
7336+
return StructType::create({OMPIRBuilder.Task, PrivatesStructTy},
73227337
"struct.task_with_privates");
73237338
}
73247339
static Error emitTargetOutlinedFunction(
@@ -7346,7 +7361,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
73467361
TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc,
73477362
OpenMPIRBuilder::InsertPointTy AllocaIP,
73487363
const SmallVector<llvm::OpenMPIRBuilder::DependData> &Dependencies,
7349-
TargetDataRTArgs &RTArgs, bool HasNoWait) {
7364+
const TargetDataRTArgs &RTArgs, bool HasNoWait) {
73507365

73517366
// The following explains the code-gen scenario for the `target` directive. A
73527367
// similar scneario is followed for other device-related directives (e.g.
@@ -7393,11 +7408,11 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
73937408
//
73947409
// That is we create the following
73957410
// struct task_with_privates {
7396-
// struct kmp_task_ompbuilder_t;
7411+
// struct kmp_task_ompbuilder_t task_struct;
73977412
// struct privates {
7398-
// [2 x ptr], ; baseptrs
7399-
// [2 x ptr] ; ptrs
7400-
// [2 x i64] ; sizes
7413+
// [2 x ptr] ; baseptrs
7414+
// [2 x ptr] ; ptrs
7415+
// [2 x i64] ; sizes
74017416
// }
74027417
// }
74037418
// void user_code_that_offloads(...) {
@@ -7415,12 +7430,13 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
74157430
// sizeof(structArg),
74167431
// @.omp_target_task_proxy_func,
74177432
// ...)
7418-
// memcpy(target_task->shareds, %structArg, sizeof(structArg))
7419-
// memcpy(target_task->privates->baseptrs,
7433+
// memcpy(target_task_with_privates->task_struct->shareds, %structArg,
7434+
// sizeof(structArg))
7435+
// memcpy(target_task_with_privates->privates->baseptrs,
74207436
// offload_baseptrs, sizeof(offload_baseptrs)
7421-
// memcpy(target_task->privates->ptrs,
7437+
// memcpy(target_task_with_privates->privates->ptrs,
74227438
// offload_ptrs, sizeof(offload_ptrs)
7423-
// memcpy(target_task->privates->sizes,
7439+
// memcpy(target_task_with_privates->privates->sizes,
74247440
// offload_sizes, sizeof(offload_sizes)
74257441
// dependencies_array = ...
74267442
// ;; if nowait not present
@@ -7451,7 +7467,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
74517467
// that of the kernel_launch function.
74527468
//
74537469
// kernel_launch_function is generated by emitKernelLaunch and has the
7454-
// always_inline attribute. For this example, it'll look like so
7470+
// always_inline attribute. For this example, it'll look like so:
74557471
// void kernel_launch_function(%thread_id, %offload_baseptrs, %offload_ptrs,
74567472
// %offload_sizes, %structArg) alwaysinline {
74577473
// %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
@@ -7506,19 +7522,20 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
75067522
/*IsFinished=*/true);
75077523

75087524
SmallVector<Value *, 2> OffloadingArraysToPrivatize;
7509-
if (DeviceID && HasNoWait) {
7525+
bool NeedsTargetTask = HasNoWait && DeviceID;
7526+
if (NeedsTargetTask) {
75107527
for (auto *V :
75117528
{RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray,
75127529
RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd,
75137530
RTArgs.SizesArray}) {
7514-
if (V && !isa<ConstantPointerNull>(V) && !isa<GlobalVariable>(V)) {
7531+
if (V && !isa<ConstantPointerNull, GlobalVariable>(V)) {
75157532
OffloadingArraysToPrivatize.push_back(V);
75167533
OI.ExcludeArgsFromAggregate.push_back(V);
75177534
}
75187535
}
75197536
}
7520-
OI.PostOutlineCB = [this, ToBeDeleted, Dependencies, HasNoWait, DeviceID,
7521-
OffloadingArraysToPrivatize](
7537+
OI.PostOutlineCB = [this, ToBeDeleted, Dependencies, NeedsTargetTask,
7538+
DeviceID, OffloadingArraysToPrivatize](
75227539
Function &OutlinedFn) mutable {
75237540
assert(OutlinedFn.hasOneUse() &&
75247541
"there must be a single user for the outlined function");
@@ -7527,7 +7544,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
75277544

75287545
// The first argument of StaleCI is always the thread id.
75297546
// The next few arguments are the pointers to offloading arrays
7530-
// if any. (See OffloadingArraysToPrivatize)
7547+
// if any. (see OffloadingArraysToPrivatize)
75317548
// Finally, all other local values that are live-in into the outlined region
75327549
// end up in a structure whose pointer is passed as the last argument. This
75337550
// piece of data is passed in the "shared" field of the task structure. So,
@@ -7545,10 +7562,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
75457562
HasShareds ? OffloadingArraysToPrivatize.size() + 1 : 0;
75467563

75477564
StructType *TaskWithPrivatesTy =
7548-
createTaskWithPrivatesTy(Task, OffloadingArraysToPrivatize);
7565+
createTaskWithPrivatesTy(*this, OffloadingArraysToPrivatize);
75497566
StructType *PrivatesTy = nullptr;
75507567

7551-
if (OffloadingArraysToPrivatize.size())
7568+
if (!OffloadingArraysToPrivatize.empty())
75527569
PrivatesTy =
75537570
static_cast<StructType *>(TaskWithPrivatesTy->getElementType(1));
75547571

@@ -7572,7 +7589,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
75727589
// If `HasNoWait == true`, we call @__kmpc_omp_target_task_alloc to provide
75737590
// the DeviceID to the deferred task and also since
75747591
// @__kmpc_omp_target_task_alloc creates an untied/async task.
7575-
bool NeedsTargetTask = HasNoWait && DeviceID;
75767592
Function *TaskAllocFn =
75777593
!NeedsTargetTask
75787594
? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
@@ -7587,7 +7603,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
75877603
// Tasksize refers to the size in bytes of kmp_task_t data structure
75887604
// plus any other data to be passed to the target task, if any, which
75897605
// is packed into a struct. kmp_task_t and the struct so created are
7590-
// packed into a wrapper struct whose type is TaskWithPrivatesTy
7606+
// packed into a wrapper struct whose type is TaskWithPrivatesTy.
75917607
Value *TaskSize = Builder.getInt64(
75927608
M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
75937609

@@ -7638,28 +7654,17 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
76387654
Align Alignment = TaskData->getPointerAlignment(M.getDataLayout());
76397655
if (HasShareds) {
76407656
Value *Shareds = StaleCI->getArgOperand(SharedArgOperandNo);
7641-
Value *TaskT = Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 0);
7642-
Value *TaskSharedsPtr = TaskT;
7643-
if (TaskWithPrivatesTy != Task) {
7644-
TaskSharedsPtr = Builder.CreateStructGEP(Task, TaskT, 0);
7645-
}
7646-
Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskSharedsPtr);
7647-
7657+
Value *TaskShareds = loadSharedDataFromTaskDescriptor(
7658+
*this, Builder, TaskData, TaskWithPrivatesTy);
76487659
Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
76497660
SharedsSize);
76507661
}
7651-
if (OffloadingArraysToPrivatize.size()) {
7662+
if (!OffloadingArraysToPrivatize.empty()) {
76527663
Value *Privates =
76537664
Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
76547665
for (unsigned int i = 0; i < OffloadingArraysToPrivatize.size(); ++i) {
76557666
Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
7656-
Type *ArrayType = nullptr;
7657-
if (auto *GEP = dyn_cast<GetElementPtrInst>(PtrToPrivatize))
7658-
ArrayType = GEP->getSourceElementType();
7659-
else if (auto *Alloca = dyn_cast<AllocaInst>(PtrToPrivatize))
7660-
ArrayType = Alloca->getAllocatedType();
7661-
else
7662-
llvm_unreachable("Unhandled Instruction type");
7667+
Type *ArrayType = getOffloadingArrayType(PtrToPrivatize);
76637668
assert(ArrayType && "ArrayType cannot be nullptr");
76647669

76657670
Type *ElementType = PrivatesTy->getElementType(i);
@@ -7817,7 +7822,6 @@ static void emitTargetCall(
78177822
// Arguments that are intended to be directly forwarded to an
78187823
// emitKernelLaunch call are pased as nullptr, since
78197824
// OutlinedFnID=nullptr results in that call not being done.
7820-
// OpenMPIRBuilder::TargetDataInfo Info;
78217825
OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
78227826
return OMPBuilder.emitTargetTask(TaskBodyCB, /*DeviceID=*/nullptr,
78237827
/*RTLoc=*/nullptr, AllocaIP,

0 commit comments

Comments
 (0)