@@ -7150,27 +7150,52 @@ static Expected<Function *> createOutlinedFunction(
7150
7150
ValueReplacementMap);
7151
7151
return Func;
7152
7152
}
7153
+ // / Given a task descriptor, TaskWithPrivates, return the pointer to the block
7154
+ // / of pointers contain shared data between the parent task and the created
7155
+ // / task.
7156
+ // /
7157
+ static LoadInst *loadSharedDataFromTaskDescriptor (OpenMPIRBuilder &OMPIRBuilder,
7158
+ IRBuilderBase &Builder,
7159
+ Value *TaskWithPrivates,
7160
+ Type *TaskWithPrivatesTy) {
7153
7161
7162
+ Type *TaskTy = OMPIRBuilder.Task ;
7163
+ LLVMContext &Ctx = Builder.getContext ();
7164
+ Value *TaskT =
7165
+ Builder.CreateStructGEP (TaskWithPrivatesTy, TaskWithPrivates, 0 );
7166
+ Value *Shareds = TaskT;
7167
+ // TaskWithPrivatesTy can be one of the following
7168
+ // 1. %struct.task_with_privates = type { %struct.kmp_task_ompbuilder_t,
7169
+ // %struct.privates }
7170
+ // 2. %struct.kmp_task_ompbuilder_t ;; This is simply TaskTy
7171
+ //
7172
+ // In the former case, that is when TaskWithPrivatesTy != TaskTy,
7173
+ // its first member has to be the task descriptor. TaskTy is the type of the
7174
+ // task descriptor. TaskT is the pointer to the task descriptor. Loading the
7175
+ // first member of TaskT, gives us the pointer to shared data.
7176
+ if (TaskWithPrivatesTy != TaskTy)
7177
+ Shareds = Builder.CreateStructGEP (TaskTy, TaskT, 0 );
7178
+ return Builder.CreateLoad (PointerType::getUnqual (Ctx), Shareds);
7179
+ }
7154
7180
// / Create an entry point for a target task with the following.
7155
7181
// / It'll have the following signature
7156
7182
// / void @.omp_target_task_proxy_func(i32 %thread.id, ptr %task)
7157
7183
// / This function is called from emitTargetTask once the
7158
7184
// / code to launch the target kernel has been outlined already.
7185
+ // / NumOffloadingArrays is the number of offloading arrays that we need to copy
7186
+ // / into the task structure so that the deferred target task can access this
7187
+ // / data even after the stack frame of the generating task has been rolled
7188
+ // / back. Offloading arrays contain base pointers, pointers, sizes etc
7189
+ // / of the data that the target kernel will access. These in effect are the
7190
+ // / non-empty arrays of pointers held by OpenMPIRBuilder::TargetDataRTArgs.
7159
7191
static Function *emitTargetTaskProxyFunction (
7160
7192
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI,
7161
7193
StructType *PrivatesTy, StructType *TaskWithPrivatesTy,
7162
7194
const size_t NumOffloadingArrays, const int SharedArgsOperandNo) {
7163
7195
7164
- // NumOffloadingArrays is the number of offloading arrays that we need to copy
7165
- // into the task structure so that the deferred target task can access this
7166
- // data even after the stack frame of the generating task has been rolled
7167
- // back. Offloading arrays contain base pointers, pointers, sizes etc
7168
- // of the data that the target kernel will access. In other words, the
7169
- // arrays of pointers held by OpenMPIRBuilder::TargetDataRTArgs
7170
- // The number of arrays and the size of each array depends on the specifics of
7171
- // the target call. These arrays are copied into a struct whose type is
7172
- // PrivatesTy. So, if NumOffloadingArrays is non-zero, PrivatesTy better
7173
- // not be nullptr
7196
+ // if NumOffloadingArrays is non-zero, PrivatesTy better not be nullptr
7197
+ // This is because PrivatesTy is the type of the structure in which
7198
+ // we pass the offloading arrays to the deferred target task.
7174
7199
assert ((!NumOffloadingArrays || PrivatesTy) &&
7175
7200
" PrivatesTy cannot be nullptr when there are offloadingArrays"
7176
7201
" to privatize" );
@@ -7214,18 +7239,17 @@ static Function *emitTargetTaskProxyFunction(
7214
7239
auto ProxyFn = Function::Create (ProxyFnTy, GlobalValue::InternalLinkage,
7215
7240
" .omp_target_task_proxy_func" ,
7216
7241
Builder.GetInsertBlock ()->getModule ());
7217
- ProxyFn->getArg (0 )->setName (" thread.id" );
7218
- ProxyFn->getArg (1 )->setName (" task" );
7242
+ Value *ThreadId = ProxyFn->getArg (0 );
7243
+ Value *TaskWithPrivates = ProxyFn->getArg (1 );
7244
+ ThreadId->setName (" thread.id" );
7245
+ TaskWithPrivates->setName (" task" );
7219
7246
7220
7247
bool HasShareds = SharedArgsOperandNo > 0 ;
7221
7248
bool HasOffloadingArrays = NumOffloadingArrays > 0 ;
7222
7249
BasicBlock *EntryBB =
7223
7250
BasicBlock::Create (Builder.getContext (), " entry" , ProxyFn);
7224
7251
Builder.SetInsertPoint (EntryBB);
7225
7252
7226
- Value *ThreadId = ProxyFn->getArg (0 );
7227
- Value *TaskWithPrivates = ProxyFn->getArg (1 );
7228
-
7229
7253
SmallVector<Value *> KernelLaunchArgs;
7230
7254
KernelLaunchArgs.reserve (StaleCI->arg_size ());
7231
7255
KernelLaunchArgs.push_back (ThreadId);
@@ -7255,23 +7279,8 @@ static Function *emitTargetTaskProxyFunction(
7255
7279
Value *SharedsSize =
7256
7280
Builder.getInt64 (M.getDataLayout ().getTypeStoreSize (ArgStructType));
7257
7281
7258
- Value *TaskT =
7259
- Builder.CreateStructGEP (TaskWithPrivatesTy, TaskWithPrivates, 0 );
7260
- Value *Shareds = TaskT;
7261
- // TaskWithPrivatesTy can be
7262
- // %struct.task_with_privates = type { %struct.kmp_task_ompbuilder_t,
7263
- // %struct.privates }
7264
- // OR
7265
- // %struct.kmp_task_ompbuilder_t ;; This is simply TaskTy
7266
- // In the former case, that is when TaskWithPrivatesTy is not the same as
7267
- // TaskTy, then its first member has to be the task descriptor. TaskTy is
7268
- // the type of the task descriptor. TaskT is the pointer to the task
7269
- // descriptor. Loading the first member of TaskT, gives us the pointer to
7270
- // shared data.
7271
- if (TaskWithPrivatesTy != TaskTy)
7272
- Shareds = Builder.CreateStructGEP (TaskTy, TaskT, 0 );
7273
- LoadInst *LoadShared =
7274
- Builder.CreateLoad (PointerType::getUnqual (Ctx), Shareds);
7282
+ LoadInst *LoadShared = loadSharedDataFromTaskDescriptor (
7283
+ OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
7275
7284
7276
7285
Builder.CreateMemCpy (
7277
7286
NewArgStructAlloca, NewArgStructAlloca->getAlign (), LoadShared,
@@ -7282,7 +7291,16 @@ static Function *emitTargetTaskProxyFunction(
7282
7291
Builder.CreateRetVoid ();
7283
7292
return ProxyFn;
7284
7293
}
7294
+ static Type *getOffloadingArrayType (Value *V) {
7295
+
7296
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(V))
7297
+ return GEP->getSourceElementType ();
7298
+ if (auto *Alloca = dyn_cast<AllocaInst>(V))
7299
+ return Alloca->getAllocatedType ();
7285
7300
7301
+ llvm_unreachable (" Unhandled Instruction type" );
7302
+ return nullptr ;
7303
+ }
7286
7304
// This function returns a struct that has at most two members.
7287
7305
// The first member is always %struct.kmp_task_ompbuilder_t, that is the task
7288
7306
// descriptor. The second member, if needed, is a struct containing arrays
@@ -7298,27 +7316,24 @@ static Function *emitTargetTaskProxyFunction(
7298
7316
// If there aren't any offloading arrays to pass to the target kernel,
7299
7317
// %struct.kmp_task_ompbuilder_t is returned.
7300
7318
static StructType *
7301
- createTaskWithPrivatesTy (Type *Task ,
7319
+ createTaskWithPrivatesTy (OpenMPIRBuilder &OMPIRBuilder ,
7302
7320
ArrayRef<Value *> OffloadingArraysToPrivatize) {
7303
7321
7304
7322
if (OffloadingArraysToPrivatize.empty ())
7305
- return static_cast <StructType *>( Task) ;
7323
+ return OMPIRBuilder. Task ;
7306
7324
7307
7325
SmallVector<Type *, 4 > StructFieldTypes;
7308
- for (auto & V : OffloadingArraysToPrivatize) {
7326
+ for (Value * V : OffloadingArraysToPrivatize) {
7309
7327
assert (V->getType ()->isPointerTy () &&
7310
7328
" Expected pointer to array to privatize. Got a non-pointer value "
7311
7329
" instead" );
7312
- if (auto *GEP = dyn_cast<GetElementPtrInst>(V))
7313
- StructFieldTypes.push_back (GEP->getSourceElementType ());
7314
- else if (auto *Alloca = dyn_cast<AllocaInst>(V))
7315
- StructFieldTypes.push_back (Alloca->getAllocatedType ());
7316
- else
7317
- llvm_unreachable (" Unhandled Instruction type" );
7330
+ Type *ArrayTy = getOffloadingArrayType (V);
7331
+ assert (ArrayTy && " ArrayType cannot be nullptr" );
7332
+ StructFieldTypes.push_back (ArrayTy);
7318
7333
}
7319
7334
StructType *PrivatesStructTy =
7320
7335
StructType::create (StructFieldTypes, " struct.privates" );
7321
- return StructType::create ({Task, PrivatesStructTy},
7336
+ return StructType::create ({OMPIRBuilder. Task , PrivatesStructTy},
7322
7337
" struct.task_with_privates" );
7323
7338
}
7324
7339
static Error emitTargetOutlinedFunction (
@@ -7346,7 +7361,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7346
7361
TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc,
7347
7362
OpenMPIRBuilder::InsertPointTy AllocaIP,
7348
7363
const SmallVector<llvm::OpenMPIRBuilder::DependData> &Dependencies,
7349
- TargetDataRTArgs &RTArgs, bool HasNoWait) {
7364
+ const TargetDataRTArgs &RTArgs, bool HasNoWait) {
7350
7365
7351
7366
// The following explains the code-gen scenario for the `target` directive. A
7352
7367
// similar scneario is followed for other device-related directives (e.g.
@@ -7393,11 +7408,11 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7393
7408
//
7394
7409
// That is we create the following
7395
7410
// struct task_with_privates {
7396
- // struct kmp_task_ompbuilder_t;
7411
+ // struct kmp_task_ompbuilder_t task_struct ;
7397
7412
// struct privates {
7398
- // [2 x ptr], ; baseptrs
7399
- // [2 x ptr] ; ptrs
7400
- // [2 x i64] ; sizes
7413
+ // [2 x ptr] ; baseptrs
7414
+ // [2 x ptr] ; ptrs
7415
+ // [2 x i64] ; sizes
7401
7416
// }
7402
7417
// }
7403
7418
// void user_code_that_offloads(...) {
@@ -7415,12 +7430,13 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7415
7430
// sizeof(structArg),
7416
7431
// @.omp_target_task_proxy_func,
7417
7432
// ...)
7418
- // memcpy(target_task->shareds, %structArg, sizeof(structArg))
7419
- // memcpy(target_task->privates->baseptrs,
7433
+ // memcpy(target_task_with_privates->task_struct->shareds, %structArg,
7434
+ // sizeof(structArg))
7435
+ // memcpy(target_task_with_privates->privates->baseptrs,
7420
7436
// offload_baseptrs, sizeof(offload_baseptrs)
7421
- // memcpy(target_task ->privates->ptrs,
7437
+ // memcpy(target_task_with_privates ->privates->ptrs,
7422
7438
// offload_ptrs, sizeof(offload_ptrs)
7423
- // memcpy(target_task ->privates->sizes,
7439
+ // memcpy(target_task_with_privates ->privates->sizes,
7424
7440
// offload_sizes, sizeof(offload_sizes)
7425
7441
// dependencies_array = ...
7426
7442
// ;; if nowait not present
@@ -7451,7 +7467,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7451
7467
// that of the kernel_launch function.
7452
7468
//
7453
7469
// kernel_launch_function is generated by emitKernelLaunch and has the
7454
- // always_inline attribute. For this example, it'll look like so
7470
+ // always_inline attribute. For this example, it'll look like so:
7455
7471
// void kernel_launch_function(%thread_id, %offload_baseptrs, %offload_ptrs,
7456
7472
// %offload_sizes, %structArg) alwaysinline {
7457
7473
// %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
@@ -7506,19 +7522,20 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7506
7522
/* IsFinished=*/ true );
7507
7523
7508
7524
SmallVector<Value *, 2 > OffloadingArraysToPrivatize;
7509
- if (DeviceID && HasNoWait) {
7525
+ bool NeedsTargetTask = HasNoWait && DeviceID;
7526
+ if (NeedsTargetTask) {
7510
7527
for (auto *V :
7511
7528
{RTArgs.BasePointersArray , RTArgs.PointersArray , RTArgs.MappersArray ,
7512
7529
RTArgs.MapNamesArray , RTArgs.MapTypesArray , RTArgs.MapTypesArrayEnd ,
7513
7530
RTArgs.SizesArray }) {
7514
- if (V && !isa<ConstantPointerNull>(V) && !isa< GlobalVariable>(V)) {
7531
+ if (V && !isa<ConstantPointerNull, GlobalVariable>(V)) {
7515
7532
OffloadingArraysToPrivatize.push_back (V);
7516
7533
OI.ExcludeArgsFromAggregate .push_back (V);
7517
7534
}
7518
7535
}
7519
7536
}
7520
- OI.PostOutlineCB = [this , ToBeDeleted, Dependencies, HasNoWait, DeviceID ,
7521
- OffloadingArraysToPrivatize](
7537
+ OI.PostOutlineCB = [this , ToBeDeleted, Dependencies, NeedsTargetTask ,
7538
+ DeviceID, OffloadingArraysToPrivatize](
7522
7539
Function &OutlinedFn) mutable {
7523
7540
assert (OutlinedFn.hasOneUse () &&
7524
7541
" there must be a single user for the outlined function" );
@@ -7527,7 +7544,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7527
7544
7528
7545
// The first argument of StaleCI is always the thread id.
7529
7546
// The next few arguments are the pointers to offloading arrays
7530
- // if any. (See OffloadingArraysToPrivatize)
7547
+ // if any. (see OffloadingArraysToPrivatize)
7531
7548
// Finally, all other local values that are live-in into the outlined region
7532
7549
// end up in a structure whose pointer is passed as the last argument. This
7533
7550
// piece of data is passed in the "shared" field of the task structure. So,
@@ -7545,10 +7562,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7545
7562
HasShareds ? OffloadingArraysToPrivatize.size () + 1 : 0 ;
7546
7563
7547
7564
StructType *TaskWithPrivatesTy =
7548
- createTaskWithPrivatesTy (Task , OffloadingArraysToPrivatize);
7565
+ createTaskWithPrivatesTy (* this , OffloadingArraysToPrivatize);
7549
7566
StructType *PrivatesTy = nullptr ;
7550
7567
7551
- if (OffloadingArraysToPrivatize.size ())
7568
+ if (! OffloadingArraysToPrivatize.empty ())
7552
7569
PrivatesTy =
7553
7570
static_cast <StructType *>(TaskWithPrivatesTy->getElementType (1 ));
7554
7571
@@ -7572,7 +7589,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7572
7589
// If `HasNoWait == true`, we call @__kmpc_omp_target_task_alloc to provide
7573
7590
// the DeviceID to the deferred task and also since
7574
7591
// @__kmpc_omp_target_task_alloc creates an untied/async task.
7575
- bool NeedsTargetTask = HasNoWait && DeviceID;
7576
7592
Function *TaskAllocFn =
7577
7593
!NeedsTargetTask
7578
7594
? getOrCreateRuntimeFunctionPtr (OMPRTL___kmpc_omp_task_alloc)
@@ -7587,7 +7603,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7587
7603
// Tasksize refers to the size in bytes of kmp_task_t data structure
7588
7604
// plus any other data to be passed to the target task, if any, which
7589
7605
// is packed into a struct. kmp_task_t and the struct so created are
7590
- // packed into a wrapper struct whose type is TaskWithPrivatesTy
7606
+ // packed into a wrapper struct whose type is TaskWithPrivatesTy.
7591
7607
Value *TaskSize = Builder.getInt64 (
7592
7608
M.getDataLayout ().getTypeStoreSize (TaskWithPrivatesTy));
7593
7609
@@ -7638,28 +7654,17 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7638
7654
Align Alignment = TaskData->getPointerAlignment (M.getDataLayout ());
7639
7655
if (HasShareds) {
7640
7656
Value *Shareds = StaleCI->getArgOperand (SharedArgOperandNo);
7641
- Value *TaskT = Builder.CreateStructGEP (TaskWithPrivatesTy, TaskData, 0 );
7642
- Value *TaskSharedsPtr = TaskT;
7643
- if (TaskWithPrivatesTy != Task) {
7644
- TaskSharedsPtr = Builder.CreateStructGEP (Task, TaskT, 0 );
7645
- }
7646
- Value *TaskShareds = Builder.CreateLoad (VoidPtr, TaskSharedsPtr);
7647
-
7657
+ Value *TaskShareds = loadSharedDataFromTaskDescriptor (
7658
+ *this , Builder, TaskData, TaskWithPrivatesTy);
7648
7659
Builder.CreateMemCpy (TaskShareds, Alignment, Shareds, Alignment,
7649
7660
SharedsSize);
7650
7661
}
7651
- if (OffloadingArraysToPrivatize.size ()) {
7662
+ if (! OffloadingArraysToPrivatize.empty ()) {
7652
7663
Value *Privates =
7653
7664
Builder.CreateStructGEP (TaskWithPrivatesTy, TaskData, 1 );
7654
7665
for (unsigned int i = 0 ; i < OffloadingArraysToPrivatize.size (); ++i) {
7655
7666
Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
7656
- Type *ArrayType = nullptr ;
7657
- if (auto *GEP = dyn_cast<GetElementPtrInst>(PtrToPrivatize))
7658
- ArrayType = GEP->getSourceElementType ();
7659
- else if (auto *Alloca = dyn_cast<AllocaInst>(PtrToPrivatize))
7660
- ArrayType = Alloca->getAllocatedType ();
7661
- else
7662
- llvm_unreachable (" Unhandled Instruction type" );
7667
+ Type *ArrayType = getOffloadingArrayType (PtrToPrivatize);
7663
7668
assert (ArrayType && " ArrayType cannot be nullptr" );
7664
7669
7665
7670
Type *ElementType = PrivatesTy->getElementType (i);
@@ -7817,7 +7822,6 @@ static void emitTargetCall(
7817
7822
// Arguments that are intended to be directly forwarded to an
7818
7823
// emitKernelLaunch call are pased as nullptr, since
7819
7824
// OutlinedFnID=nullptr results in that call not being done.
7820
- // OpenMPIRBuilder::TargetDataInfo Info;
7821
7825
OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
7822
7826
return OMPBuilder.emitTargetTask (TaskBodyCB, /* DeviceID=*/ nullptr ,
7823
7827
/* RTLoc=*/ nullptr , AllocaIP,
0 commit comments