12
12
13
13
#include " AMDGPU.h"
14
14
#include " GCNSubtarget.h"
15
+ #include " Utils/AMDGPUBaseInfo.h"
15
16
#include " llvm/CodeGen/TargetPassConfig.h"
16
17
#include " llvm/IR/IntrinsicsAMDGPU.h"
17
18
#include " llvm/IR/IntrinsicsR600.h"
22
23
23
24
using namespace llvm ;
24
25
26
// X-macro, first expansion: while this definition is active, every
// AMDGPU_ATTRIBUTE(Name, Str) entry expands to an enumerator named Name_POS.
+ #define AMDGPU_ATTRIBUTE (Name, Str ) Name##_POS,
27
+
28
// Bit positions of the implicit arguments. The generated enumerators carry no
// explicit values, so they are numbered consecutively from 0 in the order the
// entries appear in AMDGPUAttributes.def (presumably that file contains only
// AMDGPU_ATTRIBUTE entries -- confirm).
+ enum ImplicitArgumentPositions {
29
+ #include " AMDGPUAttributes.def"
30
// Sentinel placed after the generated positions; ALL_ARGUMENT_MASK is
// computed from it.
+ LAST_ARG_POS
31
+ };
32
+
33
// X-macro, second expansion: each AMDGPU_ATTRIBUTE(Name, Str) entry now
// becomes a mask enumerator whose only set bit is at Name_POS.
// NOTE(review): this redefines AMDGPU_ATTRIBUTE with no #undef visible here;
// presumably AMDGPUAttributes.def undefines the macro at its end -- confirm.
+ #define AMDGPU_ATTRIBUTE (Name, Str ) Name = 1 << Name##_POS,
34
+
25
35
// Bit masks identifying the implicit arguments tracked by this pass.
enum ImplicitArgumentMask {
26
36
NOT_IMPLICIT_INPUT = 0 ,
27
-
28
- // SGPRs
29
- DISPATCH_PTR = 1 << 0 ,
30
- QUEUE_PTR = 1 << 1 ,
31
- DISPATCH_ID = 1 << 2 ,
32
- IMPLICIT_ARG_PTR = 1 << 3 ,
33
- WORKGROUP_ID_X = 1 << 4 ,
34
- WORKGROUP_ID_Y = 1 << 5 ,
35
- WORKGROUP_ID_Z = 1 << 6 ,
36
-
37
- // VGPRS:
38
- WORKITEM_ID_X = 1 << 7 ,
39
- WORKITEM_ID_Y = 1 << 8 ,
40
- WORKITEM_ID_Z = 1 << 9 ,
41
- ALL_ARGUMENT_MASK = (1 << 10 ) - 1
37
+ #include " AMDGPUAttributes.def"
38
// Every bit below LAST_ARG_POS set, so the mask grows automatically when
// entries are added to the .def file.
+ ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
42
39
};
43
40
41
// X-macro, third expansion: each AMDGPU_ATTRIBUTE(Name, Str) entry becomes a
// {mask, attribute-string} initializer for the table below.
+ #define AMDGPU_ATTRIBUTE (Name, Str ) {Name, Str},
44
42
// Table pairing each implicit-argument mask bit with the function attribute
// string that represents it. initialize() and manifest() walk this table to
// translate between function attributes and state bits.
static constexpr std::pair<ImplicitArgumentMask,
45
43
StringLiteral> ImplicitAttrs[] = {
46
- {DISPATCH_PTR, " amdgpu-no-dispatch-ptr" },
47
- {QUEUE_PTR, " amdgpu-no-queue-ptr" },
48
- {DISPATCH_ID, " amdgpu-no-dispatch-id" },
49
- {IMPLICIT_ARG_PTR, " amdgpu-no-implicitarg-ptr" },
50
- {WORKGROUP_ID_X, " amdgpu-no-workgroup-id-x" },
51
- {WORKGROUP_ID_Y, " amdgpu-no-workgroup-id-y" },
52
- {WORKGROUP_ID_Z, " amdgpu-no-workgroup-id-z" },
53
- {WORKITEM_ID_X, " amdgpu-no-workitem-id-x" },
54
- {WORKITEM_ID_Y, " amdgpu-no-workitem-id-y" },
55
- {WORKITEM_ID_Z, " amdgpu-no-workitem-id-z" }
44
+ #include " AMDGPUAttributes.def"
56
45
};
57
46
58
47
// We do not need to note the x workitem or workgroup id because they are always
@@ -90,7 +79,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
90
79
case Intrinsic::amdgcn_queue_ptr:
91
80
case Intrinsic::amdgcn_is_shared:
92
81
case Intrinsic::amdgcn_is_private:
93
- // TODO: Does not require queue ptr on gfx9+
82
+ // TODO: Does not require the queue pointer on gfx9+
94
83
case Intrinsic::trap:
95
84
case Intrinsic::debugtrap:
96
85
IsQueuePtr = true ;
@@ -112,6 +101,17 @@ static bool isDSAddress(const Constant *C) {
112
101
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
113
102
}
114
103
104
+ // / Returns true if the function requires the implicit argument be passed
105
+ // / regardless of the function contents.
106
+ static bool funcRequiresHostcallPtr (const Function &F) {
107
+ // Sanitizers require the hostcall buffer passed in the implicit arguments.
108
+ return F.hasFnAttribute (Attribute::SanitizeAddress) ||
109
+ F.hasFnAttribute (Attribute::SanitizeThread) ||
110
+ F.hasFnAttribute (Attribute::SanitizeMemory) ||
111
+ F.hasFnAttribute (Attribute::SanitizeHWAddress) ||
112
+ F.hasFnAttribute (Attribute::SanitizeMemTag);
113
+ }
114
+
115
115
class AMDGPUInformationCache : public InformationCache {
116
116
public:
117
117
AMDGPUInformationCache (const Module &M, AnalysisGetter &AG,
@@ -129,7 +129,7 @@ class AMDGPUInformationCache : public InformationCache {
129
129
}
130
130
131
131
private:
132
- // / Check if the ConstantExpr \p CE requires queue ptr attribute .
132
+ // / Check if the ConstantExpr \p CE requires the queue pointer .
133
133
static bool visitConstExpr (const ConstantExpr *CE) {
134
134
if (CE->getOpcode () == Instruction::AddrSpaceCast) {
135
135
unsigned SrcAS = CE->getOperand (0 )->getType ()->getPointerAddressSpace ();
@@ -163,7 +163,7 @@ class AMDGPUInformationCache : public InformationCache {
163
163
}
164
164
165
165
public:
166
- // / Returns true if \p Fn needs a queue ptr attribute because of \p C.
166
+ // / Returns true if \p Fn needs the queue pointer because of \p C.
167
167
bool needsQueuePtr (const Constant *C, Function &Fn) {
168
168
bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC (Fn.getCallingConv ());
169
169
bool HasAperture = hasApertureRegs (Fn);
@@ -182,7 +182,7 @@ class AMDGPUInformationCache : public InformationCache {
182
182
}
183
183
184
184
private:
185
- // / Used to determine if the Constant needs a queue ptr attribute .
185
+ // / Used to determine if the Constant needs the queue pointer .
186
186
DenseMap<const Constant *, uint8_t > ConstantStatus;
187
187
};
188
188
@@ -327,7 +327,20 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
327
327
328
328
void initialize (Attributor &A) override {
329
329
Function *F = getAssociatedFunction ();
330
+
331
+ // If the function requires the implicit arg pointer due to sanitizers,
332
+ // assume it's needed even if explicitly marked as not requiring it.
333
+ const bool NeedsHostcall = funcRequiresHostcallPtr (*F);
334
+ if (NeedsHostcall) {
335
+ removeAssumedBits (IMPLICIT_ARG_PTR);
336
+ removeAssumedBits (HOSTCALL_PTR);
337
+ }
338
+
330
339
for (auto Attr : ImplicitAttrs) {
340
+ if (NeedsHostcall &&
341
+ (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
342
+ continue ;
343
+
331
344
if (F->hasFnAttribute (Attr.second ))
332
345
addKnownBits (Attr.first );
333
346
}
@@ -355,7 +368,6 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
355
368
return indicatePessimisticFixpoint ();
356
369
357
370
bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC (F->getCallingConv ());
358
- auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
359
371
360
372
bool NeedsQueuePtr = false ;
361
373
@@ -377,13 +389,58 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
377
389
}
378
390
}
379
391
380
- // If we found that we need amdgpu-queue-ptr, nothing else to do.
392
+ if (!NeedsQueuePtr) {
393
+ NeedsQueuePtr = checkForQueuePtr (A);
394
+ }
395
+
381
396
if (NeedsQueuePtr) {
382
397
removeAssumedBits (QUEUE_PTR);
383
- return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED :
384
- ChangeStatus::UNCHANGED;
385
398
}
386
399
400
+ if (funcRetrievesHostcallPtr (A)) {
401
+ removeAssumedBits (IMPLICIT_ARG_PTR);
402
+ removeAssumedBits (HOSTCALL_PTR);
403
+ }
404
+
405
+ return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED
406
+ : ChangeStatus::UNCHANGED;
407
+ }
408
+
409
+ ChangeStatus manifest (Attributor &A) override {
410
+ SmallVector<Attribute, 8 > AttrList;
411
+ LLVMContext &Ctx = getAssociatedFunction ()->getContext ();
412
+
413
+ for (auto Attr : ImplicitAttrs) {
414
+ if (isKnown (Attr.first ))
415
+ AttrList.push_back (Attribute::get (Ctx, Attr.second ));
416
+ }
417
+
418
+ return IRAttributeManifest::manifestAttrs (A, getIRPosition (), AttrList,
419
+ /* ForceReplace */ true );
420
+ }
421
+
422
+ const std::string getAsStr () const override {
423
+ std::string Str;
424
+ raw_string_ostream OS (Str);
425
+ OS << " AMDInfo[" ;
426
+ for (auto Attr : ImplicitAttrs)
427
+ OS << ' ' << Attr.second ;
428
+ OS << " ]" ;
429
+ return OS.str ();
430
+ }
431
+
432
+ /// See AbstractAttribute::trackStatistics(). Intentionally empty: this attribute records no statistics.
433
+ void trackStatistics () const override {}
434
+
435
+ private:
436
+ bool checkForQueuePtr (Attributor &A) {
437
+ Function *F = getAssociatedFunction ();
438
+ bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC (F->getCallingConv ());
439
+
440
+ auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
441
+
442
+ bool NeedsQueuePtr = false ;
443
+
387
444
auto CheckAddrSpaceCasts = [&](Instruction &I) {
388
445
unsigned SrcAS = static_cast <AddrSpaceCastInst &>(I).getSrcAddressSpace ();
389
446
if (castRequiresQueuePtr (SrcAS)) {
@@ -398,69 +455,63 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
398
455
// `checkForAllInstructions` is much cheaper than going through all
399
456
// instructions, try it first.
400
457
401
- // amdgpu- queue-ptr is not needed if aperture regs is present.
458
+ // The queue pointer is not needed if aperture regs are present.
402
459
if (!HasApertureRegs) {
403
460
bool UsedAssumedInformation = false ;
404
461
A.checkForAllInstructions (CheckAddrSpaceCasts, *this ,
405
462
{Instruction::AddrSpaceCast},
406
463
UsedAssumedInformation);
407
464
}
408
465
409
- // If we found that we need amdgpu-queue-ptr, nothing else to do.
410
- if (NeedsQueuePtr) {
411
- removeAssumedBits (QUEUE_PTR);
412
- return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED :
413
- ChangeStatus::UNCHANGED;
414
- }
466
+ // If we found that we need the queue pointer, nothing else to do.
467
+ if (NeedsQueuePtr)
468
+ return true ;
415
469
416
- if (!IsNonEntryFunc && HasApertureRegs) {
417
- return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED :
418
- ChangeStatus::UNCHANGED;
419
- }
470
+ if (!IsNonEntryFunc && HasApertureRegs)
471
+ return false ;
420
472
421
473
for (BasicBlock &BB : *F) {
422
474
for (Instruction &I : BB) {
423
475
for (const Use &U : I.operands ()) {
424
476
if (const auto *C = dyn_cast<Constant>(U)) {
425
- if (InfoCache.needsQueuePtr (C, *F)) {
426
- removeAssumedBits (QUEUE_PTR);
427
- return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED :
428
- ChangeStatus::UNCHANGED;
429
- }
477
+ if (InfoCache.needsQueuePtr (C, *F))
478
+ return true ;
430
479
}
431
480
}
432
481
}
433
482
}
434
483
435
- return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED :
436
- ChangeStatus::UNCHANGED;
484
+ return false ;
437
485
}
438
486
439
- ChangeStatus manifest (Attributor &A) override {
440
- SmallVector<Attribute, 8 > AttrList;
441
- LLVMContext &Ctx = getAssociatedFunction ()->getContext ();
442
-
443
- for (auto Attr : ImplicitAttrs) {
444
- if (isKnown (Attr.first ))
445
- AttrList.push_back (Attribute::get (Ctx, Attr.second ));
446
- }
487
+ bool funcRetrievesHostcallPtr (Attributor &A) {
488
+ auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition ();
489
+
490
+ // Check if this is a call to the implicitarg_ptr builtin and it
491
+ // is used to retrieve the hostcall pointer. The implicit arg for
492
+ // hostcall is not used only if every use of the implicitarg_ptr
493
+ // is a load that clearly does not retrieve any byte of the
494
+ // hostcall pointer. We check this by tracing all the uses of the
495
+ // initial call to the implicitarg_ptr intrinsic.
496
+ auto DoesNotLeadToHostcallPtr = [&](Instruction &I) {
497
+ auto &Call = cast<CallBase>(I);
498
+ if (Call.getIntrinsicID () != Intrinsic::amdgcn_implicitarg_ptr)
499
+ return true ;
500
+
501
+ const auto &PointerInfoAA = A.getAAFor <AAPointerInfo>(
502
+ *this , IRPosition::callsite_returned (Call), DepClassTy::REQUIRED);
503
+
504
+ AAPointerInfo::OffsetAndSize OAS (Pos, 8 );
505
+ return PointerInfoAA.forallInterferingAccesses (
506
+ OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
507
+ return Acc.getRemoteInst ()->isDroppable ();
508
+ });
509
+ };
447
510
448
- return IRAttributeManifest::manifestAttrs (A, getIRPosition (), AttrList,
449
- /* ForceReplace */ true );
511
+ bool UsedAssumedInformation = false ;
512
+ return !A.checkForAllCallLikeInstructions (DoesNotLeadToHostcallPtr, *this ,
513
+ UsedAssumedInformation);
450
514
}
451
-
452
- const std::string getAsStr () const override {
453
- std::string Str;
454
- raw_string_ostream OS (Str);
455
- OS << " AMDInfo[" ;
456
- for (auto Attr : ImplicitAttrs)
457
- OS << ' ' << Attr.second ;
458
- OS << " ]" ;
459
- return OS.str ();
460
- }
461
-
462
- // / See AbstractAttribute::trackStatistics()
463
- void trackStatistics () const override {}
464
515
};
465
516
466
517
AAAMDAttributes &AAAMDAttributes::createForPosition (const IRPosition &IRP,
@@ -497,7 +548,8 @@ class AMDGPUAttributor : public ModulePass {
497
548
BumpPtrAllocator Allocator;
498
549
AMDGPUInformationCache InfoCache (M, AG, Allocator, nullptr , *TM);
499
550
DenseSet<const char *> Allowed (
500
- {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, &AACallEdges::ID});
551
+ {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
552
+ &AACallEdges::ID, &AAPointerInfo::ID});
501
553
502
554
Attributor A (Functions, InfoCache, CGUpdater, &Allowed);
503
555
0 commit comments