Skip to content

Commit 837b89f

Browse files
authored
[MLIR][NVVM] Add ptxas-cmd-options to pass flags to the downstream compiler (#127457)
This PR adds the `ptxas-cmd-options` option to the `gpu-lower-to-nvvm-pipeline` pipeline and the `nvvm-attach-target` pass, allowing users to pass flags to the downstream compiler, *ptxas*. Example:
```
mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_80 ptxas-cmd-options='-v --register-usage-level=8'"
```
1 parent 02c44ce commit 837b89f

File tree

10 files changed

+106
-4
lines changed

10 files changed

+106
-4
lines changed

mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,10 @@ class TargetOptions {
108108
/// Returns the default compilation target: `CompilationTarget::Fatbin`.
109109
static CompilationTarget getDefaultCompilationTarget();
110110

111+
/// Returns a tokenization of the command line options.
112+
static std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>>
113+
tokenizeCmdOptions(const std::string &cmdOptions);
114+
111115
protected:
112116
/// Derived classes must use this constructor to initialize `typeID` to the
113117
/// appropriate value: ie. `TargetOptions(TypeID::get<DerivedClass>())`.

mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ struct GPUToNVVMPipelineOptions
3737
*this, "cubin-format",
3838
llvm::cl::desc("Compilation format to use to serialize to cubin."),
3939
llvm::cl::init("fatbin")};
40+
PassOptions::Option<std::string> cmdOptions{
41+
*this, "ptxas-cmd-options",
42+
llvm::cl::desc(
43+
"Command line options to pass to the downstream compiler."),
44+
llvm::cl::init("")};
4045
PassOptions::Option<int> optLevel{
4146
*this, "opt-level",
4247
llvm::cl::desc("Optimization level for NVVM compilation"),

mlir/include/mlir/Dialect/GPU/Transforms/Passes.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> {
143143
"Enable flush to zero for denormals.">,
144144
ListOption<"linkLibs", "l", "std::string",
145145
"Extra bitcode libraries paths to link to.">,
146+
Option<"cmdOptions", "ptxas-cmd-options", "std::string",
147+
/*default=*/ [{""}],
148+
"Command line options passed to downstream compiler">,
146149
];
147150
}
148151

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2862,6 +2862,8 @@ def NVVM_TargettAttr : NVVM_Attr<"NVVMTarget", "target"> {
28622862
bool hasFlag(StringRef flag) const;
28632863
bool hasFastMath() const;
28642864
bool hasFtz() const;
2865+
bool hasCmdOptions() const;
2866+
std::optional<mlir::NamedAttribute> getCmdOptions() const;
28652867
}];
28662868
let extraClassDefinition = [{
28672869
bool $cppClass::hasFlag(StringRef flag) const {
@@ -2875,6 +2877,12 @@ def NVVM_TargettAttr : NVVM_Attr<"NVVMTarget", "target"> {
28752877
bool $cppClass::hasFtz() const {
28762878
return hasFlag("ftz");
28772879
}
2880+
bool $cppClass::hasCmdOptions() const {
2881+
return hasFlag("ptxas-cmd-options");
2882+
}
2883+
std::optional<mlir::NamedAttribute> $cppClass::getCmdOptions() const {
2884+
return getFlags().getNamed("ptxas-cmd-options");
2885+
}
28782886
}];
28792887
}
28802888

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2564,7 +2564,7 @@ CompilationTarget TargetOptions::getDefaultCompilationTarget() {
25642564
}
25652565

25662566
std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>>
2567-
TargetOptions::tokenizeCmdOptions() const {
2567+
TargetOptions::tokenizeCmdOptions(const std::string &cmdOptions) {
25682568
std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> options;
25692569
llvm::StringSaver stringSaver(options.first);
25702570
StringRef opts = cmdOptions;
@@ -2586,6 +2586,11 @@ TargetOptions::tokenizeCmdOptions() const {
25862586
return options;
25872587
}
25882588

2589+
/// Zero-argument overload of `tokenizeCmdOptions`: forwards the `cmdOptions`
/// visible in this const member function (presumably the data member of that
/// name; its declaration is not shown here) to the overload that takes the
/// option string explicitly, and returns that overload's result unchanged.
std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>>
2590+
TargetOptions::tokenizeCmdOptions() const {
2591+
return tokenizeCmdOptions(cmdOptions);
2592+
}
2593+
25892594
MLIR_DEFINE_EXPLICIT_TYPE_ID(::mlir::gpu::TargetOptions)
25902595

25912596
#include "mlir/Dialect/GPU/IR/GPUOpInterfaces.cpp.inc"

mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ void buildCommonPassPipeline(
5858
nvvmTargetOptions.chip = options.cubinChip;
5959
nvvmTargetOptions.features = options.cubinFeatures;
6060
nvvmTargetOptions.optLevel = options.optLevel;
61+
nvvmTargetOptions.cmdOptions = options.cmdOptions;
6162
pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
6263
pm.addPass(createLowerAffinePass());
6364
pm.addPass(createArithToLLVMConversionPass());

mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,30 @@ struct NVVMAttachTarget
4545

4646
DictionaryAttr NVVMAttachTarget::getFlags(OpBuilder &builder) const {
4747
UnitAttr unitAttr = builder.getUnitAttr();
48-
SmallVector<NamedAttribute, 2> flags;
48+
SmallVector<NamedAttribute, 3> flags;
4949
auto addFlag = [&](StringRef flag) {
5050
flags.push_back(builder.getNamedAttr(flag, unitAttr));
5151
};
5252
if (fastFlag)
5353
addFlag("fast");
5454
if (ftzFlag)
5555
addFlag("ftz");
56+
57+
// Tokenize and set the optional command line options.
58+
if (!cmdOptions.empty()) {
59+
auto options = gpu::TargetOptions::tokenizeCmdOptions(cmdOptions);
60+
if (!options.second.empty()) {
61+
llvm::SmallVector<mlir::Attribute> nvvmOptionAttrs;
62+
for (const char *opt : options.second) {
63+
nvvmOptionAttrs.emplace_back(
64+
mlir::StringAttr::get(builder.getContext(), StringRef(opt)));
65+
}
66+
flags.push_back(builder.getNamedAttr(
67+
"ptxas-cmd-options",
68+
mlir::ArrayAttr::get(builder.getContext(), nvvmOptionAttrs)));
69+
}
70+
}
71+
5672
if (!flags.empty())
5773
return builder.getDictionaryAttr(flags);
5874
return nullptr;

mlir/lib/Target/LLVM/NVVM/Target.cpp

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,25 @@ std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
321321
return std::nullopt;
322322
}
323323

324+
/// Appends the optional command-line options carried by `target` to
/// `ptxasArgs`, after whatever arguments the vector already holds.
///
/// Returns without touching `ptxasArgs` when `target.hasCmdOptions()` is
/// false. Otherwise the value of the attribute returned by
/// `target.getCmdOptions()` is cast to an `ArrayAttr` and walked in order:
/// every element that is a `StringAttr` is appended, and elements of any
/// other attribute kind are skipped without a diagnostic.
///
/// `T` selects how each string is appended: with `StringRef` the attribute's
/// value is pushed directly; with `const char *` its `data()` pointer is
/// pushed. For any other `T` neither `if constexpr` branch is taken, so
/// nothing is appended.
325+
template <typename T>
326+
static void setOptionalCommandlineArguments(NVVMTargetAttr target,
327+
SmallVectorImpl<T> &ptxasArgs) {
328+
// Nothing to add when the target reports no command-line options.
if (!target.hasCmdOptions())
329+
return;
330+
331+
std::optional<mlir::NamedAttribute> cmdOptions = target.getCmdOptions();
332+
// The optional is dereferenced without a check, relying on the
// `hasCmdOptions()` guard above. `cast<>` (not `dyn_cast<>`) is used, so the
// value is expected to be an `ArrayAttr` here.
// NOTE(review): confirm nothing can attach a non-array value under this
// flag name.
for (Attribute attr : cast<ArrayAttr>(cmdOptions->getValue())) {
333+
if (auto strAttr = dyn_cast<StringAttr>(attr)) {
334+
if constexpr (std::is_same_v<T, StringRef>) {
335+
ptxasArgs.push_back(strAttr.getValue());
336+
} else if constexpr (std::is_same_v<T, const char *>) {
337+
// NOTE(review): passing `data()` as a C string assumes the attribute's
// storage is null-terminated and outlives `ptxasArgs` — confirm.
ptxasArgs.push_back(strAttr.getValue().data());
338+
}
339+
}
340+
}
341+
}
342+
324343
// TODO: clean this method & have a generic tool driver or never emit binaries
325344
// with this mechanism and let another stage take care of it.
326345
std::optional<SmallVector<char, 0>>
@@ -359,8 +378,8 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
359378
return std::nullopt;
360379
TmpFile cubinFile;
361380
if (createFatbin) {
362-
Twine cubinFilename = ptxFile->first + ".cubin";
363-
cubinFile = TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename));
381+
std::string cubinFilename = (ptxFile->first + ".cubin").str();
382+
cubinFile = TmpFile(cubinFilename, llvm::FileRemover(cubinFilename));
364383
} else {
365384
cubinFile.first = binaryFile->first;
366385
}
@@ -412,6 +431,9 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
412431
useFatbin32 = true;
413432
}
414433

434+
// Set optional command line arguments
435+
setOptionalCommandlineArguments(getTarget(), ptxasArgs);
436+
415437
// Create the `fatbinary` args.
416438
StringRef chip = getTarget().getChip();
417439
// Remove the arch prefix to obtain the compute capability.
@@ -562,6 +584,8 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
562584
cmdOpts.second.append(
563585
{"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});
564586

587+
// Set optional command line arguments
588+
setOptionalCommandlineArguments(getTarget(), cmdOpts.second);
565589
// Create the compiler handle.
566590
RETURN_ON_NVPTXCOMPILER_ERROR(
567591
nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// RUN: mlir-opt %s --nvvm-attach-target="" | FileCheck %s
2+
// RUN: mlir-opt %s --nvvm-attach-target="ptxas-cmd-options=--register-usage-level=8" | FileCheck %s -check-prefix=CHECK-OPTIONS
3+
4+
module attributes {gpu.container_module} {
5+
// CHECK-LABEL:gpu.module @kernel_module1
6+
// CHECK: [#nvvm.target]
7+
// CHECK-OPTIONS: [#nvvm.target<flags = {"ptxas-cmd-options" = ["--register-usage-level=8"]}>]
8+
gpu.module @kernel_module1 {
9+
llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
10+
%arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
11+
%arg5: i64) attributes {gpu.kernel} {
12+
llvm.return
13+
}
14+
}
15+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// RUN: mlir-opt %s \
2+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_80 ptxas-cmd-options='-v --register-usage-level=8'" -debug-only=serialize-to-binary \
3+
// RUN: 2>&1 | FileCheck %s
4+
5+
func.func @host_function(%arg0 : f32, %arg1 : memref<?xf32>) {
6+
%cst = arith.constant 1 : index
7+
%c0 = arith.constant 0 : index
8+
%cst2 = memref.dim %arg1, %c0 : memref<?xf32>
9+
10+
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst)
11+
threads(%tx, %ty, %tz) in (%block_x = %cst2, %block_y = %cst, %block_z = %cst) {
12+
memref.store %arg0, %arg1[%tx] : memref<?xf32>
13+
gpu.terminator
14+
}
15+
16+
return
17+
}
18+
19+
// CHECK: ptxas -arch sm_80
20+
// CHECK-SAME: -v
21+
// CHECK-SAME: --register-usage-level=8

0 commit comments

Comments
 (0)