llvm · grypp · Feb 17, 2025 · Feb 17, 2025 · Feb 17, 2025 · Feb 17, 2025
diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h b/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h
@@ -108,6 +108,10 @@ class TargetOptions {
   /// Returns the default compilation target: `CompilationTarget::Fatbin`.
   static CompilationTarget getDefaultCompilationTarget();
 
+  /// Returns a tokenization of the command line options.
+  static std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>>
+  tokenizeCmdOptions(const std::string &cmdOptions);
+
 protected:
   /// Derived classes must use this constructor to initialize `typeID` to the
   /// appropiate value: ie. `TargetOptions(TypeID::get<DerivedClass>())`.

diff --git a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
@@ -37,6 +37,11 @@ struct GPUToNVVMPipelineOptions
       *this, "cubin-format",
       llvm::cl::desc("Compilation format to use to serialize to cubin."),
       llvm::cl::init("fatbin")};
+  PassOptions::Option<std::string> cmdOptions{
+      *this, "ptxas-cmd-options",
+      llvm::cl::desc(
+          "Command line options to pass to the downstream compiler."),
+      llvm::cl::init("")};
   PassOptions::Option<int> optLevel{
       *this, "opt-level",
       llvm::cl::desc("Optimization level for NVVM compilation"),

diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
@@ -143,6 +143,9 @@ def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> {
            "Enable flush to zero for denormals.">,
     ListOption<"linkLibs", "l", "std::string",
            "Extra bitcode libraries paths to link to.">,
+    Option<"cmdOptions", "ptxas-cmd-options", "std::string",
+           /*default=*/ [{""}],
+           "Command line options passed to downstream compiler">,
   ];
 }
 

@@ -2862,6 +2862,8 @@ def NVVM_TargettAttr : NVVM_Attr<"NVVMTarget", "target"> {
     bool hasFlag(StringRef flag) const;
     bool hasFastMath() const;
     bool hasFtz() const;
+    bool hasCmdOptions() const;
+    std::optional<mlir::NamedAttribute> getCmdOptions() const;
   }];
   let extraClassDefinition = [{
     bool $cppClass::hasFlag(StringRef flag) const {
@@ -2875,6 +2877,12 @@ def NVVM_TargettAttr : NVVM_Attr<"NVVMTarget", "target"> {
     bool $cppClass::hasFtz() const {
       return hasFlag("ftz");
     }
+    bool $cppClass::hasCmdOptions() const {
+      return hasFlag("ptxas-cmd-options");
+    }
+    std::optional<mlir::NamedAttribute> $cppClass::getCmdOptions() const {
+      return getFlags().getNamed("ptxas-cmd-options");
+    }
   }];
 }
 

diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -2564,7 +2564,7 @@ CompilationTarget TargetOptions::getDefaultCompilationTarget() {
 }
 
 std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>>
-TargetOptions::tokenizeCmdOptions() const {
+TargetOptions::tokenizeCmdOptions(const std::string &cmdOptions) {
   std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> options;
   llvm::StringSaver stringSaver(options.first);
   StringRef opts = cmdOptions;
@@ -2586,6 +2586,11 @@ TargetOptions::tokenizeCmdOptions() const {
   return options;
 }
 
+std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>>
+TargetOptions::tokenizeCmdOptions() const {
+  return tokenizeCmdOptions(cmdOptions);
+}
+
 MLIR_DEFINE_EXPLICIT_TYPE_ID(::mlir::gpu::TargetOptions)
 
 #include "mlir/Dialect/GPU/IR/GPUOpInterfaces.cpp.inc"

diff --git a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp
@@ -58,6 +58,7 @@ void buildCommonPassPipeline(
   nvvmTargetOptions.chip = options.cubinChip;
   nvvmTargetOptions.features = options.cubinFeatures;
   nvvmTargetOptions.optLevel = options.optLevel;
+  nvvmTargetOptions.cmdOptions = options.cmdOptions;
   pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
   pm.addPass(createLowerAffinePass());
   pm.addPass(createArithToLLVMConversionPass());

@@ -45,14 +45,30 @@ struct NVVMAttachTarget
 
 DictionaryAttr NVVMAttachTarget::getFlags(OpBuilder &builder) const {
   UnitAttr unitAttr = builder.getUnitAttr();
-  SmallVector<NamedAttribute, 2> flags;
+  SmallVector<NamedAttribute, 3> flags;
   auto addFlag = [&](StringRef flag) {
     flags.push_back(builder.getNamedAttr(flag, unitAttr));
   };
   if (fastFlag)
     addFlag("fast");
   if (ftzFlag)
     addFlag("ftz");
+
+  // Tokenize and set the optional command line options.
+  if (!cmdOptions.empty()) {
+    auto options = gpu::TargetOptions::tokenizeCmdOptions(cmdOptions);
+    if (!options.second.empty()) {
+      llvm::SmallVector<mlir::Attribute> nvvmOptionAttrs;
+      for (const char *opt : options.second) {
+        nvvmOptionAttrs.emplace_back(
+            mlir::StringAttr::get(builder.getContext(), StringRef(opt)));
+      }
+      flags.push_back(builder.getNamedAttr(
+          "ptxas-cmd-options",
+          mlir::ArrayAttr::get(builder.getContext(), nvvmOptionAttrs)));
+    }
+  }
+
   if (!flags.empty())
     return builder.getDictionaryAttr(flags);
   return nullptr;

@@ -321,6 +321,25 @@ std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
   return std::nullopt;
 }
 
+/// Adds optional command-line arguments to existing arguments.
+template <typename T>
+static void setOptionalCommandlineArguments(NVVMTargetAttr target,
+                                            SmallVectorImpl<T> &ptxasArgs) {
+  if (!target.hasCmdOptions())
+    return;
+
+  std::optional<mlir::NamedAttribute> cmdOptions = target.getCmdOptions();
+  for (Attribute attr : cast<ArrayAttr>(cmdOptions->getValue())) {
+    if (auto strAttr = dyn_cast<StringAttr>(attr)) {
+      if constexpr (std::is_same_v<T, StringRef>) {
+        ptxasArgs.push_back(strAttr.getValue());
+      } else if constexpr (std::is_same_v<T, const char *>) {
+        ptxasArgs.push_back(strAttr.getValue().data());
+      }
+    }
+  }
+}
+
 // TODO: clean this method & have a generic tool driver or never emit binaries
 // with this mechanism and let another stage take care of it.
 std::optional<SmallVector<char, 0>>
@@ -359,8 +378,8 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
     return std::nullopt;
   TmpFile cubinFile;
   if (createFatbin) {
-    Twine cubinFilename = ptxFile->first + ".cubin";
-    cubinFile = TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename));
+    std::string cubinFilename = (ptxFile->first + ".cubin").str();
+    cubinFile = TmpFile(cubinFilename, llvm::FileRemover(cubinFilename));
   } else {
     cubinFile.first = binaryFile->first;
   }
@@ -412,6 +431,9 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
       useFatbin32 = true;
   }
 
+  // Set optional command line arguments
+  setOptionalCommandlineArguments(getTarget(), ptxasArgs);
+
   // Create the `fatbinary` args.
   StringRef chip = getTarget().getChip();
   // Remove the arch prefix to obtain the compute capability.
@@ -562,6 +584,8 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
   cmdOpts.second.append(
       {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});
 
+  // Set optional command line arguments
+  setOptionalCommandlineArguments(getTarget(), cmdOpts.second);
   // Create the compiler handle.
   RETURN_ON_NVPTXCOMPILER_ERROR(
       nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));

diff --git a/mlir/test/Dialect/GPU/nvvm-attach-target.mlir b/mlir/test/Dialect/GPU/nvvm-attach-target.mlir
@@ -0,0 +1,15 @@
+// RUN: mlir-opt %s --nvvm-attach-target="" | FileCheck %s
+// RUN: mlir-opt %s --nvvm-attach-target="ptxas-cmd-options=--register-usage-level=8" | FileCheck %s -check-prefix=CHECK-OPTIONS
+
+module attributes {gpu.container_module} {
+    // CHECK-LABEL:gpu.module @kernel_module1
+    // CHECK: [#nvvm.target]
+    // CHECK-OPTIONS: [#nvvm.target<flags = {"ptxas-cmd-options" = ["--register-usage-level=8"]}>]
+    gpu.module @kernel_module1 {
+    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
+        %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
+        %arg5: i64) attributes {gpu.kernel} {
+            llvm.return
+        }
+    }
+}
@@ -0,0 +1,21 @@
+// RUN: mlir-opt %s \
+// RUN:  | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_80 ptxas-cmd-options='-v --register-usage-level=8'" -debug-only=serialize-to-binary \
+// RUN:  2>&1 | FileCheck %s
+
+func.func @host_function(%arg0 : f32, %arg1 : memref<?xf32>) {
+    %cst = arith.constant 1 : index
+    %c0 = arith.constant 0 : index
+    %cst2 = memref.dim %arg1, %c0 : memref<?xf32>
+
+    gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst)
+                threads(%tx, %ty, %tz) in (%block_x = %cst2, %block_y = %cst, %block_z = %cst) {
+        memref.store %arg0, %arg1[%tx] : memref<?xf32>
+        gpu.terminator
+    }
+
+    return
+}
+
+// CHECK: ptxas -arch sm_80
+// CHECK-SAME: -v 
+// CHECK-SAME: --register-usage-level=8