|
| 1 | +From 659f5acd038afbb281b4d1d410762f40954e08c8 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Princeton Ferro < [email protected]> |
| 3 | +Date: Fri, 2 May 2025 17:42:32 -0700 |
| 4 | +Subject: [PATCH 1/1] [NVPTX] Add support for PTX ISA v8.8 (#136639) |
| 5 | + |
| 6 | +Support PTX version 8.8 (`-mattr=+ptx88`) from CUDA 12.9. The following |
| 7 | +new targets are also added: |
| 8 | + |
| 9 | +- SM103 and SM121: sm_103, sm_103a, sm_121, sm_121a. |
| 10 | + |
| 11 | +Also, the SM feature and `Proc` definitions in NVPTX.td were realigned (whitespace-only changes). |
| 12 | + |
| 13 | +https://docs.nvidia.com/cuda/parallel-thread-execution/#changes-in-ptx-isa-version-8-8 |
| 14 | +--- |
| 15 | + llvm/lib/Target/NVPTX/NVPTX.td | 62 +++++++++++++++------------ |
| 16 | + llvm/test/CodeGen/NVPTX/sm-version.ll | 16 +++++++ |
| 17 | + 2 files changed, 51 insertions(+), 27 deletions(-) |
| 18 | + |
| 19 | +diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td |
| 20 | +index 5467ae011a20..ff9a187ecf72 100644 |
| 21 | +--- a/llvm/lib/Target/NVPTX/NVPTX.td |
| 22 | ++++ b/llvm/lib/Target/NVPTX/NVPTX.td |
| 23 | +@@ -36,17 +36,21 @@ class FeaturePTX<int version>: |
| 24 | + |
| 25 | + foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53, |
| 26 | + 60, 61, 62, 70, 72, 75, 80, 86, 87, |
| 27 | +- 89, 90, 100, 101, 120] in |
| 28 | ++ 89, 90, 100, 101, 103, 120, 121] in |
| 29 | + def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>; |
| 30 | + |
| 31 | +-def SM90a: FeatureSM<"90a", 901>; |
| 32 | ++// Arch-specific targets. PTX for these is not compatible with any other |
| 33 | ++// architectures. |
| 34 | ++def SM90a : FeatureSM<"90a", 901>; |
| 35 | + def SM100a: FeatureSM<"100a", 1001>; |
| 36 | + def SM101a: FeatureSM<"101a", 1011>; |
| 37 | ++def SM103a: FeatureSM<"103a", 1031>; |
| 38 | + def SM120a: FeatureSM<"120a", 1201>; |
| 39 | ++def SM121a: FeatureSM<"121a", 1211>; |
| 40 | + |
| 41 | + foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, |
| 42 | + 70, 71, 72, 73, 74, 75, 76, 77, 78, |
| 43 | +- 80, 81, 82, 83, 84, 85, 86, 87] in |
| 44 | ++ 80, 81, 82, 83, 84, 85, 86, 87, 88] in |
| 45 | + def PTX#version: FeaturePTX<version>; |
| 46 | + |
| 47 | + //===----------------------------------------------------------------------===// |
| 48 | +@@ -56,33 +60,37 @@ foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, |
| 49 | + class Proc<string Name, list<SubtargetFeature> Features> |
| 50 | + : Processor<Name, NoItineraries, Features>; |
| 51 | + |
| 52 | +-def : Proc<"sm_20", [SM20, PTX32]>; |
| 53 | +-def : Proc<"sm_21", [SM21, PTX32]>; |
| 54 | +-def : Proc<"sm_30", [SM30]>; |
| 55 | +-def : Proc<"sm_32", [SM32, PTX40]>; |
| 56 | +-def : Proc<"sm_35", [SM35, PTX32]>; |
| 57 | +-def : Proc<"sm_37", [SM37, PTX41]>; |
| 58 | +-def : Proc<"sm_50", [SM50, PTX40]>; |
| 59 | +-def : Proc<"sm_52", [SM52, PTX41]>; |
| 60 | +-def : Proc<"sm_53", [SM53, PTX42]>; |
| 61 | +-def : Proc<"sm_60", [SM60, PTX50]>; |
| 62 | +-def : Proc<"sm_61", [SM61, PTX50]>; |
| 63 | +-def : Proc<"sm_62", [SM62, PTX50]>; |
| 64 | +-def : Proc<"sm_70", [SM70, PTX60]>; |
| 65 | +-def : Proc<"sm_72", [SM72, PTX61]>; |
| 66 | +-def : Proc<"sm_75", [SM75, PTX63]>; |
| 67 | +-def : Proc<"sm_80", [SM80, PTX70]>; |
| 68 | +-def : Proc<"sm_86", [SM86, PTX71]>; |
| 69 | +-def : Proc<"sm_87", [SM87, PTX74]>; |
| 70 | +-def : Proc<"sm_89", [SM89, PTX78]>; |
| 71 | +-def : Proc<"sm_90", [SM90, PTX78]>; |
| 72 | +-def : Proc<"sm_90a", [SM90a, PTX80]>; |
| 73 | +-def : Proc<"sm_100", [SM100, PTX86]>; |
| 74 | ++def : Proc<"sm_20", [SM20, PTX32]>; |
| 75 | ++def : Proc<"sm_21", [SM21, PTX32]>; |
| 76 | ++def : Proc<"sm_30", [SM30]>; |
| 77 | ++def : Proc<"sm_32", [SM32, PTX40]>; |
| 78 | ++def : Proc<"sm_35", [SM35, PTX32]>; |
| 79 | ++def : Proc<"sm_37", [SM37, PTX41]>; |
| 80 | ++def : Proc<"sm_50", [SM50, PTX40]>; |
| 81 | ++def : Proc<"sm_52", [SM52, PTX41]>; |
| 82 | ++def : Proc<"sm_53", [SM53, PTX42]>; |
| 83 | ++def : Proc<"sm_60", [SM60, PTX50]>; |
| 84 | ++def : Proc<"sm_61", [SM61, PTX50]>; |
| 85 | ++def : Proc<"sm_62", [SM62, PTX50]>; |
| 86 | ++def : Proc<"sm_70", [SM70, PTX60]>; |
| 87 | ++def : Proc<"sm_72", [SM72, PTX61]>; |
| 88 | ++def : Proc<"sm_75", [SM75, PTX63]>; |
| 89 | ++def : Proc<"sm_80", [SM80, PTX70]>; |
| 90 | ++def : Proc<"sm_86", [SM86, PTX71]>; |
| 91 | ++def : Proc<"sm_87", [SM87, PTX74]>; |
| 92 | ++def : Proc<"sm_89", [SM89, PTX78]>; |
| 93 | ++def : Proc<"sm_90", [SM90, PTX78]>; |
| 94 | ++def : Proc<"sm_90a", [SM90a, PTX80]>; |
| 95 | ++def : Proc<"sm_100", [SM100, PTX86]>; |
| 96 | + def : Proc<"sm_100a", [SM100a, PTX86]>; |
| 97 | +-def : Proc<"sm_101", [SM101, PTX86]>; |
| 98 | ++def : Proc<"sm_101", [SM101, PTX86]>; |
| 99 | + def : Proc<"sm_101a", [SM101a, PTX86]>; |
| 100 | +-def : Proc<"sm_120", [SM120, PTX87]>; |
| 101 | ++def : Proc<"sm_103", [SM103, PTX88]>; |
| 102 | ++def : Proc<"sm_103a", [SM103a, PTX88]>; |
| 103 | ++def : Proc<"sm_120", [SM120, PTX87]>; |
| 104 | + def : Proc<"sm_120a", [SM120a, PTX87]>; |
| 105 | ++def : Proc<"sm_121", [SM121, PTX88]>; |
| 106 | ++def : Proc<"sm_121a", [SM121a, PTX88]>; |
| 107 | + |
| 108 | + def NVPTXInstrInfo : InstrInfo { |
| 109 | + } |
| 110 | +diff --git a/llvm/test/CodeGen/NVPTX/sm-version.ll b/llvm/test/CodeGen/NVPTX/sm-version.ll |
| 111 | +index ce9a1b1b161d..9705a2f3ba73 100644 |
| 112 | +--- a/llvm/test/CodeGen/NVPTX/sm-version.ll |
| 113 | ++++ b/llvm/test/CodeGen/NVPTX/sm-version.ll |
| 114 | +@@ -20,8 +20,12 @@ |
| 115 | + ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_100a | FileCheck %s --check-prefix=SM100a |
| 116 | + ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101 | FileCheck %s --check-prefix=SM101 |
| 117 | + ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101a | FileCheck %s --check-prefix=SM101a |
| 118 | ++; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103 | FileCheck %s --check-prefix=SM103 |
| 119 | ++; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103a | FileCheck %s --check-prefix=SM103a |
| 120 | + ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120 | FileCheck %s --check-prefix=SM120 |
| 121 | + ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120a | FileCheck %s --check-prefix=SM120a |
| 122 | ++; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121 | FileCheck %s --check-prefix=SM121 |
| 123 | ++; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121a | FileCheck %s --check-prefix=SM121a |
| 124 | + |
| 125 | + ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=SM20 |
| 126 | + ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_21 | FileCheck %s --check-prefix=SM21 |
| 127 | +@@ -45,8 +49,12 @@ |
| 128 | + ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a | FileCheck %s --check-prefix=SM100a |
| 129 | + ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101 | FileCheck %s --check-prefix=SM101 |
| 130 | + ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101a | FileCheck %s --check-prefix=SM101a |
| 131 | ++; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103 | FileCheck %s --check-prefix=SM103 |
| 132 | ++; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103a | FileCheck %s --check-prefix=SM103a |
| 133 | + ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120 | FileCheck %s --check-prefix=SM120 |
| 134 | + ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120a | FileCheck %s --check-prefix=SM120a |
| 135 | ++; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121 | FileCheck %s --check-prefix=SM121 |
| 136 | ++; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121a | FileCheck %s --check-prefix=SM121a |
| 137 | + |
| 138 | + ; SM20: .version 3.2 |
| 139 | + ; SM21: .version 3.2 |
| 140 | +@@ -70,8 +78,12 @@ |
| 141 | + ; SM100a: .version 8.6 |
| 142 | + ; SM101: .version 8.6 |
| 143 | + ; SM101a: .version 8.6 |
| 144 | ++; SM103: .version 8.8 |
| 145 | ++; SM103a: .version 8.8 |
| 146 | + ; SM120: .version 8.7 |
| 147 | + ; SM120a: .version 8.7 |
| 148 | ++; SM121: .version 8.8 |
| 149 | ++; SM121a: .version 8.8 |
| 150 | + |
| 151 | + ; SM20: .target sm_20 |
| 152 | + ; SM21: .target sm_21 |
| 153 | +@@ -95,5 +107,9 @@ |
| 154 | + ; SM100a: .target sm_100a |
| 155 | + ; SM101: .target sm_101 |
| 156 | + ; SM101a: .target sm_101a |
| 157 | ++; SM103: .target sm_103 |
| 158 | ++; SM103a: .target sm_103a |
| 159 | + ; SM120: .target sm_120 |
| 160 | + ; SM120a: .target sm_120a |
| 161 | ++; SM121: .target sm_121 |
| 162 | ++; SM121a: .target sm_121a |
| 163 | +-- |
| 164 | +2.52.0 |
| 165 | + |
0 commit comments