Skip to content

Commit ce8cee9

Browse files
committed
[LLVM][AArch64] Add assembly/disassembly for SME2p2 ftmopa and bftmopa
This patch adds assembly/disassembly for the following SME2p2 instructions (part of the 2024 AArch64 ISA update):

- BFTMOPA (widening) - FEAT_SME2p2
- BFTMOPA (non-widening) - FEAT_SME2p2 & FEAT_SME_B16B16
- FTMOPA (4-way) - FEAT_SME2p2 & FEAT_SME_F8F32
- FTMOPA (2-way, 8-to-16) - FEAT_SME2p2 & FEAT_SME_F8F16
- FTMOPA (2-way, 16-to-32) - FEAT_SME2p2
- FTMOPA (non-widening, f16) - FEAT_SME2p2 & FEAT_SME_F16F16
- FTMOPA (non-widening, f32) - FEAT_SME2p2

It also introduces .arch assembler tests for the new sme2p2 feature.

In accordance with: https://developer.arm.com/documentation/ddi0602/latest/

Co-authored-by: Marian Lukac [email protected]
1 parent a1b39b5 commit ce8cee9

File tree

10 files changed

+599
-0
lines changed

10 files changed

+599
-0
lines changed

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1000,3 +1000,24 @@ defm FMOPA_MPPZZ_BtoS : sme_outer_product_fp32<0b0, 0b01, ZPR8, "fmopa", null_fr
10001000

10011001
} //[HasSMEF8F32]
10021002

1003+
// Forms gated on SME2p2 alone. Per the commit message these are FTMOPA
// (2-way, 16-to-32), FTMOPA (non-widening, f32) and BFTMOPA (widening); all
// three use the 32-bit tile format sme_tmopa_32b.
let Predicates = [HasSME2p2] in {
1004+
def FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, "ftmopa">;
1005+
def FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, "ftmopa">;
1006+
def BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, "bftmopa">;
1007+
} // [HasSME2p2]
1008+
1009+
// BFTMOPA (non-widening): additionally requires SME-B16B16 and uses the
// 16-bit tile format sme_tmopa_16b.
let Predicates = [HasSME2p2, HasSMEB16B16] in {
1010+
def BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, "bftmopa">;
1011+
} // [HasSME2p2, HasSMEB16B16]
1012+
1013+
// FTMOPA with 8-bit sources into a 32-bit tile (the "4-way" form in the
// commit message): additionally requires SME-F8F32. Marked as reading FPMR
// and FPCR through Uses.
let Predicates = [HasSME2p2, HasSMEF8F32], Uses = [FPMR, FPCR] in {
1014+
def FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, "ftmopa">;
1015+
} // [HasSME2p2, HasSMEF8F32], Uses = [FPMR, FPCR]
1016+
1017+
// FTMOPA with 8-bit sources into a 16-bit tile (the "2-way, 8-to-16" form in
// the commit message): additionally requires SME-F8F16. Marked as reading
// FPMR and FPCR through Uses.
let Predicates = [HasSME2p2, HasSMEF8F16], Uses = [FPMR, FPCR] in {
1018+
def FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, "ftmopa">;
1019+
} // [HasSME2p2, HasSMEF8F16], Uses = [FPMR, FPCR]
1020+
1021+
// FTMOPA (non-widening, f16): additionally requires SME-F16F16 and uses the
// 16-bit tile format sme_tmopa_16b.
let Predicates = [HasSME2p2, HasSMEF16F16] in {
1022+
def FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, "ftmopa">;
1023+
} // [HasSME2p2, HasSMEF16F16]

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3192,6 +3192,68 @@ multiclass sme2_int_bmopx_tile<string mnemonic, bits<3> op, SDPatternOperator i
31923192
def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv4i1, nxv4i32>;
31933193
}
31943194

3195+
//===----------------------------------------------------------------------===//
3196+
// SME2 Sparse Outer Product and Accumulate
3197+
3198+
// Instruction format for the (b)ftmopa forms that accumulate into a TileOp16
// tile.
//   opc      - 5 selector bits; they are not contiguous in the encoding and
//              land in Inst{24}, Inst{21}, Inst{15}, Inst{13} and Inst{3}.
//   zn_ty    - register operand class of the first source ($Zn, 4-bit field).
//   zm_ty    - register operand class of the second source ($Zm, 5-bit field).
//   mnemonic - assembly mnemonic.
// The remaining inputs are a ZK register ($Zk, 3-bit field) and a
// VectorIndexS32b lane index ($imm, 2-bit field), printed together as
// "$Zk$imm".
class sme_tmopa_16b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
3199+
: I<(outs TileOp16:$ZAda),
3200+
(ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm),
3201+
mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm",
3202+
"", []>,
3203+
Sched<[]> {
3204+
// Only one bit of tile number is encoded for this format (Inst{0}).
bit ZAda;
3205+
bits<4> Zn;
3206+
bits<5> Zm;
3207+
bits<3> Zk;
3208+
bits<2> imm;
3209+
let Inst{31-25} = 0b1000000;
3210+
let Inst{24} = opc{4};
3211+
let Inst{23-22} = 0b01;
3212+
let Inst{21} = opc{3};
3213+
let Inst{20-16} = Zm;
3214+
let Inst{15} = opc{2};
3215+
let Inst{14} = 0b0;
3216+
let Inst{13} = opc{1};
3217+
let Inst{12-10} = Zk;
3218+
let Inst{9-6} = Zn;
3219+
let Inst{5-4} = imm;
3220+
let Inst{3} = opc{0};
3221+
let Inst{2-1} = 0b00;
3222+
let Inst{0} = ZAda;
3223+
3224+
// The accumulator tile is both read and written: tie the output $ZAda to
// the input $_ZAda.
let Constraints = "$ZAda = $_ZAda";
3225+
}
3226+
3227+
// Instruction format for the (b)ftmopa forms that accumulate into a TileOp32
// tile.
//   opc      - 5 selector bits; they are not contiguous in the encoding and
//              land in Inst{24}, Inst{21}, Inst{15}, Inst{13} and Inst{3}.
//   zn_ty    - register operand class of the first source ($Zn, 4-bit field).
//   zm_ty    - register operand class of the second source ($Zm, 5-bit field).
//   mnemonic - assembly mnemonic.
// The remaining inputs are a ZK register ($Zk, 3-bit field) and a
// VectorIndexS32b lane index ($imm, 2-bit field), printed together as
// "$Zk$imm".
class sme_tmopa_32b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
3228+
: I<(outs TileOp32:$ZAda),
3229+
(ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm),
3230+
mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm",
3231+
"", []>,
3232+
Sched<[]> {
3233+
// Two bits of tile number are encoded for this format (Inst{1-0}); Inst{2}
// is fixed to zero.
bits<2> ZAda;
3234+
bits<4> Zn;
3235+
bits<5> Zm;
3236+
bits<3> Zk;
3237+
bits<2> imm;
3238+
let Inst{31-25} = 0b1000000;
3239+
let Inst{24} = opc{4};
3240+
let Inst{23-22} = 0b01;
3241+
let Inst{21} = opc{3};
3242+
let Inst{20-16} = Zm;
3243+
let Inst{15} = opc{2};
3244+
let Inst{14} = 0b0;
3245+
let Inst{13} = opc{1};
3246+
let Inst{12-10} = Zk;
3247+
let Inst{9-6} = Zn;
3248+
let Inst{5-4} = imm;
3249+
let Inst{3} = opc{0};
3250+
let Inst{2} = 0b0;
3251+
let Inst{1-0} = ZAda;
3252+
3253+
// The accumulator tile is both read and written: tie the output $ZAda to
// the input $_ZAda.
let Constraints = "$ZAda = $_ZAda";
3254+
}
3255+
3256+
31953257
//===----------------------------------------------------------------------===///
31963258
// SME2 Zero Lookup Table.
31973259
class sme2_zero_zt<string mnemonic, bits<4> opc>
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-b16b16 2>&1 < %s| FileCheck %s
2+
3+
// --------------------------------------------------------------------------//
4+
// Invalid ZA register (range)
5+
6+
bftmopa za2.h, {z30.h-z31.h}, z31.h, z31[3]
7+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
8+
// CHECK-NEXT: bftmopa za2.h, {z30.h-z31.h}, z31.h, z31[3]
9+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
10+
11+
bftmopa za4.s, {z30.h-z31.h}, z31.h, z31[3]
12+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
13+
// CHECK-NEXT: bftmopa za4.s, {z30.h-z31.h}, z31.h, z31[3]
14+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
15+
16+
// --------------------------------------------------------------------------//
17+
// Invalid ZA register (type-suffix)
18+
19+
bftmopa za3.d, {z28.h-z29.h}, z31.h, z20[3]
20+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
21+
// CHECK-NEXT: bftmopa za3.d, {z28.h-z29.h}, z31.h, z20[3]
22+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
23+
24+
// --------------------------------------------------------------------------//
25+
// Invalid vector list operand
26+
27+
bftmopa za0.h, {z28.h-z31.h}, z31.h, z31[3]
28+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
29+
// CHECK-NEXT: bftmopa za0.h, {z28.h-z31.h}, z31.h, z31[3]
30+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
31+
32+
bftmopa za0.h, {z29.h-z30.h}, z31.h, z31[3]
33+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
34+
// CHECK-NEXT: bftmopa za0.h, {z29.h-z30.h}, z31.h, z31[3]
35+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
36+
37+
bftmopa za0.s, {z28.h-z31.h}, z31.h, z31[3]
38+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
39+
// CHECK-NEXT: bftmopa za0.s, {z28.h-z31.h}, z31.h, z31[3]
40+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
41+
42+
bftmopa za3.s, {z29.h-z30.h}, z31.h, z31[3]
43+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
44+
// CHECK-NEXT: bftmopa za3.s, {z29.h-z30.h}, z31.h, z31[3]
45+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
46+
47+
// --------------------------------------------------------------------------//
48+
// Invalid ZK register
49+
50+
bftmopa za0.h, {z28.h-z29.h}, z31.h, z19[3]
51+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
52+
// CHECK-NEXT: bftmopa za0.h, {z28.h-z29.h}, z31.h, z19[3]
53+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
54+
55+
bftmopa za0.h, {z28.h-z29.h}, z31.h, z24[3]
56+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
57+
// CHECK-NEXT: bftmopa za0.h, {z28.h-z29.h}, z31.h, z24[3]
58+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
59+
60+
bftmopa za3.s, {z28.h-z29.h}, z31.h, z19[3]
61+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
62+
// CHECK-NEXT: bftmopa za3.s, {z28.h-z29.h}, z31.h, z19[3]
63+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
64+
65+
bftmopa za3.s, {z28.h-z29.h}, z31.h, z27[3]
66+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
67+
// CHECK-NEXT: bftmopa za3.s, {z28.h-z29.h}, z31.h, z27[3]
68+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
69+
70+
bftmopa za0.h, {z28.h-z29.h}, z31.h, z21.h[3]
71+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
72+
// CHECK-NEXT: bftmopa za0.h, {z28.h-z29.h}, z31.h, z21.h[3]
73+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
74+
75+
bftmopa za0.s, {z28.h-z29.h}, z31.h, z30.h[3]
76+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
77+
// CHECK-NEXT: bftmopa za0.s, {z28.h-z29.h}, z31.h, z30.h[3]
78+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
79+
80+
// --------------------------------------------------------------------------//
81+
// Invalid immediate
82+
83+
bftmopa za0.h, {z28.h-z29.h}, z31.h, z20[4]
84+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]
85+
// CHECK-NEXT: bftmopa za0.h, {z28.h-z29.h}, z31.h, z20[4]
86+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
87+
88+
bftmopa za3.s, {z28.h-z29.h}, z31.h, z20[4]
89+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]
90+
// CHECK-NEXT: bftmopa za3.s, {z28.h-z29.h}, z31.h, z20[4]
91+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
92+
93+
// --------------------------------------------------------------------------//
94+
// Invalid ZPR type suffix
95+
96+
bftmopa za0.h, {z28.h-z29.h}, z31.s, z20[3]
97+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
98+
// CHECK-NEXT: bftmopa za0.h, {z28.h-z29.h}, z31.s, z20[3]
99+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
100+
101+
bftmopa za0.h, {z28.b-z29.b}, z31.b, z20[3]
102+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
103+
// CHECK-NEXT: bftmopa za0.h, {z28.b-z29.b}, z31.b, z20[3]
104+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
105+
106+
bftmopa za3.s, {z28.h-z29.h}, z31.s, z20[3]
107+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
108+
// CHECK-NEXT: bftmopa za3.s, {z28.h-z29.h}, z31.s, z20[3]
109+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
110+
111+
bftmopa za3.s, {z28.s-z29.s}, z31.s, z20[3]
112+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
113+
// CHECK-NEXT: bftmopa za3.s, {z28.s-z29.s}, z31.s, z20[3]
114+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

llvm/test/MC/AArch64/SME2p2/bftmopa.s

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-b16b16 < %s \
2+
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
3+
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
4+
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
5+
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-b16b16 < %s \
6+
// RUN: | llvm-objdump -d --mattr=+sme2p2,+sme-b16b16 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
7+
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-b16b16 < %s \
8+
// RUN: | llvm-objdump -d --mattr=-sme2p2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
9+
// Disassemble encoding and check the re-encoding (-show-encoding) matches.
10+
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-b16b16 < %s \
11+
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
12+
// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-b16b16 -disassemble -show-encoding \
13+
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
14+
15+
// non-widening
16+
17+
bftmopa za0.h, {z0.h-z1.h}, z0.h, z20[0] // 10000001-01100000-00000000-00001000
18+
// CHECK-INST: bftmopa za0.h, { z0.h, z1.h }, z0.h, z20[0]
19+
// CHECK-ENCODING: [0x08,0x00,0x60,0x81]
20+
// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16
21+
// CHECK-UNKNOWN: 81600008 <unknown>
22+
23+
bftmopa za1.h, {z12.h-z13.h}, z8.h, z23[3] // 10000001-01101000-00001101-10111001
24+
// CHECK-INST: bftmopa za1.h, { z12.h, z13.h }, z8.h, z23[3]
25+
// CHECK-ENCODING: [0xb9,0x0d,0x68,0x81]
26+
// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16
27+
// CHECK-UNKNOWN: 81680db9 <unknown>
28+
29+
bftmopa za1.h, {z30.h-z31.h}, z31.h, z31[3] // 10000001-01111111-00011111-11111001
30+
// CHECK-INST: bftmopa za1.h, { z30.h, z31.h }, z31.h, z31[3]
31+
// CHECK-ENCODING: [0xf9,0x1f,0x7f,0x81]
32+
// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16
33+
// CHECK-UNKNOWN: 817f1ff9 <unknown>
34+
35+
// widening
36+
37+
bftmopa za0.s, {z0.h-z1.h}, z0.h, z20[0] // 10000001-01000000-00000000-00000000
38+
// CHECK-INST: bftmopa za0.s, { z0.h, z1.h }, z0.h, z20[0]
39+
// CHECK-ENCODING: [0x00,0x00,0x40,0x81]
40+
// CHECK-ERROR: instruction requires: sme2p2
41+
// CHECK-UNKNOWN: 81400000 <unknown>
42+
43+
bftmopa za3.s, {z12.h-z13.h}, z8.h, z23[3] // 10000001-01001000-00001101-10110011
44+
// CHECK-INST: bftmopa za3.s, { z12.h, z13.h }, z8.h, z23[3]
45+
// CHECK-ENCODING: [0xb3,0x0d,0x48,0x81]
46+
// CHECK-ERROR: instruction requires: sme2p2
47+
// CHECK-UNKNOWN: 81480db3 <unknown>
48+
49+
bftmopa za3.s, {z30.h-z31.h}, z31.h, z31[3] // 10000001-01011111-00011111-11110011
50+
// CHECK-INST: bftmopa za3.s, { z30.h, z31.h }, z31.h, z31[3]
51+
// CHECK-ENCODING: [0xf3,0x1f,0x5f,0x81]
52+
// CHECK-ERROR: instruction requires: sme2p2
53+
// CHECK-UNKNOWN: 815f1ff3 <unknown>
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
2+
3+
.arch armv9-a+sme2p2
4+
.arch armv9-a+nosme2p2
5+
ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
6+
// CHECK: error: instruction requires: sme2p2
7+
// CHECK: ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// RUN: llvm-mc -triple aarch64 -o - %s 2>&1 | FileCheck %s
2+
3+
.arch armv9-a+sme2p2
4+
ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
5+
// CHECK: ftmopa za0.s, { z0.s, z1.s }, z0.s, z20[0]
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
2+
3+
.arch_extension sme2p2
4+
.arch_extension nosme2p2
5+
ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
6+
// CHECK: error: instruction requires: sme2p2
7+
// CHECK: ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// RUN: llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
2+
3+
.arch_extension sme2p2
4+
ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
5+
// CHECK: ftmopa za0.s, { z0.s, z1.s }, z0.s, z20[0]

0 commit comments

Comments
 (0)