Skip to content

Commit b41115c

Browse files
committed
AArch64: Fix invalid immediate offsets in SVE gather/scatter [PR121449]
This patch fixes incorrect constraints in RTL patterns for AArch64 SVE gather/scatter with type widening/narrowing and vector-plus-immediate addressing. The bug leads to "immediate offset out of range" errors such as the one below during assembly, eventually causing compilation failures.

/tmp/ccsVqBp1.s: Assembler messages:
/tmp/ccsVqBp1.s:54: Error: immediate offset out of range 0 to 31 at operand 3 -- `ld1b z1.d,p0/z,[z1.d,#64]'

The current RTL patterns for such instructions incorrectly use the vgw or vgd constraints for the immediate operand, based on the vector element type in the Z registers (zN.s or zN.d). However, for gather/scatter with type conversions, the immediate range for vector-plus-immediate addressing is determined by the element type in memory, which differs from that in the vector registers. Using the wrong constraint can produce out-of-range offset values that cannot be encoded in the instruction.

This patch corrects the constraints used in these patterns. A test case that reproduces the issue is also included.

Bootstrapped and regression-tested on aarch64-linux-gnu.

gcc/ChangeLog:

PR target/121449
* config/aarch64/aarch64-sve.md (mask_gather_load<mode><v_int_container>): Use vg<Vesize> constraints for alternatives with immediate offset.
(mask_scatter_store<mode><v_int_container>): Likewise.

gcc/testsuite/ChangeLog:

PR target/121449
* g++.target/aarch64/sve/pr121449.C: New test.
1 parent 1803809 commit b41115c

File tree

2 files changed

+76
-32
lines changed

2 files changed

+76
-32
lines changed

gcc/config/aarch64/aarch64-sve.md

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1458,18 +1458,18 @@
14581458
UNSPEC_LD1_GATHER))]
14591459
"TARGET_SVE && TARGET_NON_STREAMING"
14601460
{@ [cons: =0, 1, 2, 3, 4, 5 ]
1461-
[&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
1462-
[?w, Z, 0, Ui1, Ui1, Upl] ^
1463-
[&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1464-
[?w, vgw, 0, Ui1, Ui1, Upl] ^
1465-
[&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1466-
[?w, rk, 0, Z, Ui1, Upl] ^
1467-
[&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1468-
[?w, rk, 0, Ui1, Ui1, Upl] ^
1469-
[&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1470-
[?w, rk, 0, Z, i, Upl] ^
1471-
[&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1472-
[?w, rk, 0, Ui1, i, Upl] ^
1461+
[&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
1462+
[?w, Z, 0, Ui1, Ui1, Upl] ^
1463+
[&w, vg<Vesize>, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1464+
[?w, vg<Vesize>, 0, Ui1, Ui1, Upl] ^
1465+
[&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1466+
[?w, rk, 0, Z, Ui1, Upl] ^
1467+
[&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1468+
[?w, rk, 0, Ui1, Ui1, Upl] ^
1469+
[&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1470+
[?w, rk, 0, Z, i, Upl] ^
1471+
[&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1472+
[?w, rk, 0, Ui1, i, Upl] ^
14731473
}
14741474
)
14751475

@@ -1487,14 +1487,14 @@
14871487
UNSPEC_LD1_GATHER))]
14881488
"TARGET_SVE && TARGET_NON_STREAMING"
14891489
{@ [cons: =0, 1, 2, 3, 4, 5]
1490-
[&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
1491-
[?w, Z, 0, i, Ui1, Upl] ^
1492-
[&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1493-
[?w, vgd, 0, i, Ui1, Upl] ^
1494-
[&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1495-
[?w, rk, 0, i, Ui1, Upl] ^
1496-
[&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1497-
[?w, rk, 0, i, i, Upl] ^
1490+
[&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
1491+
[?w, Z, 0, i, Ui1, Upl] ^
1492+
[&w, vg<Vesize>, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1493+
[?w, vg<Vesize>, 0, i, Ui1, Upl] ^
1494+
[&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1495+
[?w, rk, 0, i, Ui1, Upl] ^
1496+
[&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1497+
[?w, rk, 0, i, i, Upl] ^
14981498
}
14991499
)
15001500

@@ -2378,13 +2378,13 @@
23782378
(match_operand:SVE_4 4 "register_operand")]
23792379
UNSPEC_ST1_SCATTER))]
23802380
"TARGET_SVE && TARGET_NON_STREAMING"
2381-
{@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
2382-
[ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
2383-
[ vgw , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2384-
[ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2385-
[ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2386-
[ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2387-
[ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
2381+
{@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
2382+
[ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
2383+
[ vg<Vesize> , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2384+
[ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2385+
[ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2386+
[ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2387+
[ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
23882388
}
23892389
)
23902390

@@ -2401,11 +2401,11 @@
24012401
(match_operand:SVE_2 4 "register_operand")]
24022402
UNSPEC_ST1_SCATTER))]
24032403
"TARGET_SVE && TARGET_NON_STREAMING"
2404-
{@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2405-
[ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
2406-
[ vgd , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2407-
[ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
2408-
[ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
2404+
{@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2405+
[ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
2406+
[ vg<Vesize> , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2407+
[ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
2408+
[ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
24092409
}
24102410
)
24112411

gcc/testsuite/g++.target/aarch64/sve/pr121449.C

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/* PR target/121449 */
2+
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
3+
/* { dg-options "-O3 -save-temps" } */
4+
5+
struct example;
6+
7+
struct array {
8+
unsigned length();
9+
example *operator[](unsigned i) {
10+
example **data = reinterpret_cast<example **>(this);
11+
return data[i];
12+
}
13+
};
14+
15+
struct example {
16+
int a[16];
17+
bool is_even;
18+
int version;
19+
int count() { return is_even ? 2 : 1; }
20+
void fun1(int, long);
21+
void fun2(unsigned, unsigned);
22+
void process(array &, array &);
23+
};
24+
25+
bool found;
26+
27+
void example::process(array &a, array &b) {
28+
for (unsigned i = 1; a.length(); i++) {
29+
long total = 0;
30+
for (unsigned k = 0; k <= i; k++) {
31+
total += a[k]->count();
32+
}
33+
for (unsigned j = 0; j < i; j++) {
34+
int major = b[j]->version;
35+
if (found)
36+
major += i;
37+
fun1(i + 1, total);
38+
fun2(j, major);
39+
}
40+
}
41+
}
42+
43+
/* { dg-final { scan-assembler-not {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[(z[0-9]+)\.d, #64\]} } } */
44+

0 commit comments

Comments
 (0)