Skip to content

Commit a92f54f

Browse files
committed
aarch64: Improve vector constant generation using SVE INDEX instruction [PR113328]
SVE's INDEX instruction can be used to populate vectors with values starting
from "base" and incremented by "step" for each subsequent element. We can take
advantage of it to generate vector constants if TARGET_SVE is available and the
base and step values are within [-16, 15].

For example, with the following function:

  typedef int v4si __attribute__ ((vector_size (16)));
  v4si
  f_v4si (void)
  {
    return (v4si){ 0, 1, 2, 3 };
  }

GCC currently generates:

  f_v4si:
          adrp    x0, .LC4
          ldr     q0, [x0, #:lo12:.LC4]
          ret
  .LC4:
          .word   0
          .word   1
          .word   2
          .word   3

With this patch, we generate an INDEX instruction instead if TARGET_SVE is
available:

  f_v4si:
          index   z0.s, #0, #1
          ret

        PR target/113328

gcc/ChangeLog:

        * config/aarch64/aarch64.cc (aarch64_simd_valid_immediate): Improve
        handling of some ADVSIMD vectors by using SVE's INDEX if TARGET_SVE is
        available.
        (aarch64_output_simd_mov_immediate): Likewise.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/acle/general/dupq_1.c: Update test to use
        SVE's INDEX instruction.
        * gcc.target/aarch64/sve/acle/general/dupq_2.c: Likewise.
        * gcc.target/aarch64/sve/acle/general/dupq_3.c: Likewise.
        * gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
        * gcc.target/aarch64/sve/vec_init_3.c: New test.

Signed-off-by: Pengxuan Zheng <[email protected]>
1 parent 58bc39c commit a92f54f

File tree

6 files changed

+115
-9
lines changed

6 files changed

+115
-9
lines changed

gcc/config/aarch64/aarch64.cc

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22987,7 +22987,8 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
2298722987
if (CONST_VECTOR_P (op)
2298822988
&& CONST_VECTOR_DUPLICATE_P (op))
2298922989
n_elts = CONST_VECTOR_NPATTERNS (op);
22990-
else if ((vec_flags & VEC_SVE_DATA)
22990+
else if (which == AARCH64_CHECK_MOV
22991+
&& TARGET_SVE
2299122992
&& const_vec_series_p (op, &base, &step))
2299222993
{
2299322994
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
@@ -25245,6 +25246,16 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
2524525246

2524625247
if (which == AARCH64_CHECK_MOV)
2524725248
{
25249+
if (info.insn == simd_immediate_info::INDEX)
25250+
{
25251+
gcc_assert (TARGET_SVE);
25252+
snprintf (templ, sizeof (templ), "index\t%%Z0.%c, #"
25253+
HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
25254+
element_char, INTVAL (info.u.index.base),
25255+
INTVAL (info.u.index.step));
25256+
return templ;
25257+
}
25258+
2524825259
mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
2524925260
shift_op = (info.u.mov.modifier == simd_immediate_info::MSL
2525025261
? "msl" : "lsl");

gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ dupq (int x)
1010
return svdupq_s32 (x, 1, 2, 3);
1111
}
1212

13-
/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
13+
/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
1414
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
1515
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
16-
/* { dg-final { scan-assembler {\t\.word\t1\n\t\.word\t2\n\t\.word\t3\n} } } */

gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ dupq (int x)
1010
return svdupq_s32 (x, 1, 2, 3);
1111
}
1212

13-
/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
13+
/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
1414
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
1515
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
16-
/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t2\n\t\.word\t1\n} } } */

gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ dupq (int x)
1010
return svdupq_s32 (0, 1, x, 3);
1111
}
1212

13-
/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
13+
/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
1414
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
1515
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
16-
/* { dg-final { scan-assembler {\t\.word\t0\n\t\.word\t1\n\t\.word\t[^\n]*\n\t\.word\t3\n} } } */

gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ dupq (int x)
1010
return svdupq_s32 (0, 1, x, 3);
1111
}
1212

13-
/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
13+
/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
1414
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
1515
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
16-
/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t[^\n]*\n\t\.word\t1\n\t\.word\t0\n} } } */
gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/* { dg-do compile } */
2+
/* { dg-options "-O2" } */
3+
/* { dg-final { check-function-bodies "**" "" "" } } */
4+
5+
typedef char v16qi __attribute__ ((vector_size (16)));
6+
typedef char v8qi __attribute__ ((vector_size (8)));
7+
typedef short v8hi __attribute__ ((vector_size (16)));
8+
typedef short v4hi __attribute__ ((vector_size (8)));
9+
typedef int v4si __attribute__ ((vector_size (16)));
10+
typedef int v2si __attribute__ ((vector_size (8)));
11+
typedef long v2di __attribute__ ((vector_size (16)));
12+
13+
/*
14+
** f_v16qi:
15+
** index z0\.b, #0, #1
16+
** ret
17+
*/
18+
v16qi
19+
f_v16qi (void)
20+
{
21+
return (v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
22+
}
23+
24+
/*
25+
** f_v8qi:
26+
** index z0\.b, #0, #1
27+
** ret
28+
*/
29+
v8qi
30+
f_v8qi (void)
31+
{
32+
return (v8qi){ 0, 1, 2, 3, 4, 5, 6, 7 };
33+
}
34+
35+
/*
36+
** f_v8hi:
37+
** index z0\.h, #0, #1
38+
** ret
39+
*/
40+
v8hi
41+
f_v8hi (void)
42+
{
43+
return (v8hi){ 0, 1, 2, 3, 4, 5, 6, 7 };
44+
}
45+
46+
/*
47+
** f_v4hi:
48+
** index z0\.h, #0, #1
49+
** ret
50+
*/
51+
v4hi
52+
f_v4hi (void)
53+
{
54+
return (v4hi){ 0, 1, 2, 3 };
55+
}
56+
57+
/*
58+
** f_v4si:
59+
** index z0\.s, #0, #1
60+
** ret
61+
*/
62+
v4si
63+
f_v4si (void)
64+
{
65+
return (v4si){ 0, 1, 2, 3 };
66+
}
67+
68+
/*
69+
** f_v2si:
70+
** index z0\.s, #0, #1
71+
** ret
72+
*/
73+
v2si
74+
f_v2si (void)
75+
{
76+
return (v2si){ 0, 1 };
77+
}
78+
79+
/*
80+
** f_v2di:
81+
** index z0\.d, #0, #1
82+
** ret
83+
*/
84+
v2di
85+
f_v2di (void)
86+
{
87+
return (v2di){ 0, 1 };
88+
}
89+
90+
/*
91+
** g_v4si:
92+
** index z0\.s, #3, #-4
93+
** ret
94+
*/
95+
v4si
96+
g_v4si (void)
97+
{
98+
return (v4si){ 3, -1, -5, -9 };
99+
}

0 commit comments

Comments
 (0)