Skip to content

Commit 4199cfa

Browse files
committed
arm: Fix wrong code generated for mve scatter store with writeback intrinsics with -O2 (PR97271).
This patch fixes (PR97271) the wrong code-gen for mve scatter store with writeback intrinsics with -O2.

$ cat bug.c
void
foo (uint32x4_t * addr, const int offset, int32x4_t value)
{
  vstrwq_scatter_base_wb_s32 (addr, 8, value);
}

$ arm-none-eabi-gcc bug.c -S -O2 -march=armv8.1-m.main+mve -mfloat-abi=hard -o -

Without this patch:
...
foo:
  vldrw.32 q3, [r0]
  vstrw.u32 q0, [q3, #8]! ---> (A)
  vldr.64 d4, .L3
  vldr.64 d5, .L3+8
  vldrw.32 q3, [r0]
  vstrw.u32 q2, [q3, #8]! ---> (B)
  bx lr
...

With this patch:
...
foo:
  vldrw.32 q3, [r0]
  vstrw.u32 q0, [q3, #8]! --> (C)
  vstrw.32 q3, [r0]
  bx lr
...

Without this patch, two vstrw assembly instructions (A and B) are generated for the vstrwq_scatter_base_wb_s32 intrinsic, whereas the fix generates only one vstrw assembly instruction (C).

gcc/ChangeLog:

2020-10-06 Srinath Parvathaneni <[email protected]>

  PR target/97291
  * config/arm/arm-builtins.c (arm_strsbwbs_qualifiers): Modify array.
  (arm_strsbwbu_qualifiers): Likewise.
  (arm_strsbwbs_p_qualifiers): Likewise.
  (arm_strsbwbu_p_qualifiers): Likewise.
  * config/arm/arm_mve.h (__arm_vstrdq_scatter_base_wb_s64): Modify function definition.
  (__arm_vstrdq_scatter_base_wb_u64): Likewise.
  (__arm_vstrdq_scatter_base_wb_p_s64): Likewise.
  (__arm_vstrdq_scatter_base_wb_p_u64): Likewise.
  (__arm_vstrwq_scatter_base_wb_p_s32): Likewise.
  (__arm_vstrwq_scatter_base_wb_p_u32): Likewise.
  (__arm_vstrwq_scatter_base_wb_s32): Likewise.
  (__arm_vstrwq_scatter_base_wb_u32): Likewise.
  (__arm_vstrwq_scatter_base_wb_f32): Likewise.
  (__arm_vstrwq_scatter_base_wb_p_f32): Likewise.
  * config/arm/arm_mve_builtins.def (vstrwq_scatter_base_wb_add_u): Remove expansion for the builtin.
  (vstrwq_scatter_base_wb_add_s): Likewise.
  (vstrwq_scatter_base_wb_add_f): Likewise.
  (vstrdq_scatter_base_wb_add_u): Likewise.
  (vstrdq_scatter_base_wb_add_s): Likewise.
  (vstrwq_scatter_base_wb_p_add_u): Likewise.
  (vstrwq_scatter_base_wb_p_add_s): Likewise.
  (vstrwq_scatter_base_wb_p_add_f): Likewise.
  (vstrdq_scatter_base_wb_p_add_u): Likewise.
  (vstrdq_scatter_base_wb_p_add_s): Likewise.
  * config/arm/mve.md (mve_vstrwq_scatter_base_wb_<supf>v4si): Remove expand.
  (mve_vstrwq_scatter_base_wb_add_<supf>v4si): Likewise.
  (mve_vstrwq_scatter_base_wb_<supf>v4si_insn): Rename pattern to ...
  (mve_vstrwq_scatter_base_wb_<supf>v4si): This.
  (mve_vstrwq_scatter_base_wb_p_<supf>v4si): Remove expand.
  (mve_vstrwq_scatter_base_wb_p_add_<supf>v4si): Likewise.
  (mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn): Rename pattern to ...
  (mve_vstrwq_scatter_base_wb_p_<supf>v4si): This.
  (mve_vstrwq_scatter_base_wb_fv4sf): Remove expand.
  (mve_vstrwq_scatter_base_wb_add_fv4sf): Likewise.
  (mve_vstrwq_scatter_base_wb_fv4sf_insn): Rename pattern to ...
  (mve_vstrwq_scatter_base_wb_fv4sf): This.
  (mve_vstrwq_scatter_base_wb_p_fv4sf): Remove expand.
  (mve_vstrwq_scatter_base_wb_p_add_fv4sf): Likewise.
  (mve_vstrwq_scatter_base_wb_p_fv4sf_insn): Rename pattern to ...
  (mve_vstrwq_scatter_base_wb_p_fv4sf): This.
  (mve_vstrdq_scatter_base_wb_<supf>v2di): Remove expand.
  (mve_vstrdq_scatter_base_wb_add_<supf>v2di): Likewise.
  (mve_vstrdq_scatter_base_wb_<supf>v2di_insn): Rename pattern to ...
  (mve_vstrdq_scatter_base_wb_<supf>v2di): This.
  (mve_vstrdq_scatter_base_wb_p_<supf>v2di): Remove expand.
  (mve_vstrdq_scatter_base_wb_p_add_<supf>v2di): Likewise.
  (mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn): Rename pattern to ...
  (mve_vstrdq_scatter_base_wb_p_<supf>v2di): This.

gcc/testsuite/ChangeLog:

  PR target/97291
  * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c: Modify.
  * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c: Likewise.
  * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c: Likewise.
  * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c: Likewise.
  * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c: Likewise.
  * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c: Likewise.
  * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c: Likewise.
  * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c: Likewise.
  * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c: Likewise.
  * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c: Likewise.

(cherry picked from commit 3775358)
1 parent b3bfb38 commit 4199cfa

14 files changed

+32
-252
lines changed

gcc/config/arm/arm-builtins.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -811,23 +811,23 @@ arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
811811

812812
static enum arm_type_qualifiers
813813
arm_strsbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS]
814-
= { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_none};
814+
= { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_none};
815815
#define STRSBWBS_QUALIFIERS (arm_strsbwbs_qualifiers)
816816

817817
static enum arm_type_qualifiers
818818
arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
819-
= { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_unsigned};
819+
= { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_unsigned};
820820
#define STRSBWBU_QUALIFIERS (arm_strsbwbu_qualifiers)
821821

822822
static enum arm_type_qualifiers
823823
arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
824-
= { qualifier_void, qualifier_unsigned, qualifier_const,
824+
= { qualifier_unsigned, qualifier_unsigned, qualifier_const,
825825
qualifier_none, qualifier_unsigned};
826826
#define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers)
827827

828828
static enum arm_type_qualifiers
829829
arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
830-
= { qualifier_void, qualifier_unsigned, qualifier_const,
830+
= { qualifier_unsigned, qualifier_unsigned, qualifier_const,
831831
qualifier_unsigned, qualifier_unsigned};
832832
#define STRSBWBU_P_QUALIFIERS (arm_strsbwbu_p_qualifiers)
833833

gcc/config/arm/arm_mve.h

Lines changed: 10 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -13993,64 +13993,56 @@ __extension__ extern __inline void
1399313993
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1399413994
__arm_vstrdq_scatter_base_wb_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value)
1399513995
{
13996-
__builtin_mve_vstrdq_scatter_base_wb_sv2di (*__addr, __offset, __value);
13997-
__builtin_mve_vstrdq_scatter_base_wb_add_sv2di (*__addr, __offset, *__addr);
13996+
*__addr = __builtin_mve_vstrdq_scatter_base_wb_sv2di (*__addr, __offset, __value);
1399813997
}
1399913998

1400013999
__extension__ extern __inline void
1400114000
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1400214001
__arm_vstrdq_scatter_base_wb_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value)
1400314002
{
14004-
__builtin_mve_vstrdq_scatter_base_wb_uv2di (*__addr, __offset, __value);
14005-
__builtin_mve_vstrdq_scatter_base_wb_add_uv2di (*__addr, __offset, *__addr);
14003+
*__addr = __builtin_mve_vstrdq_scatter_base_wb_uv2di (*__addr, __offset, __value);
1400614004
}
1400714005

1400814006
__extension__ extern __inline void
1400914007
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1401014008
__arm_vstrdq_scatter_base_wb_p_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value, mve_pred16_t __p)
1401114009
{
14012-
__builtin_mve_vstrdq_scatter_base_wb_p_sv2di (*__addr, __offset, __value, __p);
14013-
__builtin_mve_vstrdq_scatter_base_wb_p_add_sv2di (*__addr, __offset, *__addr, __p);
14010+
*__addr = __builtin_mve_vstrdq_scatter_base_wb_p_sv2di (*__addr, __offset, __value, __p);
1401414011
}
1401514012

1401614013
__extension__ extern __inline void
1401714014
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1401814015
__arm_vstrdq_scatter_base_wb_p_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value, mve_pred16_t __p)
1401914016
{
14020-
__builtin_mve_vstrdq_scatter_base_wb_p_uv2di (*__addr, __offset, __value, __p);
14021-
__builtin_mve_vstrdq_scatter_base_wb_p_add_uv2di (*__addr, __offset, *__addr, __p);
14017+
*__addr = __builtin_mve_vstrdq_scatter_base_wb_p_uv2di (*__addr, __offset, __value, __p);
1402214018
}
1402314019

1402414020
__extension__ extern __inline void
1402514021
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1402614022
__arm_vstrwq_scatter_base_wb_p_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value, mve_pred16_t __p)
1402714023
{
14028-
__builtin_mve_vstrwq_scatter_base_wb_p_sv4si (*__addr, __offset, __value, __p);
14029-
__builtin_mve_vstrwq_scatter_base_wb_p_add_sv4si (*__addr, __offset, *__addr, __p);
14024+
*__addr = __builtin_mve_vstrwq_scatter_base_wb_p_sv4si (*__addr, __offset, __value, __p);
1403014025
}
1403114026

1403214027
__extension__ extern __inline void
1403314028
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1403414029
__arm_vstrwq_scatter_base_wb_p_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p)
1403514030
{
14036-
__builtin_mve_vstrwq_scatter_base_wb_p_uv4si (*__addr, __offset, __value, __p);
14037-
__builtin_mve_vstrwq_scatter_base_wb_p_add_uv4si (*__addr, __offset, *__addr, __p);
14031+
*__addr = __builtin_mve_vstrwq_scatter_base_wb_p_uv4si (*__addr, __offset, __value, __p);
1403814032
}
1403914033

1404014034
__extension__ extern __inline void
1404114035
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1404214036
__arm_vstrwq_scatter_base_wb_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value)
1404314037
{
14044-
__builtin_mve_vstrwq_scatter_base_wb_sv4si (*__addr, __offset, __value);
14045-
__builtin_mve_vstrwq_scatter_base_wb_add_sv4si (*__addr, __offset, *__addr);
14038+
*__addr = __builtin_mve_vstrwq_scatter_base_wb_sv4si (*__addr, __offset, __value);
1404614039
}
1404714040

1404814041
__extension__ extern __inline void
1404914042
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1405014043
__arm_vstrwq_scatter_base_wb_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value)
1405114044
{
14052-
__builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value);
14053-
__builtin_mve_vstrwq_scatter_base_wb_add_uv4si (*__addr, __offset, *__addr);
14045+
*__addr = __builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value);
1405414046
}
1405514047

1405614048
__extension__ extern __inline uint8x16_t
@@ -19158,16 +19150,14 @@ __extension__ extern __inline void
1915819150
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1915919151
__arm_vstrwq_scatter_base_wb_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value)
1916019152
{
19161-
__builtin_mve_vstrwq_scatter_base_wb_fv4sf (*__addr, __offset, __value);
19162-
__builtin_mve_vstrwq_scatter_base_wb_add_fv4sf (*__addr, __offset, *__addr);
19153+
*__addr = __builtin_mve_vstrwq_scatter_base_wb_fv4sf (*__addr, __offset, __value);
1916319154
}
1916419155

1916519156
__extension__ extern __inline void
1916619157
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1916719158
__arm_vstrwq_scatter_base_wb_p_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value, mve_pred16_t __p)
1916819159
{
19169-
__builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p);
19170-
__builtin_mve_vstrwq_scatter_base_wb_p_add_fv4sf (*__addr, __offset, *__addr, __p);
19160+
*__addr = __builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p);
1917119161
}
1917219162

1917319163
__extension__ extern __inline float16x8_t

gcc/config/arm/arm_mve_builtins.def

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -828,19 +828,9 @@ VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vidupq_m_n_u, v16qi, v8hi, v4si)
828828
VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_n_u, v16qi, v4si, v8hi)
829829
VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_n_u, v16qi, v4si, v8hi)
830830
VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si)
831-
VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_u, v4si)
832-
VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_s, v4si)
833-
VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_f, v4sf)
834831
VAR1 (STRSBWBU, vstrdq_scatter_base_wb_u, v2di)
835-
VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_u, v2di)
836-
VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_s, v2di)
837832
VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_u, v4si)
838-
VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_u, v4si)
839-
VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_s, v4si)
840-
VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_f, v4sf)
841833
VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_u, v2di)
842-
VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_u, v2di)
843-
VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_s, v2di)
844834
VAR1 (STRSBWBS, vstrwq_scatter_base_wb_s, v4si)
845835
VAR1 (STRSBWBS, vstrwq_scatter_base_wb_f, v4sf)
846836
VAR1 (STRSBWBS, vstrdq_scatter_base_wb_s, v2di)

0 commit comments

Comments
 (0)