diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 80e098eb1ea15..793dbfb559b0a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -399,6 +399,26 @@ void AArch64RegisterBankInfo::applyMappingImpl( MI.getOperand(1).setReg(ConstReg); return applyDefaultMapping(OpdMapper); } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: { + // SDAG will promote a 64bit G_EXTRACT_VECTOR_ELT to 128 to reduce the + // number of duplicate lane-extract patterns needed. Do the same here so + // that selection will operate on the larger vectors. + Register Src = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src); + assert(SrcTy.getSizeInBits() == 64 && "Expected 64-bit source vector"); + LLT DstTy = SrcTy.multiplyElements(2); + Builder.setInsertPt(*MI.getParent(), MI.getIterator()); + auto Undef = Builder.buildUndef(SrcTy); + auto Concat = Builder.buildConcatVectors(DstTy, {Src, Undef.getReg(0)}); + MRI.setRegBank(Undef.getReg(0), getRegBank(AArch64::FPRRegBankID)); + MRI.setRegBank(Concat.getReg(0), getRegBank(AArch64::FPRRegBankID)); + for (MachineInstr &Ext : + make_early_inc_range(MRI.use_nodbg_instructions(Src))) { + if (Ext.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) + Ext.getOperand(1).setReg(Concat.getReg(0)); + } + return applyDefaultMapping(OpdMapper); + } default: llvm_unreachable("Don't know how to handle that operation"); } @@ -1014,14 +1034,20 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } break; } - case TargetOpcode::G_EXTRACT_VECTOR_ELT: + case TargetOpcode::G_EXTRACT_VECTOR_ELT: { // Destination and source need to be FPRs. OpRegBankIdx[0] = PMI_FirstFPR; OpRegBankIdx[1] = PMI_FirstFPR; - - // Index needs to be a GPR. + // Index needs to be a GPR constant. OpRegBankIdx[2] = PMI_FirstGPR; + // SDAG will promote a 64bit G_EXTRACT_VECTOR_ELT to 128 to reduce the + // number of duplicate lane-extract patterns needed. Do the same here so + // that selection will operate on the larger vectors. + LLT Ty = MRI.getType(MI.getOperand(1).getReg()); + if (!Ty.isScalable() && Ty.getSizeInBits() == 64) + MappingID = CustomMappingID; break; + } case TargetOpcode::G_INSERT_VECTOR_ELT: OpRegBankIdx[0] = PMI_FirstFPR; OpRegBankIdx[1] = PMI_FirstFPR; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir index 35bc36d472b1a..4e569e0bc7e5f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir @@ -94,7 +94,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(<4 x s16>) = COPY $d0 ; CHECK-NEXT: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:fpr(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s16>), [[C]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:fpr(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:fpr(s16) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](<8 x s16>), [[C]](s64) ; CHECK-NEXT: $h0 = COPY [[EVEC]](s16) ; CHECK-NEXT: RET_ReallyLR implicit $h0 %0:_(<4 x s16>) = COPY $d0 diff --git a/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll index 7f922c0047553..287344bdbd29f 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll @@ -70,6 +70,9 @@ define <1 x i32> @test_bitf_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) { ; ; CHECK-GI-LABEL: test_bitf_v1i32: ; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 ; CHECK-GI-NEXT: fmov w8, s2 ; CHECK-GI-NEXT: fmov w9, s1 ; CHECK-GI-NEXT: fmov w10, s0 diff --git a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll index b8eb8269d605c..73fcee56506f9 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll @@ -70,6 +70,9 @@ define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) { ; ; CHECK-GI-LABEL: test_bit_v1i32: ; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 ; CHECK-GI-NEXT: fmov w8, s2 ; CHECK-GI-NEXT: fmov w9, s1 ; CHECK-GI-NEXT: fmov w10, s0 diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll index 0f56d25a47b2a..470d68a805718 100644 --- a/llvm/test/CodeGen/AArch64/abs.ll +++ b/llvm/test/CodeGen/AArch64/abs.ll @@ -243,6 +243,7 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){ ; ; CHECK-GI-LABEL: abs_v1i32: ; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: fmov w9, s0 ; CHECK-GI-NEXT: cmp w8, #0 diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index 367105f783817..60af49d867be7 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -1215,6 +1215,7 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) { ; ; CHECK-GI-LABEL: testDUP.v1i8: ; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: dup v0.8b, w8 ; CHECK-GI-NEXT: ret @@ -1710,7 +1711,7 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { ; CHECK-GI-NEXT: mov v2.16b, v1.16b ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: adrp x8, .LCPI127_0 -; CHECK-GI-NEXT: mov v1.b[0], v0.b[0] +; CHECK-GI-NEXT: mov b1, v0.b[0] ; CHECK-GI-NEXT: mov v1.b[1], v0.b[1] ; CHECK-GI-NEXT: mov v1.b[2], v0.b[2] ; CHECK-GI-NEXT: mov v1.b[3], v0.b[3] @@ -1817,7 +1818,7 @@ define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { ; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v8i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] +; CHECK-GI-NEXT: mov b2, v0.b[0] ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: mov v2.b[1], v0.b[1] ; CHECK-GI-NEXT: mov v2.b[2], v0.b[2] @@ -1903,7 +1904,7 @@ define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { ; CHECK-GI-NEXT: mov v2.16b, v1.16b ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: adrp x8, .LCPI131_0 -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] +; CHECK-GI-NEXT: mov h1, v0.h[0] ; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] ; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] ; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] @@ -1974,7 +1975,7 @@ define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { ; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v4i16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov v2.h[0], v0.h[0] +; CHECK-GI-NEXT: mov h2, v0.h[0] ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: mov v2.h[1], v0.h[1] ; CHECK-GI-NEXT: mov v2.h[2], v0.h[2] @@ -2036,7 +2037,7 @@ define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { ; CHECK-GI-NEXT: mov v2.16b, v1.16b ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: adrp x8, .LCPI135_0 -; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] +; CHECK-GI-NEXT: mov s1, v0.s[0] ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI135_0] ; CHECK-GI-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b @@ -2242,6 +2243,7 @@ define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) { ; ; CHECK-GI-LABEL: concat_vector_v8i8: ; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: dup v0.8b, w8 ; CHECK-GI-NEXT: ret @@ -2268,6 +2270,7 @@ define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) { ; ; CHECK-GI-LABEL: concat_vector_v16i8: ; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: dup v0.16b, w8 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll index f47c06e1ba4cb..ac6f041ccd70d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll @@ -614,16 +614,11 @@ entry: } define void @test_vst1_lane0_s16(ptr %a, <4 x i16> %b) { -; CHECK-GI-LABEL: test_vst1_lane0_s16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str h0, [x0] -; CHECK-GI-NEXT: ret -; -; CHECK-SD-LABEL: test_vst1_lane0_s16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: str h0, [x0] -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_vst1_lane0_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str h0, [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <4 x i16> %b, i32 0 store i16 %0, ptr %a, align 2 @@ -643,16 +638,11 @@ entry: } define void @test_vst1_lane0_s32(ptr %a, <2 x i32> %b) { -; CHECK-GI-LABEL: test_vst1_lane0_s32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str s0, [x0] -; CHECK-GI-NEXT: ret -; -; CHECK-SD-LABEL: test_vst1_lane0_s32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: str s0, [x0] -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_vst1_lane0_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <2 x i32> %b, i32 0 store i32 %0, ptr %a, align 4 @@ -683,16 +673,11 @@ entry: } define void @test_vst1_lane0_f32(ptr %a, <2 x float> %b) { -; CHECK-GI-LABEL: test_vst1_lane0_f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str s0, [x0] -; CHECK-GI-NEXT: ret -; -; CHECK-SD-LABEL: test_vst1_lane0_f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: str s0, [x0] -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_vst1_lane0_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <2 x float> %b, i32 0 store float %0, ptr %a, align 4 diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll index cb14adc00df00..1f8ac792d75f5 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll @@ -663,24 +663,14 @@ entry: } define i32 @test_vqrdmlahs_lane_s32(i32 %a, i32 %b, <2 x i32> %c) { -; CHECK-SD-LABEL: test_vqrdmlahs_lane_s32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov s1, w0 -; CHECK-SD-NEXT: fmov s2, w1 -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: sqrdmlah s1, s2, v0.s[1] -; CHECK-SD-NEXT: fmov w0, s1 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vqrdmlahs_lane_s32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: fmov s1, w0 -; CHECK-GI-NEXT: fmov s2, w1 -; CHECK-GI-NEXT: mov s0, v0.s[1] -; CHECK-GI-NEXT: sqrdmlah s1, s2, s0 -; CHECK-GI-NEXT: fmov w0, s1 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vqrdmlahs_lane_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: sqrdmlah s1, s2, v0.s[1] +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: ret entry: %vget_lane = extractelement <2 x i32> %c, i64 1 %vqrdmlahs_s32.i = tail call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 %a, i32 %b, i32 %vget_lane) #4 @@ -813,24 +803,14 @@ entry: } define i32 @test_vqrdmlshs_lane_s32(i32 %a, i32 %b, <2 x i32> %c) { -; CHECK-SD-LABEL: test_vqrdmlshs_lane_s32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov s1, w0 -; CHECK-SD-NEXT: fmov s2, w1 -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: sqrdmlsh s1, s2, v0.s[1] -; CHECK-SD-NEXT: fmov w0, s1 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vqrdmlshs_lane_s32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: fmov s1, w0 -; CHECK-GI-NEXT: fmov s2, w1 -; CHECK-GI-NEXT: mov s0, v0.s[1] -; CHECK-GI-NEXT: sqrdmlsh s1, s2, s0 -; CHECK-GI-NEXT: fmov w0, s1 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vqrdmlshs_lane_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: sqrdmlsh s1, s2, v0.s[1] +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: ret entry: %vget_lane = extractelement <2 x i32> %c, i64 1 %vqrdmlshs_s32.i = tail call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 %a, i32 %b, i32 %vget_lane) #4 @@ -867,3 +847,6 @@ entry: %vqrdmlshs_s32.i = tail call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 %a, i32 %b, i32 %vgetq_lane) #4 ret i32 %vqrdmlshs_s32.i } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll index d4cc154ac6afc..eccf918f74312 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -271,6 +271,7 @@ define half @test_vcvt_f16_f32(<1 x float> %x) { ; ; GISEL-LABEL: test_vcvt_f16_f32: ; GISEL: // %bb.0: +; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 ; GISEL-NEXT: fcvt h0, s0 ; GISEL-NEXT: ret %tmp = fptrunc <1 x float> %x to <1 x half> diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll index 898958fb4993f..9ae4782b52bd9 100644 --- a/llvm/test/CodeGen/AArch64/bswap.ll +++ b/llvm/test/CodeGen/AArch64/bswap.ll @@ -207,6 +207,7 @@ define <1 x i32> @bswap_v1i32(<1 x i32> %a){ ; ; CHECK-GI-LABEL: bswap_v1i32: ; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: rev w8, w8 ; CHECK-GI-NEXT: fmov s0, w8 diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll index acf15f1bd1178..1e8dd0c78043a 100644 --- a/llvm/test/CodeGen/AArch64/concat-vector.ll +++ b/llvm/test/CodeGen/AArch64/concat-vector.ll @@ -13,11 +13,10 @@ define <4 x i8> @concat1(<2 x i8> %A, <2 x i8> %B) { ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: mov w8, v0.s[1] ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov w9, v1.s[1] ; CHECK-GI-NEXT: mov v0.h[1], w8 -; CHECK-GI-NEXT: fmov w8, s1 -; CHECK-GI-NEXT: mov v0.h[2], w8 -; CHECK-GI-NEXT: mov v0.h[3], w9 +; CHECK-GI-NEXT: mov w8, v1.s[1] +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[3], w8 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %v4i8 = shufflevector <2 x i8> %A, <2 x i8> %B, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/double_reduct.ll b/llvm/test/CodeGen/AArch64/double_reduct.ll index f30895db2c098..2d146bf9aae89 100644 --- a/llvm/test/CodeGen/AArch64/double_reduct.ll +++ b/llvm/test/CodeGen/AArch64/double_reduct.ll @@ -65,10 +65,8 @@ define float @fmul_f32(<8 x float> %a, <4 x float> %b) { ; CHECK-GI-NEXT: mov d1, v0.d[1] ; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.2s ; CHECK-GI-NEXT: fmul v1.2s, v2.2s, v3.2s -; CHECK-GI-NEXT: mov s2, v0.s[1] -; CHECK-GI-NEXT: mov s3, v1.s[1] -; CHECK-GI-NEXT: fmul s0, s0, s2 -; CHECK-GI-NEXT: fmul s1, s1, s3 +; CHECK-GI-NEXT: fmul s0, s0, v0.s[1] +; CHECK-GI-NEXT: fmul s1, s1, v1.s[1] ; CHECK-GI-NEXT: fmul s0, s0, s1 ; CHECK-GI-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a) @@ -92,10 +90,8 @@ define float @fmul_f32_same(<4 x float> %a, <4 x float> %b) { ; CHECK-GI-NEXT: mov d3, v1.d[1] ; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s ; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s -; CHECK-GI-NEXT: mov s2, v0.s[1] -; CHECK-GI-NEXT: mov s3, v1.s[1] -; CHECK-GI-NEXT: fmul s0, s0, s2 -; CHECK-GI-NEXT: fmul s1, s1, s3 +; CHECK-GI-NEXT: fmul s0, s0, v0.s[1] +; CHECK-GI-NEXT: fmul s1, s1, v1.s[1] ; CHECK-GI-NEXT: fmul s0, s0, s1 ; CHECK-GI-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a) @@ -922,10 +918,8 @@ define float @nested_mul_f32(<4 x float> %a, <4 x float> %b, float %c, float %d) ; CHECK-GI-NEXT: mov d5, v1.d[1] ; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v4.2s ; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v5.2s -; CHECK-GI-NEXT: mov s4, v0.s[1] -; CHECK-GI-NEXT: mov s5, v1.s[1] -; CHECK-GI-NEXT: fmul s0, s0, s4 -; CHECK-GI-NEXT: fmul s1, s1, s5 +; CHECK-GI-NEXT: fmul s0, s0, v0.s[1] +; CHECK-GI-NEXT: fmul s1, s1, v1.s[1] ; CHECK-GI-NEXT: fmul s0, s0, s2 ; CHECK-GI-NEXT: fmul s1, s1, s3 ; CHECK-GI-NEXT: fmul s0, s0, s1 diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll index adc536da26f26..aa120f2643950 100644 --- a/llvm/test/CodeGen/AArch64/f16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -1496,7 +1496,7 @@ define half @test_copysign(half %a, half %b) #0 { ; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $d0 ; CHECK-CVT-GI-NEXT: // kill: def $h1 killed $h1 def $d1 ; CHECK-CVT-GI-NEXT: bif v0.8b, v1.8b, v2.8b -; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 killed $d0 +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-CVT-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: test_copysign: @@ -1505,7 +1505,7 @@ define half @test_copysign(half %a, half %b) #0 { ; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $d0 ; CHECK-FP16-GI-NEXT: // kill: def $h1 killed $h1 def $d1 ; CHECK-FP16-GI-NEXT: bif v0.8b, v1.8b, v2.8b -; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 killed $d0 +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-FP16-GI-NEXT: ret %r = call half @llvm.copysign.f16(half %a, half %b) ret half %r @@ -1536,7 +1536,7 @@ define half @test_copysign_f32(half %a, float %b) #0 { ; CHECK-CVT-GI-NEXT: mvni v2.4h, #128, lsl #8 ; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $d0 ; CHECK-CVT-GI-NEXT: bif v0.8b, v1.8b, v2.8b -; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 killed $d0 +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-CVT-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: test_copysign_f32: @@ -1545,7 +1545,7 @@ define half @test_copysign_f32(half %a, float %b) #0 { ; CHECK-FP16-GI-NEXT: mvni v2.4h, #128, lsl #8 ; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $d0 ; CHECK-FP16-GI-NEXT: bif v0.8b, v1.8b, v2.8b -; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 killed $d0 +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-FP16-GI-NEXT: ret %tb = fptrunc float %b to half %r = call half @llvm.copysign.f16(half %a, half %tb) @@ -1577,7 +1577,7 @@ define half @test_copysign_f64(half %a, double %b) #0 { ; CHECK-CVT-GI-NEXT: mvni v2.4h, #128, lsl #8 ; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $d0 ; CHECK-CVT-GI-NEXT: bif v0.8b, v1.8b, v2.8b -; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 killed $d0 +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-CVT-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: test_copysign_f64: @@ -1586,7 +1586,7 @@ define half @test_copysign_f64(half %a, double %b) #0 { ; CHECK-FP16-GI-NEXT: mvni v2.4h, #128, lsl #8 ; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $d0 ; CHECK-FP16-GI-NEXT: bif v0.8b, v1.8b, v2.8b -; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 killed $d0 +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-FP16-GI-NEXT: ret %tb = fptrunc double %b to half %r = call half @llvm.copysign.f16(half %a, half %tb) diff --git a/llvm/test/CodeGen/AArch64/faddsub.ll b/llvm/test/CodeGen/AArch64/faddsub.ll index b15579199a059..943073e2a603e 100644 --- a/llvm/test/CodeGen/AArch64/faddsub.ll +++ b/llvm/test/CodeGen/AArch64/faddsub.ll @@ -196,7 +196,7 @@ define <7 x half> @fadd_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v1.h[6] -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v2.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v4.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[1] @@ -537,7 +537,7 @@ define <7 x half> @fsub_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v1.h[6] -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v2.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v4.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[1] diff --git a/llvm/test/CodeGen/AArch64/fcopysign.ll b/llvm/test/CodeGen/AArch64/fcopysign.ll index 3a5f7e2cd6b29..7ac1f37af2e0b 100644 --- a/llvm/test/CodeGen/AArch64/fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/fcopysign.ll @@ -33,7 +33,7 @@ define float @copysign_f32(float %a, float %b) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $d0 ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 def $d1 ; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b -; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $d0 +; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = call float @llvm.copysign.f32(float %a, float %b) @@ -56,7 +56,7 @@ define half @copysign_f16(half %a, half %b) { ; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $d0 ; CHECK-GI-NEXT: // kill: def $h1 killed $h1 def $d1 ; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b -; CHECK-GI-NEXT: // kill: def $h0 killed $h0 killed $d0 +; CHECK-GI-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = call half @llvm.copysign.f16(half %a, half %b) diff --git a/llvm/test/CodeGen/AArch64/fcvt.ll b/llvm/test/CodeGen/AArch64/fcvt.ll index b408e9c1bd4e6..2c512de413aeb 100644 --- a/llvm/test/CodeGen/AArch64/fcvt.ll +++ b/llvm/test/CodeGen/AArch64/fcvt.ll @@ -169,7 +169,7 @@ define <7 x half> @ceil_v7f16(<7 x half> %a) { ; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6] -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1] ; CHECK-GI-NOFP16-NEXT: frintp v2.4s, v2.4s @@ -468,7 +468,7 @@ define <7 x half> @floor_v7f16(<7 x half> %a) { ; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6] -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1] ; CHECK-GI-NOFP16-NEXT: frintm v2.4s, v2.4s @@ -767,7 +767,7 @@ define <7 x half> @nearbyint_v7f16(<7 x half> %a) { ; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6] -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1] ; CHECK-GI-NOFP16-NEXT: frinti v2.4s, v2.4s @@ -1066,7 +1066,7 @@ define <7 x half> @roundeven_v7f16(<7 x half> %a) { ; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6] -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1] ; CHECK-GI-NOFP16-NEXT: frintn v2.4s, v2.4s @@ -1365,7 +1365,7 @@ define <7 x half> @rint_v7f16(<7 x half> %a) { ; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6] -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1] ; CHECK-GI-NOFP16-NEXT: frintx v2.4s, v2.4s @@ -1664,7 +1664,7 @@ define <7 x half> @round_v7f16(<7 x half> %a) { ; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6] -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1] ; CHECK-GI-NOFP16-NEXT: frinta v2.4s, v2.4s @@ -1963,7 +1963,7 @@ define <7 x half> @trunc_v7f16(<7 x half> %a) { ; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6] -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1] ; CHECK-GI-NOFP16-NEXT: frintz v2.4s, v2.4s diff --git a/llvm/test/CodeGen/AArch64/fdiv.ll b/llvm/test/CodeGen/AArch64/fdiv.ll index 5bdccccc62b99..d232ca4d9c131 100644 --- a/llvm/test/CodeGen/AArch64/fdiv.ll +++ b/llvm/test/CodeGen/AArch64/fdiv.ll @@ -199,7 +199,7 @@ define <7 x half> @fdiv_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: fdiv v1.4s, v0.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v2.h[0] ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[2] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s diff --git a/llvm/test/CodeGen/AArch64/fminimummaximum.ll b/llvm/test/CodeGen/AArch64/fminimummaximum.ll index fb12f8acf1745..1c7c55d12a864 100644 --- a/llvm/test/CodeGen/AArch64/fminimummaximum.ll +++ b/llvm/test/CodeGen/AArch64/fminimummaximum.ll @@ -672,7 +672,7 @@ define <7 x half> @min_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-NOFP16-GI-NEXT: fcvtn v2.4h, v2.4s ; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v0.h[6] ; CHECK-NOFP16-GI-NEXT: mov v3.h[2], v1.h[6] -; CHECK-NOFP16-GI-NEXT: mov v0.h[0], v2.h[0] +; CHECK-NOFP16-GI-NEXT: mov h0, v2.h[0] ; CHECK-NOFP16-GI-NEXT: fcvtl v1.4s, v4.4h ; CHECK-NOFP16-GI-NEXT: fcvtl v3.4s, v3.4h ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v2.h[1] @@ -770,7 +770,7 @@ define <7 x half> @max_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-NOFP16-GI-NEXT: fcvtn v2.4h, v2.4s ; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v0.h[6] ; CHECK-NOFP16-GI-NEXT: mov v3.h[2], v1.h[6] -; CHECK-NOFP16-GI-NEXT: mov v0.h[0], v2.h[0] +; CHECK-NOFP16-GI-NEXT: mov h0, v2.h[0] ; CHECK-NOFP16-GI-NEXT: fcvtl v1.4s, v4.4h ; CHECK-NOFP16-GI-NEXT: fcvtl v3.4s, v3.4h ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v2.h[1] diff --git a/llvm/test/CodeGen/AArch64/fminmax.ll b/llvm/test/CodeGen/AArch64/fminmax.ll index 64f0da8b4cd0f..da9b57223cff7 100644 --- a/llvm/test/CodeGen/AArch64/fminmax.ll +++ b/llvm/test/CodeGen/AArch64/fminmax.ll @@ -672,7 +672,7 @@ define <7 x half> @min_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-NOFP16-GI-NEXT: fcvtn v2.4h, v2.4s ; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v0.h[6] ; CHECK-NOFP16-GI-NEXT: mov v3.h[2], v1.h[6] -; CHECK-NOFP16-GI-NEXT: mov v0.h[0], v2.h[0] +; CHECK-NOFP16-GI-NEXT: mov h0, v2.h[0] ; CHECK-NOFP16-GI-NEXT: fcvtl v1.4s, v4.4h ; CHECK-NOFP16-GI-NEXT: fcvtl v3.4s, v3.4h ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v2.h[1] @@ -770,7 +770,7 @@ define <7 x half> @max_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-NOFP16-GI-NEXT: fcvtn v2.4h, v2.4s ; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v0.h[6] ; CHECK-NOFP16-GI-NEXT: mov v3.h[2], v1.h[6] -; CHECK-NOFP16-GI-NEXT: mov v0.h[0], v2.h[0] +; CHECK-NOFP16-GI-NEXT: mov h0, v2.h[0] ; CHECK-NOFP16-GI-NEXT: fcvtl v1.4s, v4.4h ; CHECK-NOFP16-GI-NEXT: fcvtl v3.4s, v3.4h ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v2.h[1] diff --git a/llvm/test/CodeGen/AArch64/fmla.ll b/llvm/test/CodeGen/AArch64/fmla.ll index a37aabb0b5384..ef59209b69921 100644 --- a/llvm/test/CodeGen/AArch64/fmla.ll +++ b/llvm/test/CodeGen/AArch64/fmla.ll @@ -268,7 +268,7 @@ define <7 x half> @fma_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) { ; CHECK-GI-NOFP16-NEXT: mov v6.h[2], v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v1.h[6] ; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v2.h[6] -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v5.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v6.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v4.4h @@ -873,7 +873,7 @@ define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) { ; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s ; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v3.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1] @@ -1358,7 +1358,7 @@ define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) { ; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s ; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v3.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1] diff --git a/llvm/test/CodeGen/AArch64/fmul.ll b/llvm/test/CodeGen/AArch64/fmul.ll index bd3d1353e643e..51eba5666f681 100644 --- a/llvm/test/CodeGen/AArch64/fmul.ll +++ b/llvm/test/CodeGen/AArch64/fmul.ll @@ -196,7 +196,7 @@ define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v1.h[6] -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v2.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v4.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[1] diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index 9c21d2bf083a2..bcebbf4982eaa 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -31,6 +31,7 @@ define <1 x i32> @test_signed_v1f32_v1i32(<1 x float> %f) { ; ; CHECK-GI-LABEL: test_signed_v1f32_v1i32: ; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: fcvtzs w8, s0 ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index 44847a41287d6..38895eb7bd761 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -31,6 +31,7 @@ define <1 x i32> @test_unsigned_v1f32_v1i32(<1 x float> %f) { ; ; CHECK-GI-LABEL: test_unsigned_v1f32_v1i32: ; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: fcvtzu w8, s0 ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll index 1f84c944d7c16..a428c95c90387 100644 --- a/llvm/test/CodeGen/AArch64/fptrunc.ll +++ b/llvm/test/CodeGen/AArch64/fptrunc.ll @@ -263,7 +263,7 @@ define <3 x float> @fptrunc_v3f64_v3f32(<3 x double> %a) { ; CHECK-GI-NEXT: fcvt s2, d2 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: fcvtn v1.2s, v0.2d -; CHECK-GI-NEXT: mov v0.s[0], v1.s[0] +; CHECK-GI-NEXT: mov s0, v1.s[0] ; CHECK-GI-NEXT: mov v0.s[1], v1.s[1] ; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] ; CHECK-GI-NEXT: ret @@ -354,7 +354,7 @@ define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) { ; CHECK-GI-LABEL: fptrunc_v2f32_v2f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] +; CHECK-GI-NEXT: mov s1, v0.s[0] ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fsqrt.ll b/llvm/test/CodeGen/AArch64/fsqrt.ll index 6c5fd8e52b017..1e888a4c0e193 100644 --- a/llvm/test/CodeGen/AArch64/fsqrt.ll +++ b/llvm/test/CodeGen/AArch64/fsqrt.ll @@ -203,7 +203,7 @@ define <7 x half> @sqrt_v7f16(<7 x half> %a) { ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: fsqrt v2.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h0, v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[2] ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s diff --git a/llvm/test/CodeGen/AArch64/insertextract.ll b/llvm/test/CodeGen/AArch64/insertextract.ll index 5c89316e5f570..1af36ccaefa30 100644 --- a/llvm/test/CodeGen/AArch64/insertextract.ll +++ b/llvm/test/CodeGen/AArch64/insertextract.ll @@ -1478,16 +1478,11 @@ entry: } define float @extract_v2f32_0(<2 x float> %a, i32 %c) { -; CHECK-SD-LABEL: extract_v2f32_0: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: extract_v2f32_0: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $d0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: extract_v2f32_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: ret entry: %d = extractelement <2 x float> %a, i32 0 ret float %d @@ -1686,16 +1681,11 @@ entry: } define half @extract_v4f16_0(<4 x half> %a, i32 %c) { -; CHECK-SD-LABEL: extract_v4f16_0: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: extract_v4f16_0: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $h0 killed $h0 killed $d0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: extract_v4f16_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-NEXT: ret entry: %d = extractelement <4 x half> %a, i32 0 ret half %d @@ -2159,16 +2149,11 @@ entry: } define i32 @extract_v2i32_0(<2 x i32> %a, i32 %c) { -; CHECK-SD-LABEL: extract_v2i32_0: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: fmov w0, s0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: extract_v2i32_0: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: extract_v2i32_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret entry: %d = extractelement <2 x i32> %a, i32 0 ret i32 %d diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index e8194b9bd9b27..5ec30b6e8a667 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -4378,7 +4378,7 @@ define <3 x float> @stofp_v3i64_v3f32(<3 x i64> %a) { ; CHECK-GI-NEXT: scvtf v0.2d, v0.2d ; CHECK-GI-NEXT: fcvtn v2.2s, v2.2d ; CHECK-GI-NEXT: fcvtn v1.2s, v0.2d -; CHECK-GI-NEXT: mov v0.s[0], v1.s[0] +; CHECK-GI-NEXT: mov s0, v1.s[0] ; CHECK-GI-NEXT: mov v0.s[1], v1.s[1] ; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] ; CHECK-GI-NEXT: ret @@ -4415,7 +4415,7 @@ define <3 x float> @utofp_v3i64_v3f32(<3 x i64> %a) { ; CHECK-GI-NEXT: ucvtf v0.2d, v0.2d ; CHECK-GI-NEXT: fcvtn v2.2s, v2.2d ; CHECK-GI-NEXT: fcvtn v1.2s, v0.2d -; CHECK-GI-NEXT: mov v0.s[0], v1.s[0] +; CHECK-GI-NEXT: mov s0, v1.s[0] ; CHECK-GI-NEXT: mov v0.s[1], v1.s[1] ; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] ; CHECK-GI-NEXT: ret @@ -6393,7 +6393,7 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) { ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: scvtf v0.2d, v0.2d ; CHECK-GI-NOFP16-NEXT: fcvtn v0.2s, v0.2d -; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] +; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: ret @@ -6439,7 +6439,7 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) { ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: ucvtf v0.2d, v0.2d ; CHECK-GI-NOFP16-NEXT: fcvtn v0.2s, v0.2d -; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] +; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: ret @@ -7375,7 +7375,7 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) { ; CHECK-GI-LABEL: stofp_v2i32_v2f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: scvtf v0.2s, v0.2s -; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] +; CHECK-GI-NEXT: mov s1, v0.s[0] ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s ; CHECK-GI-NEXT: ret @@ -7395,7 +7395,7 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) { ; CHECK-GI-LABEL: utofp_v2i32_v2f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ucvtf v0.2s, v0.2s -; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] +; CHECK-GI-NEXT: mov s1, v0.s[0] ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s ; CHECK-GI-NEXT: ret @@ -7602,7 +7602,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) { ; CHECK-GI-NOFP16-NEXT: shl v0.2s, v0.2s, #16 ; CHECK-GI-NOFP16-NEXT: sshr v0.2s, v0.2s, #16 ; CHECK-GI-NOFP16-NEXT: scvtf v0.2s, v0.2s -; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] +; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: ret @@ -7637,7 +7637,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) { ; CHECK-GI-NOFP16-NEXT: movi d1, #0x00ffff0000ffff ; CHECK-GI-NOFP16-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-GI-NOFP16-NEXT: ucvtf v0.2s, v0.2s -; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] +; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: ret @@ -8124,7 +8124,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-GI-NOFP16-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-GI-NOFP16-NEXT: scvtf v0.2s, v0.2s -; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] +; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: ret @@ -8175,7 +8175,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: movi d1, #0x0000ff000000ff ; CHECK-GI-NOFP16-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-GI-NOFP16-NEXT: ucvtf v0.2s, v0.2s -; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] +; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s ; CHECK-GI-NOFP16-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/llvm.exp10.ll b/llvm/test/CodeGen/AArch64/llvm.exp10.ll index c1ea891bc86e7..9d165556f1c73 100644 --- a/llvm/test/CodeGen/AArch64/llvm.exp10.ll +++ b/llvm/test/CodeGen/AArch64/llvm.exp10.ll @@ -301,28 +301,17 @@ define float @exp10_f32(float %x) { } define <1 x float> @exp10_v1f32(<1 x float> %x) { -; SDAG-LABEL: exp10_v1f32: -; SDAG: // %bb.0: -; SDAG-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; SDAG-NEXT: .cfi_def_cfa_offset 16 -; SDAG-NEXT: .cfi_offset w30, -16 -; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 -; SDAG-NEXT: // kill: def $s0 killed $s0 killed $q0 -; SDAG-NEXT: bl exp10f -; SDAG-NEXT: // kill: def $s0 killed $s0 def $d0 -; SDAG-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; SDAG-NEXT: ret -; -; GISEL-LABEL: exp10_v1f32: -; GISEL: // %bb.0: -; GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; GISEL-NEXT: .cfi_def_cfa_offset 16 -; GISEL-NEXT: .cfi_offset w30, -16 -; GISEL-NEXT: // kill: def $s0 killed $s0 killed $d0 -; GISEL-NEXT: bl exp10f -; GISEL-NEXT: // kill: def $s0 killed $s0 def $d0 -; GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; GISEL-NEXT: ret +; CHECK-LABEL: exp10_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl exp10f +; CHECK-NEXT: // kill: def $s0 killed $s0 def $d0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call <1 x float> @llvm.exp10.v1f32(<1 x float> %x) ret <1 x float> %r } diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll index c158d8ad93b05..eded13a6b3669 100644 --- a/llvm/test/CodeGen/AArch64/popcount.ll +++ b/llvm/test/CodeGen/AArch64/popcount.ll @@ -655,7 +655,9 @@ define i32 @ctpop_into_extract(ptr %p) { ; CHECKO0-NEXT: // implicit-def: $d2 ; CHECKO0-NEXT: fmov s2, w8 ; CHECKO0-NEXT: ldr d0, [x0] -; CHECKO0-NEXT: fmov s1, s0 +; CHECKO0-NEXT: // implicit-def: $q1 +; CHECKO0-NEXT: fmov d1, d0 +; CHECKO0-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECKO0-NEXT: fmov w8, s1 ; CHECKO0-NEXT: fmov s1, w8 ; CHECKO0-NEXT: // kill: def $d1 killed $s1 @@ -725,7 +727,9 @@ define i32 @ctpop_into_extract(ptr %p) { ; GISELO0-NEXT: // implicit-def: $d2 ; GISELO0-NEXT: fmov s2, w8 ; GISELO0-NEXT: ldr d0, [x0] -; GISELO0-NEXT: fmov s1, s0 +; GISELO0-NEXT: // implicit-def: $q1 +; GISELO0-NEXT: fmov d1, d0 +; GISELO0-NEXT: // kill: def $s1 killed $s1 killed $q1 ; GISELO0-NEXT: fmov w8, s1 ; GISELO0-NEXT: fmov s1, w8 ; GISELO0-NEXT: // kill: def $d1 killed $s1 diff --git a/llvm/test/CodeGen/AArch64/ptradd.ll b/llvm/test/CodeGen/AArch64/ptradd.ll index 28a8f4303765b..4a1c50b67ed7b 100644 --- a/llvm/test/CodeGen/AArch64/ptradd.ll +++ b/llvm/test/CodeGen/AArch64/ptradd.ll @@ -51,6 +51,7 @@ define <1 x ptr> @vector_gep_v1i32(<1 x ptr> %b, <1 x i32> %off) { ; ; CHECK-GI-LABEL: vector_gep_v1i32: ; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: fmov w8, s1 ; CHECK-GI-NEXT: fmov x9, d0 ; CHECK-GI-NEXT: add x8, x9, w8, sxtw diff --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll index 9827cb3526f99..1652eb70b0625 100644 --- a/llvm/test/CodeGen/AArch64/shift.ll +++ b/llvm/test/CodeGen/AArch64/shift.ll @@ -595,6 +595,8 @@ define <1 x i32> @shl_v1i32(<1 x i32> %0, <1 x i32> %1){ ; ; CHECK-GI-LABEL: shl_v1i32: ; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: fmov w9, s1 ; CHECK-GI-NEXT: lsl w8, w8, w9 @@ -771,6 +773,8 @@ define <1 x i32> @ashr_v1i32(<1 x i32> %0, <1 x i32> %1){ ; ; CHECK-GI-LABEL: ashr_v1i32: ; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: fmov w9, s1 ; CHECK-GI-NEXT: asr w8, w8, w9 @@ -943,6 +947,8 @@ define <1 x i32> @lshr_v1i32(<1 x i32> %0, <1 x i32> %1){ ; ; CHECK-GI-LABEL: lshr_v1i32: ; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: fmov w9, s1 ; CHECK-GI-NEXT: lsr w8, w8, w9 diff --git a/llvm/test/CodeGen/AArch64/store.ll b/llvm/test/CodeGen/AArch64/store.ll index 3a9f12b838702..0fe1ef5039929 100644 --- a/llvm/test/CodeGen/AArch64/store.ll +++ b/llvm/test/CodeGen/AArch64/store.ll @@ -167,16 +167,11 @@ define void @store_v16i16(<16 x i16> %a, ptr %ptr){ } define void @store_v1i32(<1 x i32> %a, ptr %ptr){ -; CHECK-SD-LABEL: store_v1i32: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str s0, [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: store_v1i32: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: str s0, [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: store_v1i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret store <1 x i32> %a, ptr %ptr ret void } diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll index 77483ebb2235c..63e26a25f4e27 100644 --- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll +++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll @@ -930,195 +930,85 @@ define <2 x i8> @vector_to_vector_cast(<16 x i1> %arg) nounwind { ; CHECK-GI-LABEL: vector_to_vector_cast: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #16 -; CHECK-GI-NEXT: umov.b w8, v0[1] -; CHECK-GI-NEXT: mov d1, v0[1] ; CHECK-GI-NEXT: umov.b w10, v0[1] -; CHECK-GI-NEXT: umov.b w9, v0[0] -; CHECK-GI-NEXT: umov.b w13, v0[0] -; CHECK-GI-NEXT: umov.b w14, v0[2] +; CHECK-GI-NEXT: umov.b w9, v0[1] +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: umov.b w8, v0[0] +; CHECK-GI-NEXT: umov.b w11, v0[0] +; CHECK-GI-NEXT: umov.b w12, v0[2] +; CHECK-GI-NEXT: umov.b w13, v0[2] ; CHECK-GI-NEXT: umov.b w15, v0[3] -; CHECK-GI-NEXT: umov.b w11, v0[2] ; CHECK-GI-NEXT: umov.b w16, v0[4] -; CHECK-GI-NEXT: umov.b w17, v0[5] -; CHECK-GI-NEXT: umov.b w12, v0[3] -; CHECK-GI-NEXT: and w8, w8, #0x1 +; CHECK-GI-NEXT: umov.b w14, v0[3] ; CHECK-GI-NEXT: and w10, w10, #0x1 -; CHECK-GI-NEXT: umov.b w0, v1[1] -; CHECK-GI-NEXT: bfi w9, w8, #1, #31 -; CHECK-GI-NEXT: bfi w13, w10, #1, #31 -; CHECK-GI-NEXT: and w14, w14, #0x1 -; CHECK-GI-NEXT: umov.b w8, v1[0] -; CHECK-GI-NEXT: umov.b w10, v1[2] +; CHECK-GI-NEXT: and w9, w9, #0x1 +; CHECK-GI-NEXT: bfi w8, w10, #1, #31 +; CHECK-GI-NEXT: umov.b w10, v1[1] +; CHECK-GI-NEXT: and w12, w12, #0x1 +; CHECK-GI-NEXT: bfi w11, w9, #1, #31 +; CHECK-GI-NEXT: umov.b w9, v1[0] +; CHECK-GI-NEXT: and w13, w13, #0x1 +; CHECK-GI-NEXT: orr w8, w8, w12, lsl #2 +; CHECK-GI-NEXT: umov.b w12, v1[2] ; CHECK-GI-NEXT: and w15, w15, #0x1 -; CHECK-GI-NEXT: orr w13, w13, w14, lsl #2 -; CHECK-GI-NEXT: umov.b w14, v1[3] -; CHECK-GI-NEXT: and w11, w11, #0x1 -; CHECK-GI-NEXT: and w0, w0, #0x1 +; CHECK-GI-NEXT: orr w11, w11, w13, lsl #2 +; CHECK-GI-NEXT: umov.b w13, v0[5] ; CHECK-GI-NEXT: and w16, w16, #0x1 -; CHECK-GI-NEXT: orr w9, w9, w11, lsl #2 -; CHECK-GI-NEXT: orr w13, w13, w15, lsl #3 -; CHECK-GI-NEXT: umov.b w15, v1[4] -; CHECK-GI-NEXT: umov.b w11, v0[6] -; CHECK-GI-NEXT: bfi w8, w0, #1, #31 +; CHECK-GI-NEXT: orr w8, w8, w15, lsl #3 +; CHECK-GI-NEXT: umov.b w15, v1[3] ; CHECK-GI-NEXT: and w10, w10, #0x1 -; CHECK-GI-NEXT: and w17, w17, #0x1 -; CHECK-GI-NEXT: orr w13, w13, w16, lsl #4 +; CHECK-GI-NEXT: bfi w9, w10, #1, #31 +; CHECK-GI-NEXT: umov.b w10, v0[6] ; CHECK-GI-NEXT: and w14, w14, #0x1 -; CHECK-GI-NEXT: umov.b w0, v0[7] -; CHECK-GI-NEXT: orr w8, w8, w10, lsl #2 -; CHECK-GI-NEXT: umov.b w10, v1[5] -; CHECK-GI-NEXT: umov.b w16, v1[6] -; CHECK-GI-NEXT: orr w13, w13, w17, lsl #5 -; CHECK-GI-NEXT: umov.b w17, v0[4] -; CHECK-GI-NEXT: and w15, w15, #0x1 -; CHECK-GI-NEXT: orr w8, w8, w14, lsl #3 +; CHECK-GI-NEXT: orr w8, w8, w16, lsl #4 +; CHECK-GI-NEXT: umov.b w16, v1[4] ; CHECK-GI-NEXT: and w12, w12, #0x1 -; CHECK-GI-NEXT: and w11, w11, #0x1 -; CHECK-GI-NEXT: umov.b w14, v1[7] -; CHECK-GI-NEXT: orr w9, w9, w12, lsl #3 -; CHECK-GI-NEXT: orr w11, w13, w11, lsl #6 -; CHECK-GI-NEXT: orr w8, w8, w15, lsl #4 -; CHECK-GI-NEXT: umov.b w15, v0[5] -; CHECK-GI-NEXT: and w10, w10, #0x1 -; CHECK-GI-NEXT: and w0, w0, #0x1 -; CHECK-GI-NEXT: and w12, w17, #0x1 -; CHECK-GI-NEXT: umov.b w13, v0[1] -; CHECK-GI-NEXT: orr w8, w8, w10, lsl #5 -; CHECK-GI-NEXT: and w16, w16, #0x1 -; CHECK-GI-NEXT: orr w9, w9, w12, lsl #4 -; CHECK-GI-NEXT: umov.b w10, v0[0] -; CHECK-GI-NEXT: orr w11, w11, w0, lsl #7 -; CHECK-GI-NEXT: and w14, w14, #0x1 -; CHECK-GI-NEXT: and w12, w15, #0x1 -; CHECK-GI-NEXT: umov.b w15, v0[2] -; CHECK-GI-NEXT: orr w8, w8, w16, lsl #6 -; CHECK-GI-NEXT: orr w9, w9, w12, lsl #5 -; CHECK-GI-NEXT: umov.b w12, v0[6] -; CHECK-GI-NEXT: strb w11, [sp, #8] -; CHECK-GI-NEXT: and w11, w13, #0x1 -; CHECK-GI-NEXT: umov.b w13, v0[3] -; CHECK-GI-NEXT: orr w8, w8, w14, lsl #7 -; CHECK-GI-NEXT: umov.b w14, v0[7] -; CHECK-GI-NEXT: ldr b0, [sp, #8] -; CHECK-GI-NEXT: bfi w10, w11, #1, #31 -; CHECK-GI-NEXT: and w11, w15, #0x1 -; CHECK-GI-NEXT: strb w8, [sp, #9] -; CHECK-GI-NEXT: umov.b w15, v0[4] -; CHECK-GI-NEXT: and w8, w12, #0x1 -; CHECK-GI-NEXT: orr w10, w10, w11, lsl #2 -; CHECK-GI-NEXT: orr w8, w9, w8, lsl #6 -; CHECK-GI-NEXT: and w9, w13, #0x1 -; CHECK-GI-NEXT: umov.b w11, v0[1] -; CHECK-GI-NEXT: orr w9, w10, w9, lsl #3 -; CHECK-GI-NEXT: umov.b w10, v0[5] -; CHECK-GI-NEXT: umov.b w12, v0[0] -; CHECK-GI-NEXT: and w13, w14, #0x1 -; CHECK-GI-NEXT: umov.b w16, v0[2] -; CHECK-GI-NEXT: umov.b w17, v0[3] -; CHECK-GI-NEXT: and w14, w15, #0x1 -; CHECK-GI-NEXT: umov.b w15, v0[2] -; CHECK-GI-NEXT: orr w8, w8, w13, lsl #7 -; CHECK-GI-NEXT: orr w9, w9, w14, lsl #4 -; CHECK-GI-NEXT: umov.b w13, v0[6] -; CHECK-GI-NEXT: and w11, w11, #0x1 -; CHECK-GI-NEXT: umov.b w14, v0[3] -; CHECK-GI-NEXT: strb w8, [sp, #10] -; CHECK-GI-NEXT: and w8, w10, #0x1 -; CHECK-GI-NEXT: bfi w12, w11, #1, #31 -; CHECK-GI-NEXT: orr w8, w9, w8, lsl #5 -; CHECK-GI-NEXT: umov.b w10, v0[4] -; CHECK-GI-NEXT: and w9, w15, #0x1 -; CHECK-GI-NEXT: umov.b w11, v0[7] -; CHECK-GI-NEXT: umov.b w15, v0[1] -; CHECK-GI-NEXT: orr w9, w12, w9, lsl #2 -; CHECK-GI-NEXT: umov.b w12, v0[5] +; CHECK-GI-NEXT: orr w9, w9, w12, lsl #2 ; CHECK-GI-NEXT: and w13, w13, #0x1 -; CHECK-GI-NEXT: and w14, w14, #0x1 -; CHECK-GI-NEXT: orr w8, w8, w13, lsl #6 -; CHECK-GI-NEXT: umov.b w13, v0[0] -; CHECK-GI-NEXT: orr w9, w9, w14, lsl #3 -; CHECK-GI-NEXT: and w10, w10, #0x1 -; CHECK-GI-NEXT: umov.b w14, v0[6] -; CHECK-GI-NEXT: and w11, w11, #0x1 -; CHECK-GI-NEXT: and w15, w15, #0x1 -; CHECK-GI-NEXT: umov.b w0, v0[3] -; CHECK-GI-NEXT: orr w9, w9, w10, lsl #4 -; CHECK-GI-NEXT: and w10, w12, #0x1 -; CHECK-GI-NEXT: umov.b w12, v0[7] -; CHECK-GI-NEXT: orr w8, w8, w11, lsl #7 -; CHECK-GI-NEXT: bfi w13, w15, #1, #31 -; CHECK-GI-NEXT: and w11, w16, #0x1 -; CHECK-GI-NEXT: orr w9, w9, w10, lsl #5 -; CHECK-GI-NEXT: and w10, w14, #0x1 -; CHECK-GI-NEXT: umov.b w14, v0[4] -; CHECK-GI-NEXT: strb w8, [sp, #11] -; CHECK-GI-NEXT: umov.b w15, v0[1] -; CHECK-GI-NEXT: umov.b w16, v0[3] -; CHECK-GI-NEXT: orr w8, w9, w10, lsl #6 -; CHECK-GI-NEXT: orr w9, w13, w11, lsl #2 -; CHECK-GI-NEXT: and w10, w12, #0x1 -; CHECK-GI-NEXT: and w11, w17, #0x1 -; CHECK-GI-NEXT: umov.b w12, v0[5] -; CHECK-GI-NEXT: umov.b w17, v0[0] -; CHECK-GI-NEXT: orr w8, w8, w10, lsl #7 -; CHECK-GI-NEXT: orr w9, w9, w11, lsl #3 -; CHECK-GI-NEXT: umov.b w10, v0[1] -; CHECK-GI-NEXT: and w11, w14, #0x1 -; CHECK-GI-NEXT: umov.b w14, v0[0] +; CHECK-GI-NEXT: umov.b w12, v0[4] +; CHECK-GI-NEXT: orr w8, w8, w13, lsl #5 +; CHECK-GI-NEXT: umov.b w13, v1[5] ; CHECK-GI-NEXT: and w15, w15, #0x1 -; CHECK-GI-NEXT: orr w9, w9, w11, lsl #4 -; CHECK-GI-NEXT: umov.b w11, v0[2] -; CHECK-GI-NEXT: umov.b w13, v0[6] -; CHECK-GI-NEXT: and w12, w12, #0x1 -; CHECK-GI-NEXT: bfi w17, w15, #1, #31 -; CHECK-GI-NEXT: umov.b w15, v0[5] -; CHECK-GI-NEXT: orr w9, w9, w12, lsl #5 +; CHECK-GI-NEXT: orr w9, w9, w15, lsl #3 ; CHECK-GI-NEXT: and w10, w10, #0x1 -; CHECK-GI-NEXT: umov.b w12, v0[2] -; CHECK-GI-NEXT: bfi w14, w10, #1, #31 -; CHECK-GI-NEXT: umov.b w10, v0[4] -; CHECK-GI-NEXT: ldr b1, [sp, #9] -; CHECK-GI-NEXT: and w11, w11, #0x1 +; CHECK-GI-NEXT: umov.b w15, v0[7] +; CHECK-GI-NEXT: orr w8, w8, w10, lsl #6 +; CHECK-GI-NEXT: umov.b w10, v1[6] +; CHECK-GI-NEXT: and w16, w16, #0x1 +; CHECK-GI-NEXT: orr w9, w9, w16, lsl #4 +; CHECK-GI-NEXT: umov.b w16, v0[5] +; CHECK-GI-NEXT: orr w11, w11, w14, lsl #3 ; CHECK-GI-NEXT: and w13, w13, #0x1 -; CHECK-GI-NEXT: strb w8, [sp, #12] -; CHECK-GI-NEXT: orr w11, w14, w11, lsl #2 -; CHECK-GI-NEXT: and w14, w16, #0x1 -; CHECK-GI-NEXT: umov.b w16, v0[4] +; CHECK-GI-NEXT: umov.b w14, v1[7] ; CHECK-GI-NEXT: and w12, w12, #0x1 -; CHECK-GI-NEXT: and w15, w15, #0x1 -; CHECK-GI-NEXT: orr w9, w9, w13, lsl #6 -; CHECK-GI-NEXT: orr w11, w11, w14, lsl #3 -; CHECK-GI-NEXT: orr w12, w17, w12, lsl #2 +; CHECK-GI-NEXT: orr w9, w9, w13, lsl #5 +; CHECK-GI-NEXT: umov.b w13, v0[6] +; CHECK-GI-NEXT: orr w11, w11, w12, lsl #4 ; CHECK-GI-NEXT: and w10, w10, #0x1 -; CHECK-GI-NEXT: and w17, w0, #0x1 -; CHECK-GI-NEXT: umov.b w0, v0[5] -; CHECK-GI-NEXT: umov.b w14, v0[6] -; CHECK-GI-NEXT: orr w10, w11, w10, lsl #4 -; CHECK-GI-NEXT: orr w12, w12, w17, lsl #3 -; CHECK-GI-NEXT: umov.b w11, v0[7] -; CHECK-GI-NEXT: and w16, w16, #0x1 -; CHECK-GI-NEXT: umov.b w17, v0[6] -; CHECK-GI-NEXT: orr w10, w10, w15, lsl #5 +; CHECK-GI-NEXT: and w12, w15, #0x1 ; CHECK-GI-NEXT: umov.b w15, v0[7] -; CHECK-GI-NEXT: orr w12, w12, w16, lsl #4 -; CHECK-GI-NEXT: and w16, w0, #0x1 -; CHECK-GI-NEXT: umov.b w0, v0[7] -; CHECK-GI-NEXT: and w14, w14, #0x1 -; CHECK-GI-NEXT: orr w12, w12, w16, lsl #5 -; CHECK-GI-NEXT: orr w10, w10, w14, lsl #6 -; CHECK-GI-NEXT: and w11, w11, #0x1 -; CHECK-GI-NEXT: and w13, w17, #0x1 +; CHECK-GI-NEXT: orr w9, w9, w10, lsl #6 +; CHECK-GI-NEXT: and w10, w16, #0x1 +; CHECK-GI-NEXT: orr w8, w8, w12, lsl #7 +; CHECK-GI-NEXT: orr w10, w11, w10, lsl #5 +; CHECK-GI-NEXT: and w11, w14, #0x1 ; CHECK-GI-NEXT: orr w9, w9, w11, lsl #7 +; CHECK-GI-NEXT: and w11, w13, #0x1 +; CHECK-GI-NEXT: strb w8, [sp, #8] +; CHECK-GI-NEXT: orr w8, w10, w11, lsl #6 +; CHECK-GI-NEXT: ldr b0, [sp, #8] +; CHECK-GI-NEXT: strb w9, [sp, #9] +; CHECK-GI-NEXT: and w9, w15, #0x1 +; CHECK-GI-NEXT: ldr b1, [sp, #9] +; CHECK-GI-NEXT: orr w8, w8, w9, lsl #7 ; CHECK-GI-NEXT: mov.s v0[1], v1[0] -; CHECK-GI-NEXT: orr w11, w12, w13, lsl #6 -; CHECK-GI-NEXT: and w12, w15, #0x1 +; CHECK-GI-NEXT: strb w8, [sp, #10] +; CHECK-GI-NEXT: strb w8, [sp, #11] ; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: orr w8, w10, w12, lsl #7 -; CHECK-GI-NEXT: and w10, w0, #0x1 -; CHECK-GI-NEXT: strb w9, [sp, #13] -; CHECK-GI-NEXT: orr w9, w11, w10, lsl #7 +; CHECK-GI-NEXT: strb w8, [sp, #12] +; CHECK-GI-NEXT: strb w8, [sp, #13] ; CHECK-GI-NEXT: strb w8, [sp, #14] -; CHECK-GI-NEXT: strb w9, [sp, #15] +; CHECK-GI-NEXT: strb w8, [sp, #15] ; CHECK-GI-NEXT: add sp, sp, #16 ; CHECK-GI-NEXT: ret %bc = bitcast <16 x i1> %arg to <2 x i8> diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll index 1164e02a16c9e..bd68b213ec988 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll @@ -79,11 +79,10 @@ define half @add_HalfH(<4 x half> %bin.rdx) { ; CHECK-GI-FP16-LABEL: add_HalfH: ; CHECK-GI-FP16: // %bb.0: ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: fadd h1, h0, h1 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[2] +; CHECK-GI-FP16-NEXT: faddp h2, v0.2h ; CHECK-GI-FP16-NEXT: mov h0, v0.h[3] -; CHECK-GI-FP16-NEXT: fadd h1, h1, h2 +; CHECK-GI-FP16-NEXT: fadd h1, h2, h1 ; CHECK-GI-FP16-NEXT: fadd h0, h1, h0 ; CHECK-GI-FP16-NEXT: ret %r = call half @llvm.vector.reduce.fadd.f16.v4f16(half -0.0, <4 x half> %bin.rdx) diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll index 1d295a30a994b..1906ca9defa40 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll @@ -44,27 +44,11 @@ define half @test_v1f16(<1 x half> %a) nounwind { } define float @test_v1f32(<1 x float> %a) nounwind { -; CHECK-NOFP-SD-LABEL: test_v1f32: -; CHECK-NOFP-SD: // %bb.0: -; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NOFP-SD-NEXT: ret -; -; CHECK-FP-SD-LABEL: test_v1f32: -; CHECK-FP-SD: // %bb.0: -; CHECK-FP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-FP-SD-NEXT: ret -; -; CHECK-NOFP-GI-LABEL: test_v1f32: -; CHECK-NOFP-GI: // %bb.0: -; CHECK-NOFP-GI-NEXT: // kill: def $s0 killed $s0 killed $d0 -; CHECK-NOFP-GI-NEXT: ret -; -; CHECK-FP-GI-LABEL: test_v1f32: -; CHECK-FP-GI: // %bb.0: -; CHECK-FP-GI-NEXT: // kill: def $s0 killed $s0 killed $d0 -; CHECK-FP-GI-NEXT: ret +; CHECK-LABEL: test_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a) ret float %b } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll index ee2af110c84cd..152eb66ebcdfe 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll @@ -44,27 +44,11 @@ define half @test_v1f16(<1 x half> %a) nounwind { } define float @test_v1f32(<1 x float> %a) nounwind { -; CHECK-NOFP-SD-LABEL: test_v1f32: -; CHECK-NOFP-SD: // %bb.0: -; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NOFP-SD-NEXT: ret -; -; CHECK-FP-SD-LABEL: test_v1f32: -; CHECK-FP-SD: // %bb.0: -; CHECK-FP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-FP-SD-NEXT: ret -; -; CHECK-NOFP-GI-LABEL: test_v1f32: -; CHECK-NOFP-GI: // %bb.0: -; CHECK-NOFP-GI-NEXT: // kill: def $s0 killed $s0 killed $d0 -; CHECK-NOFP-GI-NEXT: ret -; -; CHECK-FP-GI-LABEL: test_v1f32: -; CHECK-FP-GI: // %bb.0: -; CHECK-FP-GI-NEXT: // kill: def $s0 killed $s0 killed $d0 -; CHECK-FP-GI-NEXT: ret +; CHECK-LABEL: test_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: ret %b = call nnan float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a) ret float %b } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll index be61f9b521795..a1b7118d8080d 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll @@ -40,27 +40,11 @@ define half @test_v1f16(<1 x half> %a) nounwind { } define float @test_v1f32(<1 x float> %a) nounwind { -; CHECK-NOFP-SD-LABEL: test_v1f32: -; CHECK-NOFP-SD: // %bb.0: -; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NOFP-SD-NEXT: ret -; -; CHECK-FP-SD-LABEL: test_v1f32: -; CHECK-FP-SD: // %bb.0: -; CHECK-FP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-FP-SD-NEXT: ret -; -; CHECK-NOFP-GI-LABEL: test_v1f32: -; CHECK-NOFP-GI: // %bb.0: -; CHECK-NOFP-GI-NEXT: // kill: def $s0 killed $s0 killed $d0 -; CHECK-NOFP-GI-NEXT: ret -; -; CHECK-FP-GI-LABEL: test_v1f32: -; CHECK-FP-GI: // %bb.0: -; CHECK-FP-GI-NEXT: // kill: def $s0 killed $s0 killed $d0 -; CHECK-FP-GI-NEXT: ret +; CHECK-LABEL: test_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fmaximum.v1f32(<1 x float> %a) ret float %b } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll index 300081dc3ec40..d5f999add22c2 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll @@ -44,27 +44,11 @@ define half @test_v1f16(<1 x half> %a) nounwind { } define float @test_v1f32(<1 x float> %a) nounwind { -; CHECK-NOFP-SD-LABEL: test_v1f32: -; CHECK-NOFP-SD: // %bb.0: -; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NOFP-SD-NEXT: ret -; -; CHECK-FP-SD-LABEL: test_v1f32: -; CHECK-FP-SD: // %bb.0: -; CHECK-FP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-FP-SD-NEXT: ret -; -; CHECK-NOFP-GI-LABEL: test_v1f32: -; CHECK-NOFP-GI: // %bb.0: -; CHECK-NOFP-GI-NEXT: // kill: def $s0 killed $s0 killed $d0 -; CHECK-NOFP-GI-NEXT: ret -; -; CHECK-FP-GI-LABEL: test_v1f32: -; CHECK-FP-GI: // %bb.0: -; CHECK-FP-GI-NEXT: // kill: def $s0 killed $s0 killed $d0 -; CHECK-FP-GI-NEXT: ret +; CHECK-LABEL: test_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: ret %b = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a) ret float %b } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll index e735f670ced0c..719cac8f33028 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll @@ -40,27 +40,11 @@ define half @test_v1f16(<1 x half> %a) nounwind { } define float @test_v1f32(<1 x float> %a) nounwind { -; CHECK-NOFP-SD-LABEL: test_v1f32: -; CHECK-NOFP-SD: // %bb.0: -; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NOFP-SD-NEXT: ret -; -; CHECK-FP-SD-LABEL: test_v1f32: -; CHECK-FP-SD: // %bb.0: -; CHECK-FP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-FP-SD-NEXT: ret -; -; CHECK-NOFP-GI-LABEL: test_v1f32: -; CHECK-NOFP-GI: // %bb.0: -; CHECK-NOFP-GI-NEXT: // kill: def $s0 killed $s0 killed $d0 -; CHECK-NOFP-GI-NEXT: ret -; -; CHECK-FP-GI-LABEL: test_v1f32: -; CHECK-FP-GI: // %bb.0: -; CHECK-FP-GI-NEXT: // kill: def $s0 killed $s0 killed $d0 -; CHECK-FP-GI-NEXT: ret +; CHECK-LABEL: test_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fminimum.v1f32(<1 x float> %a) ret float %b } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll index e1b21705c95f3..e22a5a4af4fae 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll @@ -5,18 +5,11 @@ ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 define float @mul_HalfS(<2 x float> %bin.rdx) { -; CHECK-SD-LABEL: mul_HalfS: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: fmul s0, s0, v0.s[1] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: mul_HalfS: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: fmul s0, s0, s1 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: mul_HalfS: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmul s0, s0, v0.s[1] +; CHECK-NEXT: ret %r = call float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %bin.rdx) ret float %r } @@ -79,12 +72,9 @@ define half @mul_HalfH(<4 x half> %bin.rdx) { ; CHECK-GI-FP16-LABEL: mul_HalfH: ; CHECK-GI-FP16: // %bb.0: ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: fmul h1, h0, h1 -; CHECK-GI-FP16-NEXT: mov h0, v0.h[3] -; CHECK-GI-FP16-NEXT: fmul h1, h1, h2 -; CHECK-GI-FP16-NEXT: fmul h0, h1, h0 +; CHECK-GI-FP16-NEXT: fmul h1, h0, v0.h[1] +; CHECK-GI-FP16-NEXT: fmul h1, h1, v0.h[2] +; CHECK-GI-FP16-NEXT: fmul h0, h1, v0.h[3] ; CHECK-GI-FP16-NEXT: ret %r = call half @llvm.vector.reduce.fmul.f16.v4f16(half 1.0, <4 x half> %bin.rdx) ret half %r @@ -475,3 +465,6 @@ declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>) declare float @llvm.vector.reduce.fmul.f32.v8f32(float, <8 x float>) declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>) declare double @llvm.vector.reduce.fmul.f64.v4f64(double, <4 x double>) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll index 2429cf4b4597a..5fd705b07ca3b 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll @@ -5,18 +5,11 @@ ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 define float @mul_HalfS(<2 x float> %bin.rdx) { -; CHECK-SD-LABEL: mul_HalfS: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: fmul s0, s0, v0.s[1] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: mul_HalfS: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: fmul s0, s0, s1 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: mul_HalfS: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmul s0, s0, v0.s[1] +; CHECK-NEXT: ret %r = call fast float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %bin.rdx) ret float %r } @@ -51,20 +44,17 @@ define half @mul_HalfH(<4 x half> %bin.rdx) { ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: mov d1, v0.d[1] ; CHECK-GI-NOFP16-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fmul s0, s0, v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: mul_HalfH: ; CHECK-GI-FP16: // %bb.0: ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 -; CHECK-GI-FP16-NEXT: fmul h1, h2, h3 -; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[3] +; CHECK-GI-FP16-NEXT: fmul h2, h0, v0.h[1] +; CHECK-GI-FP16-NEXT: fmul h0, h1, v0.h[2] +; CHECK-GI-FP16-NEXT: fmul h0, h2, h0 ; CHECK-GI-FP16-NEXT: ret %r = call fast half @llvm.vector.reduce.fmul.f16.v4f16(half 1.0, <4 x half> %bin.rdx) ret half %r @@ -115,8 +105,7 @@ define half @mul_H(<8 x half> %bin.rdx) { ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v1.4s, v0.4s ; CHECK-GI-NOFP16-NEXT: mov d1, v0.d[1] ; CHECK-GI-NOFP16-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fmul s0, s0, v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 ; CHECK-GI-NOFP16-NEXT: ret ; @@ -124,12 +113,10 @@ define half @mul_H(<8 x half> %bin.rdx) { ; CHECK-GI-FP16: // %bb.0: ; CHECK-GI-FP16-NEXT: mov d1, v0.d[1] ; CHECK-GI-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 -; CHECK-GI-FP16-NEXT: fmul h1, h2, h3 -; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[3] +; CHECK-GI-FP16-NEXT: fmul h2, h0, v0.h[1] +; CHECK-GI-FP16-NEXT: fmul h0, h1, v0.h[2] +; CHECK-GI-FP16-NEXT: fmul h0, h2, h0 ; CHECK-GI-FP16-NEXT: ret %r = call fast half @llvm.vector.reduce.fmul.f16.v8f16(half 1.0, <8 x half> %bin.rdx) ret half %r @@ -147,8 +134,7 @@ define float @mul_S(<4 x float> %bin.rdx) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov d1, v0.d[1] ; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fmul s0, s0, v0.s[1] ; CHECK-GI-NEXT: ret %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %bin.rdx) ret float %r @@ -220,8 +206,7 @@ define half @mul_2H(<16 x half> %bin.rdx) { ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-GI-NOFP16-NEXT: mov d1, v0.d[1] ; CHECK-GI-NOFP16-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fmul s0, s0, v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 ; CHECK-GI-NOFP16-NEXT: ret ; @@ -230,12 +215,10 @@ define half @mul_2H(<16 x half> %bin.rdx) { ; CHECK-GI-FP16-NEXT: fmul v0.8h, v0.8h, v1.8h ; CHECK-GI-FP16-NEXT: mov d1, v0.d[1] ; CHECK-GI-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 -; CHECK-GI-FP16-NEXT: fmul h1, h2, h3 -; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[3] +; CHECK-GI-FP16-NEXT: fmul h2, h0, v0.h[1] +; CHECK-GI-FP16-NEXT: fmul h0, h1, v0.h[2] +; CHECK-GI-FP16-NEXT: fmul h0, h2, h0 ; CHECK-GI-FP16-NEXT: ret %r = call fast half @llvm.vector.reduce.fmul.f16.v16f16(half 1.0, <16 x half> %bin.rdx) ret half %r @@ -255,8 +238,7 @@ define float @mul_2S(<8 x float> %bin.rdx) { ; CHECK-GI-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: mov d1, v0.d[1] ; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fmul s0, s0, v0.s[1] ; CHECK-GI-NEXT: ret %r = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %bin.rdx) ret float %r @@ -289,9 +271,8 @@ define float @mul_S_init_42(<4 x float> %bin.rdx) { ; CHECK-GI-NEXT: mov d1, v0.d[1] ; CHECK-GI-NEXT: mov w8, #1109917696 // =0x42280000 ; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: fmul s0, s0, s1 ; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, v0.s[1] ; CHECK-GI-NEXT: fmul s0, s0, s1 ; CHECK-GI-NEXT: ret %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 42.0, <4 x float> %bin.rdx) @@ -357,10 +338,8 @@ define half @fmul_reduct_reassoc_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-GI-NOFP16-NEXT: mov d3, v1.d[1] ; CHECK-GI-NOFP16-NEXT: fmul v0.2s, v0.2s, v2.2s ; CHECK-GI-NOFP16-NEXT: fmul v1.2s, v1.2s, v3.2s -; CHECK-GI-NOFP16-NEXT: mov s2, v0.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[1] -; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s2 -; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s3 +; CHECK-GI-NOFP16-NEXT: fmul s0, s0, v0.s[1] +; CHECK-GI-NOFP16-NEXT: fmul s1, s1, v1.s[1] ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 @@ -375,18 +354,14 @@ define half @fmul_reduct_reassoc_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-GI-FP16-NEXT: mov d3, v1.d[1] ; CHECK-GI-FP16-NEXT: fmul v0.4h, v0.4h, v2.4h ; CHECK-GI-FP16-NEXT: fmul v1.4h, v1.4h, v3.4h -; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] -; CHECK-GI-FP16-NEXT: mov h6, v1.h[2] -; CHECK-GI-FP16-NEXT: mov h7, v1.h[3] -; CHECK-GI-FP16-NEXT: fmul h0, h0, h2 -; CHECK-GI-FP16-NEXT: fmul h2, h3, h4 -; CHECK-GI-FP16-NEXT: fmul h1, h1, h5 -; CHECK-GI-FP16-NEXT: fmul h3, h6, h7 -; CHECK-GI-FP16-NEXT: fmul h0, h0, h2 -; CHECK-GI-FP16-NEXT: fmul h1, h1, h3 +; CHECK-GI-FP16-NEXT: mov h2, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h3, v1.h[3] +; CHECK-GI-FP16-NEXT: fmul h4, h0, v0.h[1] +; CHECK-GI-FP16-NEXT: fmul h0, h2, v0.h[2] +; CHECK-GI-FP16-NEXT: fmul h2, h1, v1.h[1] +; CHECK-GI-FP16-NEXT: fmul h1, h3, v1.h[2] +; CHECK-GI-FP16-NEXT: fmul h0, h4, h0 +; CHECK-GI-FP16-NEXT: fmul h1, h2, h1 ; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 ; CHECK-GI-FP16-NEXT: ret %r1 = call fast half @llvm.vector.reduce.fmul.f16.v8f16(half 1.0, <8 x half> %a) @@ -414,10 +389,8 @@ define float @fmul_reduct_reassoc_v8f32(<8 x float> %a, <8 x float> %b) { ; CHECK-GI-NEXT: mov d3, v1.d[1] ; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s ; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s -; CHECK-GI-NEXT: mov s2, v0.s[1] -; CHECK-GI-NEXT: mov s3, v1.s[1] -; CHECK-GI-NEXT: fmul s0, s0, s2 -; CHECK-GI-NEXT: fmul s1, s1, s3 +; CHECK-GI-NEXT: fmul s0, s0, v0.s[1] +; CHECK-GI-NEXT: fmul s1, s1, v1.s[1] ; CHECK-GI-NEXT: fmul s0, s0, s1 ; CHECK-GI-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a) @@ -441,10 +414,8 @@ define float @fmul_reduct_reassoc_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-GI-NEXT: mov d3, v1.d[1] ; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s ; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s -; CHECK-GI-NEXT: mov s2, v0.s[1] -; CHECK-GI-NEXT: mov s3, v1.s[1] -; CHECK-GI-NEXT: fmul s0, s0, s2 -; CHECK-GI-NEXT: fmul s1, s1, s3 +; CHECK-GI-NEXT: fmul s0, s0, v0.s[1] +; CHECK-GI-NEXT: fmul s1, s1, v1.s[1] ; CHECK-GI-NEXT: fmul s0, s0, s1 ; CHECK-GI-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a) @@ -471,12 +442,10 @@ define float @fmul_reduct_reassoc_v4f32_init(float %i, <4 x float> %a, <4 x floa ; CHECK-GI-NEXT: mov d3, v1.d[1] ; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s ; CHECK-GI-NEXT: mov d3, v2.d[1] -; CHECK-GI-NEXT: mov s4, v1.s[1] +; CHECK-GI-NEXT: fmul s1, s1, v1.s[1] ; CHECK-GI-NEXT: fmul v2.2s, v2.2s, v3.2s -; CHECK-GI-NEXT: fmul s1, s1, s4 -; CHECK-GI-NEXT: mov s3, v2.s[1] ; CHECK-GI-NEXT: fmul s0, s0, s1 -; CHECK-GI-NEXT: fmul s1, s2, s3 +; CHECK-GI-NEXT: fmul s1, s2, v2.s[1] ; CHECK-GI-NEXT: fmul s0, s0, s1 ; CHECK-GI-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %i, <4 x float> %a) @@ -502,10 +471,8 @@ define float @fmul_reduct_reassoc_v4v8f32(<4 x float> %a, <8 x float> %b) { ; CHECK-GI-NEXT: mov d3, v1.d[1] ; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s ; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s -; CHECK-GI-NEXT: mov s2, v0.s[1] -; CHECK-GI-NEXT: mov s3, v1.s[1] -; CHECK-GI-NEXT: fmul s0, s0, s2 -; CHECK-GI-NEXT: fmul s1, s1, s3 +; CHECK-GI-NEXT: fmul s0, s0, v0.s[1] +; CHECK-GI-NEXT: fmul s1, s1, v1.s[1] ; CHECK-GI-NEXT: fmul s0, s0, s1 ; CHECK-GI-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a) @@ -556,10 +523,8 @@ define float @fmul_reduct_reassoc_v4f32_extrause(<4 x float> %a, <4 x float> %b) ; CHECK-GI-NEXT: mov d3, v1.d[1] ; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s ; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s -; CHECK-GI-NEXT: mov s2, v0.s[1] -; CHECK-GI-NEXT: mov s3, v1.s[1] -; CHECK-GI-NEXT: fmul s0, s0, s2 -; CHECK-GI-NEXT: fmul s1, s1, s3 +; CHECK-GI-NEXT: fmul s0, s0, v0.s[1] +; CHECK-GI-NEXT: fmul s1, s1, v1.s[1] ; CHECK-GI-NEXT: fmul s1, s0, s1 ; CHECK-GI-NEXT: fmul s0, s1, s0 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll index 0806f7da5c89c..d5c040e09945b 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll @@ -57,16 +57,11 @@ define i24 @test_v1i24(<1 x i24> %a) nounwind { } define i32 @test_v1i32(<1 x i32> %a) nounwind { -; CHECK-SD-LABEL: test_v1i32: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: fmov w0, s0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_v1i32: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_v1i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %b = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %a) ret i32 %b } diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll index 602643264e7be..53456c4c81ccc 100644 --- a/llvm/test/CodeGen/AArch64/vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll @@ -755,20 +755,13 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { ; CHECK-i32-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-i32-NEXT: ret ; -; CHECK-i64-SD-LABEL: lrint_v1f32: -; CHECK-i64-SD: // %bb.0: -; CHECK-i64-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-i64-SD-NEXT: frintx s0, s0 -; CHECK-i64-SD-NEXT: fcvtzs x8, s0 -; CHECK-i64-SD-NEXT: fmov d0, x8 -; CHECK-i64-SD-NEXT: ret -; -; CHECK-i64-GI-LABEL: lrint_v1f32: -; CHECK-i64-GI: // %bb.0: -; CHECK-i64-GI-NEXT: frintx s0, s0 -; CHECK-i64-GI-NEXT: fcvtzs x8, s0 -; CHECK-i64-GI-NEXT: fmov d0, x8 -; CHECK-i64-GI-NEXT: ret +; CHECK-i64-LABEL: lrint_v1f32: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-i64-NEXT: frintx s0, s0 +; CHECK-i64-NEXT: fcvtzs x8, s0 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: ret %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x) ret <1 x iXLen> %a } @@ -1335,3 +1328,7 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { ret <32 x iXLen> %a } declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-i32-GI: {{.*}} +; CHECK-i64-GI: {{.*}} +; CHECK-i64-SD: {{.*}}