llvm
diff --git a/‎llvm/include/llvm/CodeGen/RegisterClassInfo.h
Lines changed: 6 additions & 2 deletions b/‎llvm/include/llvm/CodeGen/RegisterClassInfo.h
Lines changed: 6 additions & 2 deletions
diff --git a/‎llvm/include/llvm/CodeGen/TargetRegisterInfo.h
Lines changed: 4 additions & 3 deletions b/‎llvm/include/llvm/CodeGen/TargetRegisterInfo.h
Lines changed: 4 additions & 3 deletions
diff --git a/‎llvm/include/llvm/Target/Target.td
Lines changed: 1 addition & 1 deletion b/‎llvm/include/llvm/Target/Target.td
Lines changed: 1 addition & 1 deletion
diff --git a/‎llvm/lib/CodeGen/BreakFalseDeps.cpp
Lines changed: 1 addition & 1 deletion b/‎llvm/lib/CodeGen/BreakFalseDeps.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎llvm/lib/CodeGen/RegisterClassInfo.cpp
Lines changed: 10 additions & 3 deletions b/‎llvm/lib/CodeGen/RegisterClassInfo.cpp
Lines changed: 10 additions & 3 deletions
diff --git a/‎llvm/lib/Target/X86/X86RegisterInfo.td
Lines changed: 24 additions & 4 deletions b/‎llvm/lib/Target/X86/X86RegisterInfo.td
Lines changed: 24 additions & 4 deletions
diff --git a/‎llvm/test/CodeGen/X86/avx-cvt.ll
Lines changed: 8 additions & 8 deletions b/‎llvm/test/CodeGen/X86/avx-cvt.ll
Lines changed: 8 additions & 8 deletions
@@ -50,6 +50,8 @@ class RegisterClassInfo {
   // entry is valid when its tag matches.
   unsigned Tag = 0;
 
+  bool Reverse = false;
+
   const MachineFunction *MF = nullptr;
   const TargetRegisterInfo *TRI = nullptr;
 
@@ -86,9 +88,11 @@ class RegisterClassInfo {
 public:
   LLVM_ABI RegisterClassInfo();
 
-  /// runOnFunction - Prepare to answer questions about MF. This must be called
+  /// runOnMachineFunction - Prepare to answer questions about MF. If Rev is
+  /// true, the reversed raw allocation order is used. This must be called
   /// before any other methods are used.
-  LLVM_ABI void runOnMachineFunction(const MachineFunction &MF);
+  LLVM_ABI void runOnMachineFunction(const MachineFunction &MF,
+                                     bool Rev = false);
 
   /// getNumAllocatableRegs - Returns the number of actually allocatable
   /// registers in RC in the current function.
 
@@ -68,7 +68,7 @@ class TargetRegisterClass {
   const bool CoveredBySubRegs;
   const unsigned *SuperClasses;
   const uint16_t SuperClassesSize;
-  ArrayRef<MCPhysReg> (*OrderFunc)(const MachineFunction&);
+  ArrayRef<MCPhysReg> (*OrderFunc)(const MachineFunction &, bool Rev);
 
   /// Return the register class ID number.
   unsigned getID() const { return MC->getID(); }
@@ -199,8 +199,9 @@ class TargetRegisterClass {
   /// other criteria.
   ///
   /// By default, this method returns all registers in the class.
-  ArrayRef<MCPhysReg> getRawAllocationOrder(const MachineFunction &MF) const {
-    return OrderFunc ? OrderFunc(MF) : getRegisters();
+  ArrayRef<MCPhysReg> getRawAllocationOrder(const MachineFunction &MF,
+                                            bool Rev = false) const {
+    return OrderFunc ? OrderFunc(MF, Rev) : getRegisters();
   }
 
   /// Returns the combination of all lane masks of register in this class.
 
@@ -314,7 +314,7 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
   // to use in a given machine function. The code will be inserted in a
   // function like this:
   //
-  //   static inline unsigned f(const MachineFunction &MF) { ... }
+  //   static inline unsigned f(const MachineFunction &MF, bool Rev) { ... }
   //
   // The function should return 0 to select the default order defined by
   // MemberList, 1 to select the first AltOrders entry and so on.
 
@@ -285,7 +285,7 @@ bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) {
   TRI = MF->getSubtarget().getRegisterInfo();
   RDA = &getAnalysis<ReachingDefAnalysis>();
 
-  RegClassInfo.runOnMachineFunction(mf);
+  RegClassInfo.runOnMachineFunction(mf, /*Rev=*/true);
 
   LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n");
 
 
@@ -39,14 +39,16 @@ StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
 
 RegisterClassInfo::RegisterClassInfo() = default;
 
-void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
+void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf,
+                                             bool Rev) {
   bool Update = false;
   MF = &mf;
 
   auto &STI = MF->getSubtarget();
 
   // Allocate new array the first time we see a new target.
-  if (STI.getRegisterInfo() != TRI) {
+  if (STI.getRegisterInfo() != TRI || Reverse != Rev) {
+    Reverse = Rev;
     TRI = STI.getRegisterInfo();
     RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
     Update = true;
@@ -142,7 +144,12 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
 
   // FIXME: Once targets reserve registers instead of removing them from the
   // allocation order, we can simply use begin/end here.
-  ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF);
+  ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF, Reverse);
+  std::vector<MCPhysReg> ReverseOrder;
+  if (Reverse) {
+    llvm::append_range(ReverseOrder, reverse(RawOrder));
+    RawOrder = ArrayRef<MCPhysReg>(ReverseOrder);
+  }
   for (unsigned PhysReg : RawOrder) {
     // Remove reserved registers from the allocation order.
     if (Reserved.test(PhysReg))
 
@@ -806,17 +806,37 @@ def VR512_0_15 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i
                                512, (sequence "ZMM%u", 0, 15)>;
 
 // Scalar AVX-512 floating point registers.
-def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
+def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)> {
+  let AltOrders = [(add (sequence "XMM%u", 16, 31), (sequence "XMM%u", 0, 15))];
+  let AltOrderSelect = [{
+    return Rev;
+  }];
+}
 
-def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;
+def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)> {
+  let AltOrders = [(add (sequence "XMM%u", 16, 31), (sequence "XMM%u", 0, 15))];
+  let AltOrderSelect = [{
+    return Rev;
+  }];
+}
 
 def FR16X : RegisterClass<"X86", [f16], 16, (add FR32X)> {let Size = 32;}
 
 // Extended VR128 and VR256 for AVX-512 instructions
 def VR128X : RegisterClass<"X86", [v4f32, v2f64, v8f16, v8bf16, v16i8, v8i16, v4i32, v2i64, f128],
-                           128, (add FR32X)>;
+                           128, (add FR32X)> {
+  let AltOrders = [(add (sequence "XMM%u", 16, 31), (sequence "XMM%u", 0, 15))];
+  let AltOrderSelect = [{
+    return Rev;
+  }];
+}
 def VR256X : RegisterClass<"X86", [v8f32, v4f64, v16f16, v16bf16, v32i8, v16i16, v8i32, v4i64],
-                           256, (sequence "YMM%u", 0, 31)>;
+                           256, (sequence "YMM%u", 0, 31)> {
+  let AltOrders = [(add (sequence "YMM%u", 16, 31), (sequence "YMM%u", 0, 15))];
+  let AltOrderSelect = [{
+    return Rev;
+  }];
+}
 
 // Mask registers
 def VK1     : RegisterClass<"X86", [v1i1],  16,  (sequence "K%u", 0, 7)> {let Size = 16;}
 
@@ -108,7 +108,7 @@ define <2 x double> @fpext01(<2 x double> %a0, <4 x float> %a1) nounwind {
 define double @funcA(ptr nocapture %e) nounwind uwtable readonly ssp {
 ; CHECK-LABEL: funcA:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    vcvtsi2sdq (%rdi), %xmm15, %xmm0
 ; CHECK-NEXT:    retq
   %tmp1 = load i64, ptr %e, align 8
   %conv = sitofp i64 %tmp1 to double
@@ -118,7 +118,7 @@ define double @funcA(ptr nocapture %e) nounwind uwtable readonly ssp {
 define double @funcB(ptr nocapture %e) nounwind uwtable readonly ssp {
 ; CHECK-LABEL: funcB:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    vcvtsi2sdl (%rdi), %xmm15, %xmm0
 ; CHECK-NEXT:    retq
   %tmp1 = load i32, ptr %e, align 4
   %conv = sitofp i32 %tmp1 to double
@@ -128,7 +128,7 @@ define double @funcB(ptr nocapture %e) nounwind uwtable readonly ssp {
 define float @funcC(ptr nocapture %e) nounwind uwtable readonly ssp {
 ; CHECK-LABEL: funcC:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    vcvtsi2ssl (%rdi), %xmm15, %xmm0
 ; CHECK-NEXT:    retq
   %tmp1 = load i32, ptr %e, align 4
   %conv = sitofp i32 %tmp1 to float
@@ -138,7 +138,7 @@ define float @funcC(ptr nocapture %e) nounwind uwtable readonly ssp {
 define float @funcD(ptr nocapture %e) nounwind uwtable readonly ssp {
 ; CHECK-LABEL: funcD:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    vcvtsi2ssq (%rdi), %xmm15, %xmm0
 ; CHECK-NEXT:    retq
   %tmp1 = load i64, ptr %e, align 8
   %conv = sitofp i64 %tmp1 to float
@@ -183,7 +183,7 @@ declare float @llvm.floor.f32(float %p)
 define float @floor_f32_load(ptr %aptr) optsize {
 ; CHECK-LABEL: floor_f32_load:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundss $9, (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    vroundss $9, (%rdi), %xmm15, %xmm0
 ; CHECK-NEXT:    retq
   %a = load float, ptr %aptr
   %res = call float @llvm.floor.f32(float %a)
@@ -193,7 +193,7 @@ define float @floor_f32_load(ptr %aptr) optsize {
 define float @floor_f32_load_pgso(ptr %aptr) !prof !14 {
 ; CHECK-LABEL: floor_f32_load_pgso:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundss $9, (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    vroundss $9, (%rdi), %xmm15, %xmm0
 ; CHECK-NEXT:    retq
   %a = load float, ptr %aptr
   %res = call float @llvm.floor.f32(float %a)
@@ -203,7 +203,7 @@ define float @floor_f32_load_pgso(ptr %aptr) !prof !14 {
 define double @nearbyint_f64_load(ptr %aptr) optsize {
 ; CHECK-LABEL: nearbyint_f64_load:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundsd $12, (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    vroundsd $12, (%rdi), %xmm15, %xmm0
 ; CHECK-NEXT:    retq
   %a = load double, ptr %aptr
   %res = call double @llvm.nearbyint.f64(double %a)
@@ -213,7 +213,7 @@ define double @nearbyint_f64_load(ptr %aptr) optsize {
 define double @nearbyint_f64_load_pgso(ptr %aptr) !prof !14 {
 ; CHECK-LABEL: nearbyint_f64_load_pgso:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundsd $12, (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    vroundsd $12, (%rdi), %xmm15, %xmm0
 ; CHECK-NEXT:    retq
   %a = load double, ptr %aptr
   %res = call double @llvm.nearbyint.f64(double %a)