diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c index e1e2220f94d6d..fcd4314249ff8 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c @@ -62,10 +62,7 @@ fixed_bool_t from_svbool_t(svbool_t type) { // CHECK-LABEL: @lax_cast( // CHECK-NEXT: entry: -// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <16 x i32>, align 64 -// CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[TYPE_COERCE:%.*]], i64 0) -// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA6:![0-9]+]] -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[TYPE_COERCE:%.*]] to // CHECK-NEXT: ret [[TMP0]] // svint64_t lax_cast(fixed_int32_t type) { @@ -74,9 +71,9 @@ svint64_t lax_cast(fixed_int32_t type) { // CHECK-LABEL: @to_svint32_t__from_gnu_int32_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv4i32.v16i32( poison, <16 x i32> [[TYPE]], i64 0) -// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv4i32.v16i32( poison, <16 x i32> [[TYPE]], i64 0) +// CHECK-NEXT: ret [[CAST_SCALABLE]] // svint32_t to_svint32_t__from_gnu_int32_t(gnu_int32_t type) { return type; @@ -84,8 +81,8 @@ svint32_t to_svint32_t__from_gnu_int32_t(gnu_int32_t type) { // CHECK-LABEL: @from_svint32_t__to_gnu_int32_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[TYPE:%.*]], i64 0) -// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[TYPE:%.*]], i64 0) +// CHECK-NEXT: store <16 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) { @@ -94,9 +91,9 @@ gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) { // CHECK-LABEL: @to_fixed_int32_t__from_gnu_int32_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv4i32.v16i32( poison, <16 x i32> [[TYPE]], i64 0) -// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv4i32.v16i32( poison, <16 x i32> [[TYPE]], i64 0) +// CHECK-NEXT: ret [[CAST_SCALABLE]] // fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) { return type; @@ -105,7 +102,7 @@ fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) { // CHECK-LABEL: @from_fixed_int32_t__to_gnu_int32_t( // CHECK-NEXT: entry: // CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[TYPE_COERCE:%.*]], i64 0) -// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // gnu_int32_t 
from_fixed_int32_t__to_gnu_int32_t(fixed_int32_t type) { diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index c2510ea75544a..f24d03635731e 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -1053,6 +1053,10 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node { /// defined. void setAlignment(MaybeAlign Align) { GlobalObject::setAlignment(Align); } + /// Return the value for vscale based on the vscale_range attribute or 0 when + /// unknown. + unsigned getVScaleValue() const; + private: void allocHungoffUselist(); template void setHungoffOperand(Constant *C); diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 63665d837c398..493dec72d45af 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -1165,6 +1165,18 @@ bool Function::nullPointerIsDefined() const { return hasFnAttribute(Attribute::NullPointerIsValid); } +unsigned Function::getVScaleValue() const { + Attribute Attr = getFnAttribute(Attribute::VScaleRange); + if (!Attr.isValid()) + return 0; + + unsigned VScale = Attr.getVScaleRangeMin(); + if (VScale && VScale == Attr.getVScaleRangeMax()) + return VScale; + + return 0; +} + bool llvm::NullPointerIsDefined(const Function *F, unsigned AS) { if (F && F->nullPointerIsDefined()) return true; diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index a4e373d395b90..42d1d9a437bb2 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1120,8 +1120,13 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor { return PI.setEscapedReadOnly(&LI); TypeSize Size = DL.getTypeStoreSize(LI.getType()); - if (Size.isScalable()) - return PI.setAborted(&LI); + if (Size.isScalable()) { + unsigned VScale = LI.getFunction()->getVScaleValue(); + if (!VScale) + return PI.setAborted(&LI); + + Size = TypeSize::getFixed(Size.getKnownMinValue() * VScale); + } return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(), LI.isVolatile()); @@ -1135,8 +1140,13 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor { return PI.setAborted(&SI); TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType()); - if (StoreSize.isScalable()) - return PI.setAborted(&SI); + if (StoreSize.isScalable()) { + unsigned VScale = SI.getFunction()->getVScaleValue(); + if (!VScale) + return PI.setAborted(&SI); + + StoreSize = TypeSize::getFixed(StoreSize.getKnownMinValue() * VScale); + } uint64_t Size = StoreSize.getFixedValue(); @@ -1927,7 +1937,8 @@ static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) { /// ensure that we only try to convert viable values. The strategy is that we /// will peel off single element struct and array wrappings to get to an /// underlying value, and convert that value. -static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { +static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy, + unsigned VScale = 0) { if (OldTy == NewTy) return true; @@ -1941,8 +1952,35 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { return false; } - if (DL.getTypeSizeInBits(NewTy).getFixedValue() != - DL.getTypeSizeInBits(OldTy).getFixedValue()) + TypeSize NewSize = DL.getTypeSizeInBits(NewTy); + TypeSize OldSize = DL.getTypeSizeInBits(OldTy); + + if ((isa(NewTy) && isa(OldTy)) || + (isa(OldTy) && isa(NewTy))) { + // Conversion is only possible when the size of scalable vectors is known. 
+ if (!VScale) + return false; + + // For ptr-to-int and int-to-ptr casts, the pointer side is resolved within + // a single domain (either fixed or scalable). Any additional conversion + // between fixed and scalable types is handled through integer types. + auto OldVTy = OldTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(OldTy) : OldTy; + auto NewVTy = NewTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(NewTy) : NewTy; + + if (isa(NewTy)) { + if (!VectorType::getWithSizeAndScalar(cast(NewVTy), OldVTy)) + return false; + + NewSize = TypeSize::getFixed(NewSize.getKnownMinValue() * VScale); + } else { + if (!VectorType::getWithSizeAndScalar(cast(OldVTy), NewVTy)) + return false; + + OldSize = TypeSize::getFixed(OldSize.getKnownMinValue() * VScale); + } + } + + if (NewSize != OldSize) return false; if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) return false; @@ -1992,7 +2030,14 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *NewTy) { Type *OldTy = V->getType(); - assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type"); + +#ifndef NDEBUG + BasicBlock *BB = IRB.GetInsertBlock(); + assert(BB && BB->getParent() && "VScale unknown!"); + unsigned VScale = BB->getParent()->getVScaleValue(); + assert(canConvertValue(DL, OldTy, NewTy, VScale) && + "Value not convertable to type"); +#endif if (OldTy == NewTy) return V; @@ -2000,13 +2045,41 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, assert(!(isa(OldTy) && isa(NewTy)) && "Integer types must be the exact same to convert."); + // A variant of bitcast that supports a mixture of fixed and scalable types + // that are know to have the same size. + auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * { + Type *InTy = In->getType(); + if (InTy == Ty) + return In; + + if (isa(InTy) && isa(Ty)) { + // For vscale_range(2) expand <4 x i32> to --> + // <4 x i32> to to + auto *VTy = VectorType::getWithSizeAndScalar(cast(Ty), InTy); + return IRB.CreateBitCast(IRB.CreateInsertVector(VTy, + PoisonValue::get(VTy), In, + IRB.getInt64(0)), + Ty); + } + + if (isa(InTy) && isa(Ty)) { + // For vscale_range(2) expand to <4 x i32> --> + // to to <4 x i32> + auto *VTy = VectorType::getWithSizeAndScalar(cast(InTy), Ty); + return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy), + IRB.getInt64(0)); + } + + return IRB.CreateBitCast(In, Ty); + }; + // See if we need inttoptr for this type pair. May require additional bitcast. 
if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) { // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8* // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*> // Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*> // Directly handle i64 to i8* - return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)), + return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)), NewTy); } @@ -2016,7 +2089,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32> // Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32> // Expand i8* to i64 --> i8* to i64 to i64 - return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)), + return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)), NewTy); } @@ -2031,12 +2104,14 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, // size. if (OldAS != NewAS) { assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS)); - return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)), - NewTy); + return IRB.CreateIntToPtr( + CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)), + DL.getIntPtrType(NewTy)), + NewTy); } } - return IRB.CreateBitCast(V, NewTy); + return CreateBitCastLike(V, NewTy); } /// Test whether the given slice use can be promoted to a vector. @@ -2046,7 +2121,8 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, - const DataLayout &DL) { + const DataLayout &DL, + unsigned VScale) { // First validate the slice offsets. uint64_t BeginOffset = std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset(); @@ -2090,7 +2166,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, assert(LTy->isIntegerTy()); LTy = SplitIntTy; } - if (!canConvertValue(DL, SliceTy, LTy)) + if (!canConvertValue(DL, SliceTy, LTy, VScale)) return false; } else if (StoreInst *SI = dyn_cast(U->getUser())) { if (SI->isVolatile()) @@ -2103,7 +2179,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, assert(STy->isIntegerTy()); STy = SplitIntTy; } - if (!canConvertValue(DL, STy, SliceTy)) + if (!canConvertValue(DL, STy, SliceTy, VScale)) return false; } else { return false; @@ -2118,7 +2194,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, /// (and thus isVectorPromotionViable) over all slices of the alloca for the /// given VectorType. 
static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy, - const DataLayout &DL) { + const DataLayout &DL, unsigned VScale) { uint64_t ElementSize = DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue(); @@ -2131,11 +2207,11 @@ static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy, ElementSize /= 8; for (const Slice &S : P) - if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL)) + if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL, VScale)) return false; for (const Slice *S : P.splitSliceTails()) - if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL)) + if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL, VScale)) return false; return true; @@ -2150,7 +2226,7 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, SmallVectorImpl &CandidateTys, bool HaveCommonEltTy, Type *CommonEltTy, bool HaveVecPtrTy, bool HaveCommonVecPtrTy, - VectorType *CommonVecPtrTy) { + VectorType *CommonVecPtrTy, unsigned VScale) { // If we didn't find a vector type, nothing to do here. if (CandidateTys.empty()) return nullptr; @@ -2226,7 +2302,7 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, }); for (VectorType *VTy : CandidateTys) - if (checkVectorTypeForPromotion(P, VTy, DL)) + if (checkVectorTypeForPromotion(P, VTy, DL, VScale)) return VTy; return nullptr; @@ -2237,7 +2313,7 @@ static VectorType *createAndCheckVectorTypesForPromotion( function_ref CheckCandidateType, Partition &P, const DataLayout &DL, SmallVectorImpl &CandidateTys, bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy, - bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) { + bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) { [[maybe_unused]] VectorType *OriginalElt = CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr; // Consider additional vector types where the element type size is a @@ -2262,9 +2338,9 @@ static VectorType *createAndCheckVectorTypesForPromotion( } } - return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy, - CommonEltTy, HaveVecPtrTy, - HaveCommonVecPtrTy, CommonVecPtrTy); + return checkVectorTypesForPromotion( + P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, + HaveCommonVecPtrTy, CommonVecPtrTy, VScale); } /// Test whether the given alloca partitioning and range of slices can be @@ -2276,7 +2352,8 @@ static VectorType *createAndCheckVectorTypesForPromotion( /// SSA value. We only can ensure this for a limited set of operations, and we /// don't want to do the rewrites unless we are confident that the result will /// be promotable, so we have an early test here. -static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { +static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL, + unsigned VScale) { // Collect the candidate types for vector-based promotion. Also track whether // we have different element types. SmallVector CandidateTys; @@ -2288,7 +2365,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { bool HaveCommonEltTy = true; bool HaveCommonVecPtrTy = true; auto CheckCandidateType = [&](Type *Ty) { - if (auto *VTy = dyn_cast(Ty)) { + if (auto *VTy = dyn_cast(Ty)) { // Return if bitcast to vectors is different for total size in bits. 
if (!CandidateTys.empty()) { VectorType *V = CandidateTys[0]; @@ -2343,14 +2420,14 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { if (auto *VTy = createAndCheckVectorTypesForPromotion( LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, - HaveCommonVecPtrTy, CommonVecPtrTy)) + HaveCommonVecPtrTy, CommonVecPtrTy, VScale)) return VTy; CandidateTys.clear(); return createAndCheckVectorTypesForPromotion( DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy, - CommonVecPtrTy); + CommonVecPtrTy, VScale); } /// Test whether a slice of an alloca is valid for integer widening. @@ -2387,7 +2464,8 @@ static bool isIntegerWideningViableForSlice(const Slice &S, if (LI->isVolatile()) return false; // We can't handle loads that extend past the allocated memory. - if (DL.getTypeStoreSize(LI->getType()).getFixedValue() > Size) + TypeSize LoadSize = DL.getTypeStoreSize(LI->getType()); + if (!LoadSize.isFixed() || LoadSize.getFixedValue() > Size) return false; // So far, AllocaSliceRewriter does not support widening split slice tails // in rewriteIntegerLoad. @@ -2412,7 +2490,8 @@ static bool isIntegerWideningViableForSlice(const Slice &S, if (SI->isVolatile()) return false; // We can't handle stores that extend past the allocated memory. - if (DL.getTypeStoreSize(ValueTy).getFixedValue() > Size) + TypeSize StoreSize = DL.getTypeStoreSize(ValueTy); + if (!StoreSize.isFixed() || StoreSize.getFixedValue() > Size) return false; // So far, AllocaSliceRewriter does not support widening split slice tails // in rewriteIntegerStore. @@ -2885,8 +2964,6 @@ class AllocaSliceRewriter : public InstVisitor { Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8) : LI.getType(); - const bool IsLoadPastEnd = - DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize; bool IsPtrAdjusted = false; Value *V; if (VecTy) { @@ -2896,8 +2973,9 @@ class AllocaSliceRewriter : public InstVisitor { } else if (NewBeginOffset == NewAllocaBeginOffset && NewEndOffset == NewAllocaEndOffset && (canConvertValue(DL, NewAllocaTy, TargetTy) || - (IsLoadPastEnd && NewAllocaTy->isIntegerTy() && - TargetTy->isIntegerTy() && !LI.isVolatile()))) { + (NewAllocaTy->isIntegerTy() && TargetTy->isIntegerTy() && + DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize && + !LI.isVolatile()))) { Value *NewPtr = getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile()); LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr, @@ -3070,7 +3148,8 @@ class AllocaSliceRewriter : public InstVisitor { if (AllocaInst *AI = dyn_cast(V->stripInBoundsOffsets())) Pass.PostPromotionWorklist.insert(AI); - if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedValue()) { + TypeSize StoreSize = DL.getTypeStoreSize(V->getType()); + if (StoreSize.isFixed() && SliceSize < StoreSize.getFixedValue()) { assert(!SI.isVolatile()); assert(V->getType()->isIntegerTy() && "Only integer type loads and stores are split"); @@ -4846,14 +4925,18 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, Type *SliceTy = nullptr; VectorType *SliceVecTy = nullptr; const DataLayout &DL = AI.getDataLayout(); + unsigned VScale = AI.getFunction()->getVScaleValue(); + std::pair CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()); // Do all uses operate on the same type? 
- if (CommonUseTy.first) - if (DL.getTypeAllocSize(CommonUseTy.first).getFixedValue() >= P.size()) { + if (CommonUseTy.first) { + TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first); + if (CommonUseSize.isFixed() && CommonUseSize.getFixedValue() >= P.size()) { SliceTy = CommonUseTy.first; SliceVecTy = dyn_cast(SliceTy); } + } // If not, can we find an appropriate subtype in the original allocated type? if (!SliceTy) if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(), @@ -4874,12 +4957,12 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // If the common use types are not viable for promotion then attempt to find // another type that is viable. - if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL)) + if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL, VScale)) if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(), P.beginOffset(), P.size())) { VectorType *TypePartitionVecTy = dyn_cast(TypePartitionTy); if (TypePartitionVecTy && - checkVectorTypeForPromotion(P, TypePartitionVecTy, DL)) + checkVectorTypeForPromotion(P, TypePartitionVecTy, DL, VScale)) SliceTy = TypePartitionTy; } @@ -4890,7 +4973,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL); VectorType *VecTy = - IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL); + IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL, VScale); if (VecTy) SliceTy = VecTy; diff --git a/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll new file mode 100644 index 0000000000000..85715e406e065 --- /dev/null +++ b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll @@ -0,0 +1,349 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; This test checks that SROA runs mem2reg on scalable vectors. + +define @alloca_nxv16i1( %pg) vscale_range(1) { +; CHECK-LABEL: @alloca_nxv16i1( +; CHECK-NEXT: ret [[PG:%.*]] +; + %pg.addr = alloca + store %pg, ptr %pg.addr + %1 = load , ptr %pg.addr + ret %1 +} + +define @alloca_nxv16i8( %vec) vscale_range(1) { +; CHECK-LABEL: @alloca_nxv16i8( +; CHECK-NEXT: ret [[VEC:%.*]] +; + %vec.addr = alloca + store %vec, ptr %vec.addr + %1 = load , ptr %vec.addr + ret %1 +} + +; Test scalable alloca that can't be promoted. Mem2Reg only considers +; non-volatile loads and stores for promotion. +define @unpromotable_alloca( %vec) vscale_range(1) { +; CHECK-LABEL: @unpromotable_alloca( +; CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca , align 16 +; CHECK-NEXT: store volatile [[VEC:%.*]], ptr [[VEC_ADDR]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load volatile , ptr [[VEC_ADDR]], align 16 +; CHECK-NEXT: ret [[TMP1]] +; + %vec.addr = alloca + store volatile %vec, ptr %vec.addr + %1 = load volatile , ptr %vec.addr + ret %1 +} + +; Test we bail out when using an alloca of a fixed-length vector (VLS) that was +; bitcasted to a scalable vector. 
+define @cast_alloca_to_svint32_t( %type.coerce) vscale_range(1) { +; CHECK-LABEL: @cast_alloca_to_svint32_t( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32( [[TYPE_COERCE:%.*]], i64 0) +; CHECK-NEXT: [[TYPE_0_VEC_EXPAND:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TYPE_0_VECBLEND:%.*]] = select <16 x i1> , <16 x i32> [[TYPE_0_VEC_EXPAND]], <16 x i32> undef +; CHECK-NEXT: [[TYPE_ADDR_0_VEC_EXTRACT:%.*]] = shufflevector <16 x i32> [[TYPE_0_VECBLEND]], <16 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.vector.insert.nxv4i32.v4i32( poison, <4 x i32> [[TYPE_ADDR_0_VEC_EXTRACT]], i64 0) +; CHECK-NEXT: ret [[TMP2]] +; + %type = alloca <16 x i32> + %type.addr = alloca <16 x i32> + store %type.coerce, ptr %type + %type1 = load <16 x i32>, ptr %type + store <16 x i32> %type1, ptr %type.addr + %1 = load <16 x i32>, ptr %type.addr + %2 = load , ptr %type.addr + ret %2 +} + +; When casting from VLA to VLS via memory check we bail out when producing a +; GEP where the element type is a scalable vector. +define @cast_alloca_from_svint32_t() vscale_range(1) { +; CHECK-LABEL: @cast_alloca_from_svint32_t( +; CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[RETVAL_COERCE]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 16 +; CHECK-NEXT: ret [[TMP1]] +; + %retval = alloca <16 x i32> + store <16 x i32> zeroinitializer, ptr %retval + %retval.coerce = alloca + call void @llvm.memcpy.p0.p0.i64(ptr align 16 %retval.coerce, ptr align 16 %retval, i64 64, i1 false) + %1 = load , ptr %retval.coerce + ret %1 +} + +; Test we bail out when using an alloca of a fixed-length vector (VLS) that was +; bitcasted to a scalable vector. 
+define void @select_load_alloca_to_svdouble_t() vscale_range(1) { +; CHECK-LABEL: @select_load_alloca_to_svdouble_t( +; CHECK-NEXT: [[Z:%.*]] = alloca <16 x half>, align 32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], ptr [[Z]], ptr null +; CHECK-NEXT: [[VAL:%.*]] = load , ptr [[COND]], align 16 +; CHECK-NEXT: ret void +; + %z = alloca <16 x half> + %cmp = icmp eq i32 0, 0 + %cond = select i1 %cmp, ptr %z, ptr null + %val = load , ptr %cond, align 16 + ret void +} + +define void @select_store_alloca_to_svdouble_t( %val) vscale_range(1) { +; CHECK-LABEL: @select_store_alloca_to_svdouble_t( +; CHECK-NEXT: [[Z:%.*]] = alloca <16 x half>, align 32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], ptr [[Z]], ptr null +; CHECK-NEXT: store [[VAL:%.*]], ptr [[COND]], align 16 +; CHECK-NEXT: ret void +; + %z = alloca <16 x half> + %cmp = icmp eq i32 0, 0 + %cond = select i1 %cmp, ptr %z, ptr null + store %val, ptr %cond, align 16 + ret void +} + +define <4 x i32> @fixed_alloca_fixed_from_scalable( %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32( [[A:%.*]], i64 0) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; + %tmp = alloca <4 x i32> + store %a, ptr %tmp + %cast = load <4 x i32>, ptr %tmp + ret <4 x i32> %cast +} + +define <2 x i8> @fixed_alloca_fixed_from_scalable_requires_bitcast( %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_requires_bitcast( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [[A:%.*]] to +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( [[TMP1]], i64 0) +; CHECK-NEXT: ret <2 x i8> [[TMP2]] +; + %tmp = alloca <2 x i8> + store %a, ptr %tmp + %cast = load <2 x i8>, ptr %tmp + ret <2 x i8> %cast +} + +define <2 x ptr> @fixed_alloca_fixed_from_scalable_inttoptr( %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_inttoptr( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [[A:%.*]] to +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64( [[TMP1]], i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = inttoptr <2 x i64> [[TMP3]] to <2 x ptr> +; CHECK-NEXT: ret <2 x ptr> [[TMP2]] +; + %tmp = alloca <4 x i32> + store %a, ptr %tmp + %cast = load <2 x ptr>, ptr %tmp + ret <2 x ptr> %cast +} + +define <4 x i32> @fixed_alloca_fixed_from_scalable_ptrtoint( %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_ptrtoint( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint [[A:%.*]] to +; CHECK-NEXT: [[TMP2:%.*]] = bitcast [[TMP1]] to +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32( [[TMP2]], i64 0) +; CHECK-NEXT: ret <4 x i32> [[TMP_0_CAST]] +; + %tmp = alloca <4 x i32> + store %a, ptr %tmp + %cast = load <4 x i32>, ptr %tmp + ret <4 x i32> %cast +} + +define <2 x ptr> @fixed_alloca_fixed_from_scalable_ptrtoptr( %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_ptrtoptr( +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = call <2 x ptr> @llvm.vector.extract.v2p0.nxv2p0( [[A:%.*]], i64 0) +; CHECK-NEXT: ret <2 x ptr> [[TMP_0_CAST]] +; + %tmp = alloca <2 x ptr> + store %a, ptr %tmp + %cast = load <2 x ptr>, ptr %tmp + ret <2 x ptr> %cast +} + +define <2 x ptr> @fixed_alloca_fixed_from_scalable_ptrtoptr_different_addrspace( %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_ptrtoptr_different_addrspace( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint [[A:%.*]] to +; CHECK-NEXT: 
[[TMP2:%.*]] = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64( [[TMP1]], i64 0) +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x ptr> +; CHECK-NEXT: ret <2 x ptr> [[TMP3]] +; + %tmp = alloca <2 x ptr> + store %a, ptr %tmp + %cast = load <2 x ptr>, ptr %tmp + ret <2 x ptr> %cast +} + +define @fixed_alloca_scalable_from_fixed(<4 x i32> %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.vector.insert.nxv4i32.v4i32( poison, <4 x i32> [[A:%.*]], i64 0) +; CHECK-NEXT: ret [[TMP1]] +; + %tmp = alloca <4 x i32> + store <4 x i32> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define @fixed_alloca_scalable_from_fixed_requires_bitcast(<2 x i8> %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_requires_bitcast( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.vector.insert.nxv2i8.v2i8( poison, <2 x i8> [[A:%.*]], i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast [[TMP1]] to +; CHECK-NEXT: ret [[TMP2]] +; + %tmp = alloca <2 x i8> + store <2 x i8> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define @fixed_alloca_scalable_from_fixed_inttoptr(<4 x i32> %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_inttoptr( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.vector.insert.nxv4i32.v4i32( poison, <4 x i32> [[A:%.*]], i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast [[TMP1]] to +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = inttoptr [[TMP2]] to +; CHECK-NEXT: ret [[TMP_0_CAST]] +; + %tmp = alloca <4 x i32> + store <4 x i32> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define @fixed_alloca_scalable_from_fixed_ptrtoint(<2 x ptr> %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_ptrtoint( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <2 x ptr> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.vector.insert.nxv2i64.v2i64( poison, <2 x i64> [[TMP1]], i64 0) +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = bitcast [[TMP2]] to +; CHECK-NEXT: ret [[TMP_0_CAST]] +; + %tmp = alloca <4 x i32> + store <2 x ptr> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define @fixed_alloca_scalable_from_fixed_ptrtoptr(<2 x ptr> %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_ptrtoptr( +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = call @llvm.vector.insert.nxv2p0.v2p0( poison, <2 x ptr> [[A:%.*]], i64 0) +; CHECK-NEXT: ret [[TMP_0_CAST]] +; + %tmp = alloca <2 x ptr> + store <2 x ptr> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define @fixed_alloca_scalable_from_fixed_ptrtoptr_different_addrspace(<2 x ptr> %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_ptrtoptr_different_addrspace( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <2 x ptr> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.vector.insert.nxv2i64.v2i64( poison, <2 x i64> [[TMP1]], i64 0) +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr [[TMP2]] to +; CHECK-NEXT: ret [[TMP3]] +; + %tmp = alloca <2 x ptr> + store <2 x ptr> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define <4 x i32> @scalable_alloca_fixed_from_scalable( %a) vscale_range(1) { +; CHECK-LABEL: @scalable_alloca_fixed_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca , align 16 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[CAST:%.*]] = load <4 x i32>, ptr [[TMP]], align 16 +; CHECK-NEXT: ret <4 x i32> [[CAST]] +; + %tmp = alloca + store %a, ptr %tmp + %cast = load <4 x i32>, ptr %tmp + ret <4 x i32> %cast +} + +define @scalable_alloca_scalable_from_fixed(<4 x i32> %a) 
vscale_range(1) { +; CHECK-LABEL: @scalable_alloca_scalable_from_fixed( +; CHECK-NEXT: [[TMP:%.*]] = alloca , align 16 +; CHECK-NEXT: store <4 x i32> [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[CAST:%.*]] = load , ptr [[TMP]], align 16 +; CHECK-NEXT: ret [[CAST]] +; + %tmp = alloca + store <4 x i32> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define i16 @scalar_alloca_scalar_from_scalable( %a) vscale_range(1) { +; CHECK-LABEL: @scalar_alloca_scalar_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca i16, align 2 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 2 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load i16, ptr [[TMP]], align 2 +; CHECK-NEXT: ret i16 [[TMP_0_CAST]] +; + %tmp = alloca i16 + store %a, ptr %tmp + %cast = load i16, ptr %tmp + ret i16 %cast +} + +define @scalar_alloca_scalable_from_scalar(i16 %a) vscale_range(1) { +; CHECK-LABEL: @scalar_alloca_scalable_from_scalar( +; CHECK-NEXT: [[TMP:%.*]] = alloca i16, align 2 +; CHECK-NEXT: store i16 [[A:%.*]], ptr [[TMP]], align 2 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load , ptr [[TMP]], align 2 +; CHECK-NEXT: ret [[TMP_0_CAST]] +; + %tmp = alloca i16 + store i16 %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define { <2 x i32>, <2 x i32> } @fixed_struct_alloca_fixed_from_scalable( %a) vscale_range(1) { +; CHECK-LABEL: @fixed_struct_alloca_fixed_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca { <2 x i32>, <2 x i32> }, align 8 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 8 +; CHECK-NEXT: [[TMP_0_CAST_FCA_0_LOAD:%.*]] = load <2 x i32>, ptr [[TMP]], align 8 +; CHECK-NEXT: [[CAST_FCA_0_INSERT:%.*]] = insertvalue { <2 x i32>, <2 x i32> } poison, <2 x i32> [[TMP_0_CAST_FCA_0_LOAD]], 0 +; CHECK-NEXT: [[TMP_8_CAST_FCA_1_GEP_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 8 +; CHECK-NEXT: [[TMP_8_CAST_FCA_1_LOAD:%.*]] = load <2 x i32>, ptr [[TMP_8_CAST_FCA_1_GEP_SROA_IDX]], align 8 +; CHECK-NEXT: [[CAST_FCA_1_INSERT:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[CAST_FCA_0_INSERT]], <2 x i32> [[TMP_8_CAST_FCA_1_LOAD]], 1 +; CHECK-NEXT: ret { <2 x i32>, <2 x i32> } [[CAST_FCA_1_INSERT]] +; + %tmp = alloca { <2 x i32>, <2 x i32> } + store %a, ptr %tmp + %cast = load { <2 x i32>, <2 x i32> }, ptr %tmp + ret { <2 x i32>, <2 x i32> } %cast +} + +define @fixed_struct_alloca_scalable_from_fixed({ <2 x ptr>, <2 x ptr> } %a) vscale_range(1) { +; CHECK-LABEL: @fixed_struct_alloca_scalable_from_fixed( +; CHECK-NEXT: [[TMP:%.*]] = alloca { <2 x ptr>, <2 x ptr> }, align 16 +; CHECK-NEXT: [[A_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[A:%.*]], 0 +; CHECK-NEXT: store <2 x ptr> [[A_FCA_0_EXTRACT]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[A_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[A]], 1 +; CHECK-NEXT: [[TMP_16_A_FCA_1_GEP_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 16 +; CHECK-NEXT: store <2 x ptr> [[A_FCA_1_EXTRACT]], ptr [[TMP_16_A_FCA_1_GEP_SROA_IDX]], align 16 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load , ptr [[TMP]], align 16 +; CHECK-NEXT: ret [[TMP_0_CAST]] +; + %tmp = alloca { <2 x ptr>, <2 x ptr> } + store { <2 x ptr>, <2 x ptr> } %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-MODIFY-CFG: {{.*}} +; CHECK-PRESERVE-CFG: {{.*}} diff --git a/llvm/test/Transforms/SROA/scalable-vectors.ll b/llvm/test/Transforms/SROA/scalable-vectors.ll index d892883ce9dc3..346814d9f630e 100644 --- a/llvm/test/Transforms/SROA/scalable-vectors.ll +++ b/llvm/test/Transforms/SROA/scalable-vectors.ll @@ -2,6 +2,8 @@ ; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG ; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + ; This test checks that SROA runs mem2reg on scalable vectors. define @alloca_nxv16i1( %pg) { @@ -67,11 +69,12 @@ define @cast_alloca_to_svint32_t( %type.coe define @cast_alloca_from_svint32_t() { ; CHECK-LABEL: @cast_alloca_from_svint32_t( ; CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 -; CHECK-NEXT: store <16 x i32> undef, ptr [[RETVAL_COERCE]], align 16 +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[RETVAL_COERCE]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 16 ; CHECK-NEXT: ret [[TMP1]] ; %retval = alloca <16 x i32> + store <16 x i32> zeroinitializer, ptr %retval %retval.coerce = alloca call void @llvm.memcpy.p0.p0.i64(ptr align 16 %retval.coerce, ptr align 16 %retval, i64 64, i1 false) %1 = load , ptr %retval.coerce @@ -110,6 +113,224 @@ define void @select_store_alloca_to_svdouble_t( %val) { ret void } +define <4 x i32> @fixed_alloca_fixed_from_scalable( %a) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i32>, align 16 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[TMP]], align 16 +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; + %tmp = alloca <4 x i32> + store %a, ptr %tmp + %cast = load <4 x i32>, ptr %tmp + ret <4 x i32> %cast +} + +define <2 x i8> @fixed_alloca_fixed_from_scalable_requires_bitcast( %a) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_requires_bitcast( +; CHECK-NEXT: [[TMP:%.*]] = alloca <2 x i8>, align 2 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[TMP]], align 2 +; CHECK-NEXT: ret <2 x i8> [[TMP2]] +; + %tmp = alloca <2 x i8> + store %a, ptr %tmp + %cast = load <2 x i8>, ptr %tmp + ret <2 x i8> %cast +} + +define <2 x ptr> @fixed_alloca_fixed_from_scalable_inttoptr( %a) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_inttoptr( +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i32>, align 16 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x ptr>, ptr [[TMP]], align 16 +; CHECK-NEXT: ret <2 x ptr> [[TMP2]] +; + %tmp = alloca <4 x i32> + store %a, ptr %tmp + %cast = load <2 x ptr>, ptr %tmp + ret <2 x ptr> %cast +} + +define <4 x i32> @fixed_alloca_fixed_from_scalable_ptrtoint( %a) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_ptrtoint( +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i32>, align 16 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load <4 x i32>, ptr [[TMP]], align 16 +; CHECK-NEXT: ret <4 x i32> [[TMP_0_CAST]] +; + %tmp = alloca <4 x i32> + store %a, ptr %tmp + %cast = load <4 x i32>, ptr %tmp + ret <4 x i32> %cast +} + +define <2 x ptr> @fixed_alloca_fixed_from_scalable_ptrtoptr( %a) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_ptrtoptr( +; CHECK-NEXT: [[TMP:%.*]] = alloca <2 x ptr>, 
align 16 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[CAST:%.*]] = load <2 x ptr>, ptr [[TMP]], align 16 +; CHECK-NEXT: ret <2 x ptr> [[CAST]] +; + %tmp = alloca <2 x ptr> + store %a, ptr %tmp + %cast = load <2 x ptr>, ptr %tmp + ret <2 x ptr> %cast +} + +define @fixed_alloca_scalable_from_fixed(<4 x i32> %a) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed( +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i32>, align 16 +; CHECK-NEXT: store <4 x i32> [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[TMP]], align 16 +; CHECK-NEXT: ret [[TMP1]] +; + %tmp = alloca <4 x i32> + store <4 x i32> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define @fixed_alloca_scalable_from_fixed_requires_bitcast(<2 x i8> %a) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_requires_bitcast( +; CHECK-NEXT: [[TMP:%.*]] = alloca <2 x i8>, align 2 +; CHECK-NEXT: store <2 x i8> [[A:%.*]], ptr [[TMP]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load , ptr [[TMP]], align 2 +; CHECK-NEXT: ret [[TMP2]] +; + %tmp = alloca <2 x i8> + store <2 x i8> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define @fixed_alloca_scalable_from_fixed_inttoptr(<4 x i32> %a) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_inttoptr( +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i32>, align 16 +; CHECK-NEXT: store <4 x i32> [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load , ptr [[TMP]], align 16 +; CHECK-NEXT: ret [[TMP_0_CAST]] +; + %tmp = alloca <4 x i32> + store <4 x i32> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define @fixed_alloca_scalable_from_fixed_ptrtoint(<2 x ptr> %a) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_ptrtoint( +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i32>, align 16 +; CHECK-NEXT: store <2 x ptr> [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load , ptr [[TMP]], align 16 +; CHECK-NEXT: ret [[TMP_0_CAST]] +; + %tmp = alloca <4 x i32> + store <2 x ptr> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define @fixed_alloca_scalable_from_fixed_ptrtoptr(<2 x ptr> %a) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_ptrtoptr( +; CHECK-NEXT: [[TMP:%.*]] = alloca <2 x ptr>, align 16 +; CHECK-NEXT: store <2 x ptr> [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[CAST:%.*]] = load , ptr [[TMP]], align 16 +; CHECK-NEXT: ret [[CAST]] +; + %tmp = alloca <2 x ptr> + store <2 x ptr> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define <4 x i32> @scalable_alloca_fixed_from_scalable( %a) { +; CHECK-LABEL: @scalable_alloca_fixed_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca , align 16 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[CAST:%.*]] = load <4 x i32>, ptr [[TMP]], align 16 +; CHECK-NEXT: ret <4 x i32> [[CAST]] +; + %tmp = alloca + store %a, ptr %tmp + %cast = load <4 x i32>, ptr %tmp + ret <4 x i32> %cast +} + +define @scalable_alloca_scalable_from_fixed(<4 x i32> %a) { +; CHECK-LABEL: @scalable_alloca_scalable_from_fixed( +; CHECK-NEXT: [[TMP:%.*]] = alloca , align 16 +; CHECK-NEXT: store <4 x i32> [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[CAST:%.*]] = load , ptr [[TMP]], align 16 +; CHECK-NEXT: ret [[CAST]] +; + %tmp = alloca + store <4 x i32> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define i16 @scalar_alloca_scalar_from_scalable( %a) { +; CHECK-LABEL: @scalar_alloca_scalar_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca i16, align 2 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 2 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] 
= load i16, ptr [[TMP]], align 2 +; CHECK-NEXT: ret i16 [[TMP_0_CAST]] +; + %tmp = alloca i16 + store %a, ptr %tmp + %cast = load i16, ptr %tmp + ret i16 %cast +} + +define @scalar_alloca_scalable_from_scalar(i16 %a) { +; CHECK-LABEL: @scalar_alloca_scalable_from_scalar( +; CHECK-NEXT: [[TMP:%.*]] = alloca i16, align 2 +; CHECK-NEXT: store i16 [[A:%.*]], ptr [[TMP]], align 2 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load , ptr [[TMP]], align 2 +; CHECK-NEXT: ret [[TMP_0_CAST]] +; + %tmp = alloca i16 + store i16 %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define { <2 x i32>, <2 x i32> } @fixed_struct_alloca_fixed_from_scalable( %a) { +; CHECK-LABEL: @fixed_struct_alloca_fixed_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca { <2 x i32>, <2 x i32> }, align 8 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[CAST_FCA_0_GEP:%.*]] = getelementptr inbounds { <2 x i32>, <2 x i32> }, ptr [[TMP]], i32 0, i32 0 +; CHECK-NEXT: [[TMP_0_CAST_FCA_0_LOAD:%.*]] = load <2 x i32>, ptr [[CAST_FCA_0_GEP]], align 8 +; CHECK-NEXT: [[CAST_FCA_0_INSERT:%.*]] = insertvalue { <2 x i32>, <2 x i32> } poison, <2 x i32> [[TMP_0_CAST_FCA_0_LOAD]], 0 +; CHECK-NEXT: [[TMP_8_CAST_FCA_1_GEP_SROA_IDX:%.*]] = getelementptr inbounds { <2 x i32>, <2 x i32> }, ptr [[TMP]], i32 0, i32 1 +; CHECK-NEXT: [[TMP_8_CAST_FCA_1_LOAD:%.*]] = load <2 x i32>, ptr [[TMP_8_CAST_FCA_1_GEP_SROA_IDX]], align 8 +; CHECK-NEXT: [[CAST_FCA_1_INSERT:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[CAST_FCA_0_INSERT]], <2 x i32> [[TMP_8_CAST_FCA_1_LOAD]], 1 +; CHECK-NEXT: ret { <2 x i32>, <2 x i32> } [[CAST_FCA_1_INSERT]] +; + %tmp = alloca { <2 x i32>, <2 x i32> } + store %a, ptr %tmp + %cast = load { <2 x i32>, <2 x i32> }, ptr %tmp + ret { <2 x i32>, <2 x i32> } %cast +} + +define @fixed_struct_alloca_scalable_from_fixed({ <2 x ptr>, <2 x ptr> } %a) { +; CHECK-LABEL: @fixed_struct_alloca_scalable_from_fixed( +; CHECK-NEXT: [[TMP:%.*]] = alloca { <2 x ptr>, <2 x ptr> }, align 16 +; CHECK-NEXT: [[A_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[A:%.*]], 0 +; CHECK-NEXT: [[A_FCA_0_GEP:%.*]] = getelementptr inbounds { <2 x ptr>, <2 x ptr> }, ptr [[TMP]], i32 0, i32 0 +; CHECK-NEXT: store <2 x ptr> [[A_FCA_0_EXTRACT]], ptr [[A_FCA_0_GEP]], align 16 +; CHECK-NEXT: [[A_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[A]], 1 +; CHECK-NEXT: [[TMP_16_A_FCA_1_GEP_SROA_IDX:%.*]] = getelementptr inbounds { <2 x ptr>, <2 x ptr> }, ptr [[TMP]], i32 0, i32 1 +; CHECK-NEXT: store <2 x ptr> [[A_FCA_1_EXTRACT]], ptr [[TMP_16_A_FCA_1_GEP_SROA_IDX]], align 16 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load , ptr [[TMP]], align 32 +; CHECK-NEXT: ret [[TMP_0_CAST]] +; + %tmp = alloca { <2 x ptr>, <2 x ptr> } + store { <2 x ptr>, <2 x ptr> } %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-MODIFY-CFG: {{.*}}
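
The new `Function::getVScaleValue()` helper introduced above only returns a non-zero value when the `vscale_range` attribute pins vscale to a single value (min == max), and every SROA change in this patch keys off that. As a reading aid, here is a minimal sketch of the pattern the patch applies in `SliceBuilder::visitLoadInst`/`visitStoreInst`; the helper name `resolveStoreSize` and the `std::optional` return are illustrative only, not part of the patch:

```cpp
#include <cstdint>
#include <optional>
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Resolve a possibly scalable store size to a fixed byte count, mirroring the
// SliceBuilder changes above: give up (nullopt) unless vscale_range(N, N)
// pins vscale, otherwise scale the known-minimum size by vscale.
static std::optional<uint64_t> resolveStoreSize(const DataLayout &DL,
                                                const StoreInst &SI) {
  TypeSize Size = DL.getTypeStoreSize(SI.getValueOperand()->getType());
  if (!Size.isScalable())
    return Size.getFixedValue();
  unsigned VScale = SI.getFunction()->getVScaleValue(); // 0 when unknown
  if (!VScale)
    return std::nullopt;
  return Size.getKnownMinValue() * VScale;
}
```

The same scaling is applied to load sizes, and `canConvertValue` uses the identical vscale value when comparing the bit-widths of fixed and scalable types.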
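
The `CreateBitCastLike` lambda added to `convertValue` is where the mixed fixed/scalable "bitcasts" are synthesized. When the element types already match (as in the `fixed_alloca_fixed_from_scalable` tests), it degenerates to a single `llvm.vector.insert` or `llvm.vector.extract` at index 0. A stand-alone sketch of that degenerate case, assuming `vscale_range(1,1)` so `<4 x i32>` and `<vscale x 4 x i32>` have equal store sizes; the function names are illustrative and the IRBuilder calls are the same ones the patch uses:

```cpp
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// <4 x i32> -> <vscale x 4 x i32>: insert the fixed value at offset 0 of a
// poison scalable vector (lowers to @llvm.vector.insert.nxv4i32.v4i32).
static Value *fixedToScalable(IRBuilder<> &IRB, Value *Fixed) {
  auto *ScalableTy = ScalableVectorType::get(IRB.getInt32Ty(), 4);
  return IRB.CreateInsertVector(ScalableTy, PoisonValue::get(ScalableTy),
                                Fixed, IRB.getInt64(0));
}

// <vscale x 4 x i32> -> <4 x i32>: extract the low fixed-length part
// (lowers to @llvm.vector.extract.v4i32.nxv4i32).
static Value *scalableToFixed(IRBuilder<> &IRB, Value *Scalable) {
  auto *FixedTy = FixedVectorType::get(IRB.getInt32Ty(), 4);
  return IRB.CreateExtractVector(FixedTy, Scalable, IRB.getInt64(0));
}
```

When the element types differ (for example the `<2 x i8>` cases above), the lambda additionally bitcasts through an intermediate scalable vector of matching total size, which is what the `VectorType::getWithSizeAndScalar` calls in the patch compute.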