diff --git a/flang/include/flang/Optimizer/CodeGen/CGPasses.td b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
index 0d20a669a15a1..9798019bfd6a9 100644
--- a/flang/include/flang/Optimizer/CodeGen/CGPasses.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
@@ -27,6 +27,8 @@ def FIRToLLVMLowering : Pass<"fir-to-llvm-ir", "mlir::ModuleOp"> {
   let options = [
     Option<"forcedTargetTriple", "target", "std::string", /*default=*/"",
            "Override module's target triple.">,
+    Option<"forcedDataLayout", "datalayout", "std::string", /*default=*/"",
+           "Override module's data layout.">,
     Option<"applyTBAA", "apply-tbaa", "bool", /*default=*/"false",
            "Attach TBAA tags to memory accessing operations.">
   ];
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index f2c731d47909a..eae79d2a74867 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -44,6 +44,7 @@
 #include "mlir/IR/Matchers.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
+#include "mlir/Target/LLVMIR/Import.h"
 #include "mlir/Target/LLVMIR/ModuleTranslation.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/TypeSwitch.h"
@@ -61,14 +62,40 @@ namespace fir {
 // TODO: This should really be recovered from the specified target.
 static constexpr unsigned defaultAlign = 8;
+static constexpr unsigned defaultAddressSpace = 0u;

 /// `fir.box` attribute values as defined for CFI_attribute_t in
 /// flang/ISO_Fortran_binding.h.
 static constexpr unsigned kAttrPointer = CFI_attribute_pointer;
 static constexpr unsigned kAttrAllocatable = CFI_attribute_allocatable;

-static inline mlir::Type getLlvmPtrType(mlir::MLIRContext *context) {
-  return mlir::LLVM::LLVMPointerType::get(context);
+static inline unsigned
+getAllocaAddressSpace(mlir::ConversionPatternRewriter &rewriter) {
+  mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
+  assert(parentOp != nullptr &&
+         "expected insertion block to have parent operation");
+  if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
+    if (mlir::Attribute addrSpace =
+            mlir::DataLayout(module).getAllocaMemorySpace())
+      return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
+  return defaultAddressSpace;
+}
+
+static inline unsigned
+getProgramAddressSpace(mlir::ConversionPatternRewriter &rewriter) {
+  mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
+  assert(parentOp != nullptr &&
+         "expected insertion block to have parent operation");
+  if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
+    if (mlir::Attribute addrSpace =
+            mlir::DataLayout(module).getProgramMemorySpace())
+      return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
+  return defaultAddressSpace;
+}
+
+static inline mlir::Type getLlvmPtrType(mlir::MLIRContext *context,
+                                        unsigned addressSpace = 0) {
+  return mlir::LLVM::LLVMPointerType::get(context, addressSpace);
 }

 static inline mlir::Type getI8Type(mlir::MLIRContext *context) {
@@ -368,19 +395,37 @@ class FIROpConversion : public mlir::ConvertOpToLLVMPattern<FromOp> {
     return getBlockForAllocaInsert(op->getParentOp());
   }

-  // Generate an alloca of size 1 for an object of type \p llvmObjectTy.
-  mlir::LLVM::AllocaOp
-  genAllocaWithType(mlir::Location loc, mlir::Type llvmObjectTy,
-                    unsigned alignment,
-                    mlir::ConversionPatternRewriter &rewriter) const {
+  // Generate an alloca of size 1 for an object of type \p llvmObjectTy in the
+  // allocation address space provided for the architecture in the DataLayout
+  // specification.
+  // If the address space is different from the device's program address
+  // space, we perform a cast. On most architectures the program and
+  // allocation address spaces will both be the default of 0, so no cast
+  // will be emitted.
+  mlir::Value genAllocaAndAddrCastWithType(
+      mlir::Location loc, mlir::Type llvmObjectTy, unsigned alignment,
+      mlir::ConversionPatternRewriter &rewriter) const {
     auto thisPt = rewriter.saveInsertionPoint();
     mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
     mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp);
     rewriter.setInsertionPointToStart(insertBlock);
     auto size = genI32Constant(loc, rewriter, 1);
-    mlir::Type llvmPtrTy = ::getLlvmPtrType(llvmObjectTy.getContext());
-    auto al = rewriter.create<mlir::LLVM::AllocaOp>(
-        loc, llvmPtrTy, llvmObjectTy, size, alignment);
+    unsigned allocaAs = getAllocaAddressSpace(rewriter);
+    unsigned programAs = getProgramAddressSpace(rewriter);
+
+    mlir::Value al = rewriter.create<mlir::LLVM::AllocaOp>(
+        loc, ::getLlvmPtrType(llvmObjectTy.getContext(), allocaAs),
+        llvmObjectTy, size, alignment);
+
+    // If the allocation address space is not the same as the program address
+    // space, we must emit a cast to the program address space before use. An
+    // example is AMDGPU, where the allocation address space is the numeric
+    // value 5 (private) and the program address space is 0 (generic).
+    if (allocaAs != programAs) {
+      al = rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
+          loc, ::getLlvmPtrType(llvmObjectTy.getContext(), programAs), al);
+    }
+
     rewriter.restoreInsertionPoint(thisPt);
     return al;
   }
@@ -532,20 +577,34 @@ struct AllocaOpConversion : public FIROpConversion<fir::AllocaOp> {
       size = rewriter.create<mlir::LLVM::MulOp>(
           loc, ity, size, integerCast(loc, rewriter, ity, operands[i]));
     }
-    mlir::Type llvmPtrTy = ::getLlvmPtrType(alloc.getContext());
+
+    unsigned allocaAs = getAllocaAddressSpace(rewriter);
+    unsigned programAs = getProgramAddressSpace(rewriter);
+
     // NOTE: we used to pass alloc->getAttrs() in the builder for non opaque
     // pointers! Only propagate pinned and bindc_name to help debugging, but
     // this should have no functional purpose (and passing the operand segment
     // attribute like before is certainly bad).
     auto llvmAlloc = rewriter.create<mlir::LLVM::AllocaOp>(
-        loc, llvmPtrTy, llvmObjectType, size);
+        loc, ::getLlvmPtrType(alloc.getContext(), allocaAs), llvmObjectType,
+        size);
     if (alloc.getPinned())
       llvmAlloc->setDiscardableAttr(alloc.getPinnedAttrName(),
                                     alloc.getPinnedAttr());
     if (alloc.getBindcName())
       llvmAlloc->setDiscardableAttr(alloc.getBindcNameAttrName(),
                                     alloc.getBindcNameAttr());
-    rewriter.replaceOp(alloc, llvmAlloc);
+    if (allocaAs == programAs) {
+      rewriter.replaceOp(alloc, llvmAlloc);
+    } else {
+      // If the allocation address space is not the same as the program
+      // address space, we must emit a cast to the program address space
+      // before use. An example is AMDGPU, where the allocation address space
+      // is the numeric value 5 (private) and the program address space is 0
+      // (generic).
+      rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
+          alloc, ::getLlvmPtrType(alloc.getContext(), programAs), llvmAlloc);
+    }
     return mlir::success();
   }
 };
@@ -1691,8 +1750,8 @@ struct EmboxCommonConversion : public FIROpConversion<OP> {
     if (isInGlobalOp(rewriter))
       return boxValue;
     mlir::Type llvmBoxTy = boxValue.getType();
-    auto alloca =
-        this->genAllocaWithType(loc, llvmBoxTy, defaultAlign, rewriter);
+    auto alloca = this->genAllocaAndAddrCastWithType(loc, llvmBoxTy,
+                                                     defaultAlign, rewriter);
     auto storeOp = rewriter.create<mlir::LLVM::StoreOp>(loc, boxValue, alloca);
     this->attachTBAATag(storeOp, boxTy, boxTy, nullptr);
     return alloca;
@@ -3110,11 +3169,11 @@ struct LoadOpConversion : public FIROpConversion<fir::LoadOp> {
       else
        attachTBAATag(boxValue, boxTy, boxTy, nullptr);
       auto newBoxStorage =
-          genAllocaWithType(loc, llvmLoadTy, defaultAlign, rewriter);
+          genAllocaAndAddrCastWithType(loc, llvmLoadTy, defaultAlign, rewriter);
       auto storeOp =
           rewriter.create<mlir::LLVM::StoreOp>(loc, boxValue, newBoxStorage);
       attachTBAATag(storeOp, boxTy, boxTy, nullptr);
-      rewriter.replaceOp(load, newBoxStorage.getResult());
+      rewriter.replaceOp(load, newBoxStorage);
     } else {
       auto loadOp = rewriter.create<mlir::LLVM::LoadOp>(
           load.getLoc(), llvmLoadTy, adaptor.getOperands(), load->getAttrs());
@@ -3808,6 +3867,11 @@ class FIRToLLVMLowering
     if (!forcedTargetTriple.empty())
       fir::setTargetTriple(mod, forcedTargetTriple);

+    if (!forcedDataLayout.empty()) {
+      llvm::DataLayout dl(forcedDataLayout);
+      fir::support::setMLIRDataLayout(mod, dl);
+    }
+
     // Run dynamic pass pipeline for converting Math dialect
     // operations into other dialects (llvm, func, etc.).
     // Some conversions of Math operations cannot be done
diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
index be82ffab7e33e..21323a5e657c9 100644
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -1,13 +1,14 @@
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gn" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-pc-win32" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT
-
-//=============================================================================
-// SUMMARY: Tests for FIR --> LLVM MLIR conversion independent of the target
-//=============================================================================
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gn" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-pc-win32" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-P0" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
+
+//===================================================
+// SUMMARY: Tests for FIR --> LLVM MLIR conversion
+//===================================================

 // Test simple global LLVM conversion

@@ -919,7 +920,9 @@ func.func @test_load_box(%addr : !fir.ref<!fir.box<!fir.array<10xf32>>>) {
 // CHECK-LABEL: llvm.func @test_load_box(
 // CHECK-SAME: %[[arg0:.*]]: !llvm.ptr) {
 // CHECK-NEXT: %[[c1:.*]] = llvm.mlir.constant(1 : i32) : i32
-// CHECK-NEXT: %[[box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE:.*]])>
+// GENERIC-NEXT: %[[box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE:.*]])>
+// AMDGPU-NEXT: %[[alloca_box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE:.*]])>{{.*}} : (i32) -> !llvm.ptr<5>
+// AMDGPU-NEXT: %[[box_copy:.*]] = llvm.addrspacecast %[[alloca_box_copy]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK-NEXT: %[[box_val:.*]] = llvm.load %[[arg0]] : !llvm.ptr -> !llvm.struct<([[DESC_TYPE]])>
 // CHECK-NEXT: llvm.store %[[box_val]], %[[box_copy]] : !llvm.struct<([[DESC_TYPE]])>, !llvm.ptr
 // CHECK-NEXT: llvm.call @takes_box(%[[box_copy]]) : (!llvm.ptr) -> ()
@@ -1064,9 +1067,12 @@ func.func @alloca_one() -> !fir.ref<i32> {
 // CHECK-LABEL: llvm.func @alloca_one() -> !llvm.ptr
 // CHECK: [[N:%.*]] = llvm.mlir.constant(1 : i64) : i64
-// CHECK: [[A:%.*]] = llvm.alloca [[N]] x i32
+// GENERIC: [[A:%.*]] = llvm.alloca [[N]] x i32
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[N]] x i32 : (i64) -> !llvm.ptr<5>
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: llvm.return [[A]] : !llvm.ptr
+

 // -----

 // Test fir.alloca of several elements

@@ -1081,7 +1087,9 @@ func.func @alloca_several() -> !fir.ref<i32> {
 // CHECK: [[N:%.*]] = llvm.mlir.constant(100 : index) : i64
 // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64
 // CHECK: [[TOTAL:%.*]] = llvm.mul [[ONE]], [[N]] : i64
-// CHECK: [[A:%.*]] = llvm.alloca [[TOTAL]] x i32
+// GENERIC: [[A:%.*]] = llvm.alloca [[TOTAL]] x i32
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[TOTAL]] x i32 : (i64) -> !llvm.ptr<5>
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: llvm.return [[A]] : !llvm.ptr

 // -----

 // Test fir.alloca of pointer to array

@@ -1095,7 +1103,9 @@ func.func @alloca_ptr_to_array() -> !fir.ref<!fir.ptr<!fir.array<?xi32>>> {
 // CHECK-LABEL: llvm.func @alloca_ptr_to_array() -> !llvm.ptr
 // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64
-// CHECK: [[A:%.*]] = llvm.alloca [[ONE]] x !llvm.ptr
+// GENERIC: [[A:%.*]] = llvm.alloca [[ONE]] x !llvm.ptr
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[ONE]] x !llvm.ptr : (i64) -> !llvm.ptr<5>
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: llvm.return [[A]] : !llvm.ptr

 // -----

@@ -1113,7 +1123,9 @@ func.func @alloca_char_array(%l: i32, %e : index) -> !fir.ref<!fir.array<?x?x!fir.char<1,?>>> {
-// CHECK: [[A:%.*]] = llvm.alloca [[PROD2]] x i8
+// GENERIC: [[A:%.*]] = llvm.alloca [[PROD2]] x i8
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[PROD2]] x i8 : (i64) -> !llvm.ptr<5>
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: return [[A]] : !llvm.ptr

 // -----

@@ -1130,7 +1142,9 @@ func.func @alloca_fixed_char_array(%e : index) -> !fir.ref<!fir.array<?x?x!fir.char<1,8>>> {
-// CHECK: [[A:%.*]] = llvm.alloca [[PROD2]] x !llvm.array<8 x i8>
+// GENERIC: [[A:%.*]] = llvm.alloca [[PROD2]] x !llvm.array<8 x i8>
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[PROD2]] x !llvm.array<8 x i8> : (i64) -> !llvm.ptr<5>
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: return [[A]] : !llvm.ptr

 // -----

@@ -1154,7 +1168,9 @@ func.func @alloca_record(%arg0 : i32, %arg1 : i16) -> !fir.ref<!fir.type<_QTt(p1:i32,p2:i16)>> {
 // CHECK-LABEL: llvm.func @alloca_record([[ARG0:%.*]]: i32, [[ARG1:%.*]]: i16) -> !llvm.ptr
 // CHECK: [[SIZE:%.*]] = llvm.call @_QTtP.mem.size([[ARG0]], [[ARG1]]) : (i32, i16) -> i64
-// CHECK: [[ALLOC:%.*]] = llvm.alloca [[SIZE]] x i8
+// GENERIC: [[ALLOC:%.*]] = llvm.alloca [[SIZE]] x i8
+// AMDGPU: [[A:%.*]] = llvm.alloca [[SIZE]] x i8 : (i64) -> !llvm.ptr<5>
+// AMDGPU: [[ALLOC:%.*]] = llvm.addrspacecast [[A]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: llvm.return [[ALLOC]] : !llvm.ptr

 // -----

@@ -1173,7 +1189,9 @@ func.func @alloca_multidim_array(%0 : index) -> !fir.ref<!fir.array<8x16x32xf32>> {
 // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64
 // CHECK: [[MUL1:%.*]] = llvm.mul [[ONE]], [[OP1]] : i64
 // CHECK: [[TOTAL:%.*]] = llvm.mul [[MUL1]], [[OP2]] : i64
-// CHECK: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<32 x array<16 x array<8 x f32>>>
+// GENERIC: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<32 x array<16 x array<8 x f32>>>
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<32 x array<16 x array<8 x f32>>> : (i64) -> !llvm.ptr<5>
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: llvm.return [[A]] : !llvm.ptr

 // -----

@@ -1192,7 +1210,9 @@ func.func @alloca_const_interior_array(%0 : index) -> !fir.ref<!fir.array<8x9x?xf32>> {
-// CHECK: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<9 x array<8 x f32>>
+// GENERIC: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<9 x array<8 x f32>>
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<9 x array<8 x f32>> : (i64) -> !llvm.ptr<5>
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: llvm.return [[A]] : !llvm.ptr

 // -----

@@ -1212,7 +1232,9 @@ func.func @alloca_array_with_holes(%0 : index, %1 : index) -> !fir.ref
-// CHECK: [[RES:%.*]] = llvm.alloca [[PROD3]] x !llvm.array<4 x i32>
+// GENERIC: [[RES:%.*]] = llvm.alloca [[PROD3]] x !llvm.array<4 x i32>
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[PROD3]] x !llvm.array<4 x i32> : (i64) -> !llvm.ptr<5>
+// AMDGPU: [[RES:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: llvm.return [[RES]] : !llvm.ptr

 // -----

@@ -1551,7 +1573,9 @@ func.func @embox0(%arg0: !fir.ref<!fir.array<100xi32>>) {
 // CHECK-LABEL: func @embox0(
 // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr
 // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32
-// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+// AMDGPU: %[[AA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(9 : i32) : i32
 // CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
 // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
@@ -1694,7 +1718,7 @@ func.func @embox1(%arg0: !fir.ref<!fir.type<_QMtest_dinitTtseq{i:i32}>>) {
 // CHECK: %{{.*}} = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
 // CHECK: %[[F18ADDENDUM:.*]] = llvm.mlir.constant(1 : i32) : i32
 // CHECK: %[[F18ADDENDUM_I8:.*]] = llvm.trunc %[[F18ADDENDUM]] : i32 to i8
-// CHECK: %{{.*}} = llvm.insertvalue %[[F18ADDENDUM_I8]], %17[6] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
+// CHECK: %{{.*}} = llvm.insertvalue %[[F18ADDENDUM_I8]], %{{.*}}[6] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
 // CHECK: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
 // CHECK: %{{.*}} = llvm.insertvalue %[[TDESC]], %{{.*}}[7] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
@@ -1752,7 +1776,9 @@ func.func @no_reassoc(%arg0: !fir.ref<i32>) {
 // CHECK-LABEL: llvm.func @no_reassoc(
 // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr) {
 // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
-// CHECK: %[[ALLOC:.*]] = llvm.alloca %[[C1]] x i32 : (i64) -> !llvm.ptr
+// GENERIC: %[[ALLOC:.*]] = llvm.alloca %[[C1]] x i32 : (i64) -> !llvm.ptr
+// AMDGPU: %[[AA:.*]] = llvm.alloca %[[C1]] x i32 : (i64) -> !llvm.ptr<5>
+// AMDGPU: %[[ALLOC:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: %[[LOAD:.*]] = llvm.load %[[ARG0]] : !llvm.ptr -> i32
 // CHECK: llvm.store %[[LOAD]], %[[ALLOC]] : i32, !llvm.ptr
 // CHECK: llvm.return
@@ -1772,7 +1798,9 @@ func.func @xembox0(%arg0: !fir.ref<!fir.array<?xi32>>) {
 // CHECK-LABEL: llvm.func @xembox0(
 // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr
 // CHECK: %[[ALLOCA_SIZE:.*]] = llvm.mlir.constant(1 : i32) : i32
-// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+// AMDGPU: %[[AA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
 // CHECK: %[[TYPE:.*]] = llvm.mlir.constant(9 : i32) : i32
 // CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
@@ -1860,7 +1888,9 @@ func.func private @_QPxb(!fir.box<!fir.array<?x?xf64>>)
 // CHECK-LABEL: llvm.func @_QPsb(
 // CHECK-SAME: %[[N:.*]]: i64, %[[SH1:.*]]: i64, %[[SH2:.*]]: i64) {
 // CHECK: %[[ALLOCA_SIZE:.*]] = llvm.mlir.constant(1 : i32) : i32
-// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+// AMDGPU: %[[AA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: %[[C4:.*]] = llvm.mlir.constant(4 : index) : i64
 // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64
 // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) : i64
@@ -1871,7 +1901,9 @@ func.func private @_QPxb(!fir.box<!fir.array<?x?xf64>>)
 // CHECK: %[[C1_0:.*]] = llvm.mlir.constant(1 : i64) : i64
 // CHECK: %[[ARR_SIZE_TMP1:.*]] = llvm.mul %[[C1_0]], %[[N1]] : i64
 // CHECK: %[[ARR_SIZE:.*]] = llvm.mul %[[ARR_SIZE_TMP1]], %[[N2]] : i64
-// CHECK: %[[ARR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr"} : (i64) -> !llvm.ptr
+// GENERIC: %[[ARR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr"} : (i64) -> !llvm.ptr
+// AMDGPU: %[[AR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr"} : (i64) -> !llvm.ptr<5>
+// AMDGPU: %[[ARR:.*]] = llvm.addrspacecast %[[AR]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(28 : i32) : i32
 // CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
 // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
@@ -1941,15 +1973,21 @@ func.func private @_QPtest_dt_callee(%arg0: !fir.box<!fir.array<?xi32>>)
 // CHECK-LABEL: llvm.func @_QPtest_dt_slice
 // CHECK: %[[ALLOCA_SIZE:.*]] = llvm.mlir.constant(1 : i32) : i32
-// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+// AMDGPU: %[[AA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: %[[C20:.*]] = llvm.mlir.constant(20 : index) : i64
 // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
 // CHECK: %[[C10:.*]] = llvm.mlir.constant(10 : i64) : i64
 // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64
 // CHECK: %[[ALLOCA_SIZE_V:.*]] = llvm.mlir.constant(1 : i64) : i64
-// CHECK: %[[V:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v"} : (i64) -> !llvm.ptr
+// GENERIC: %[[V:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v"} : (i64) -> !llvm.ptr
+// AMDGPU: %[[AB:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v"} : (i64) -> !llvm.ptr<5>
+// AMDGPU: %[[V:.*]] = llvm.addrspacecast %[[AB]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: %[[ALLOCA_SIZE_X:.*]] = llvm.mlir.constant(1 : i64) : i64
-// CHECK: %[[X:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x"} : (i64) -> !llvm.ptr
+// GENERIC: %[[X:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x"} : (i64) -> !llvm.ptr
+// AMDGPU: %[[AC:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x"} : (i64) -> !llvm.ptr<5>
+// AMDGPU: %[[X:.*]] = llvm.addrspacecast %[[AC]] : !llvm.ptr<5> to !llvm.ptr
 // CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(9 : i32) : i32
 // CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
 // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
@@ -1987,7 +2025,7 @@ func.func private @_QPtest_dt_callee(%arg0: !fir.box<!fir.array<?xi32>>)
 // CHECK: %[[BASE_PTR:.*]] = llvm.getelementptr %[[X]][%[[ZERO]], %[[ADJUSTED_OFFSET]], 0] : (!llvm.ptr, i64, i64) -> !llvm.ptr, !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>>
 // CHECK: %[[BOX10:.*]] = llvm.insertvalue %[[BASE_PTR]], %[[BOX9]][0] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>
 // CHECK: llvm.store %[[BOX10]], %[[ALLOCA]] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>, !llvm.ptr
-// CHECK: llvm.call @_QPtest_dt_callee(%1) : (!llvm.ptr) -> ()
+// CHECK: llvm.call @_QPtest_dt_callee(%[[ALLOCA]]) : (!llvm.ptr) -> ()

 // Conversion with a subcomponent that indexes a 2d array field in a derived type.

@@ -2245,7 +2283,9 @@ func.func @test_rebox_1(%arg0: !fir.box<!fir.array<?x?xf32>>) {
 //CHECK-LABEL: llvm.func @test_rebox_1
 //CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr
 //CHECK: %[[ONE_1:.*]] = llvm.mlir.constant(1 : i32) : i32
-//CHECK: %[[RESULT_BOX_REF:.*]] = llvm.alloca %[[ONE_1]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+//GENERIC: %[[RESULT_BOX_REF:.*]] = llvm.alloca %[[ONE_1]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+//AMDGPU: %[[AA:.*]] = llvm.alloca %[[ONE_1]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+//AMDGPU: %[[RESULT_BOX_REF:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr
 //CHECK: %[[THREE:.*]] = llvm.mlir.constant(3 : index) : i64
 //CHECK: %[[FOUR:.*]] = llvm.mlir.constant(4 : index) : i64
 //CHECK: %[[FIVE:.*]] = llvm.mlir.constant(5 : index) : i64
@@ -2316,7 +2356,9 @@ func.func @foo(%arg0: !fir.box<!fir.array<?x!fir.type<t{i:i32,c:!fir.char<1,10>}>>>) {
 //CHECK-LABEL: llvm.func @foo
 //CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr
 //CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : i32
-//CHECK: %[[RESULT_BOX_REF:.*]] = llvm.alloca %[[ONE]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+//GENERIC: %[[RESULT_BOX_REF:.*]] = llvm.alloca %[[ONE]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+//AMDGPU: %[[AA:.*]] = llvm.alloca %[[ONE]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+//AMDGPU: %[[RESULT_BOX_REF:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr
 //CHECK: %[[RESULT_LB:.*]] = llvm.mlir.constant(3 : i64) : i64
 //CHECK: %[[RESULT_UB:.*]] = llvm.mlir.constant(60 : i64) : i64
 //CHECK: %[[RESULT_STRIDE:.*]] = llvm.mlir.constant(9 : i64) : i64
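
NOTE (illustration only, not part of the patch): with the new "datalayout" pass
option, a minimal FIR function such as the hypothetical @demo below is expected
to lower on amdgcn (using the same RUN line and data layout string as the
AMDGPU tests above, where A5 selects alloca address space 5 and P0 program
address space 0) to an alloca in addrspace(5) followed by an addrspacecast back
to the generic address space, mirroring the alloca_one CHECK lines:

  func.func @demo() -> !fir.ref<i32> {
    %a = fir.alloca i32
    return %a : !fir.ref<i32>
  }

  // Expected LLVM dialect output (sketch):
  // llvm.func @demo() -> !llvm.ptr {
  //   %0 = llvm.mlir.constant(1 : i64) : i64
  //   %1 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr<5>
  //   %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
  //   llvm.return %2 : !llvm.ptr
  // }

On targets whose data layout leaves both memory spaces at the default of 0, the
same input keeps lowering to a plain llvm.alloca with no cast, as the GENERIC
check lines show.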