diff --git a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp index cb9e48cced2a1..e440852b3103a 100644 --- a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp +++ b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp @@ -180,41 +180,50 @@ class DoConcurrentConversion std::optional<mlir::ArrayAttr> localSyms = loop.getLocalSyms(); - for (auto [localVar, localArg, localizerSym] : llvm::zip_equal( + for (auto localInfo : llvm::zip_equal( loop.getLocalVars(), loop.getRegionLocalArgs(), *localSyms)) { + mlir::Value localVar = std::get<0>(localInfo); + mlir::BlockArgument localArg = std::get<1>(localInfo); + mlir::Attribute localizerSym = std::get<2>(localInfo); mlir::SymbolRefAttr localizerName = llvm::cast<mlir::SymbolRefAttr>(localizerSym); fir::LocalitySpecifierOp localizer = findLocalizer(loop, localizerName); - if (!localizer.getInitRegion().empty() || - !localizer.getDeallocRegion().empty()) - TODO(localizer.getLoc(), "localizers with `init` and `dealloc` " - "regions are not handled yet."); - // TODO Should this be a heap allocation instead? For now, we allocate // on the stack for each loop iteration. mlir::Value localAlloc = rewriter.create<fir::AllocaOp>(loop.getLoc(), localizer.getType()); - if (localizer.getLocalitySpecifierType() == - fir::LocalitySpecifierType::LocalInit) { + auto cloneLocalizerRegion = [&](mlir::Region &region, + mlir::ValueRange regionArgs, + mlir::Block::iterator insertionPoint) { // It is reasonable to make this assumption since, at this stage, // control-flow ops are not converted yet. Therefore, things like `if` // conditions will still be represented by their encapsulating `fir` // dialect ops. 
- assert(localizer.getCopyRegion().hasOneBlock() && - "Expected localizer to have a single block."); - mlir::Block *beforeLocalInit = rewriter.getInsertionBlock(); - mlir::Block *afterLocalInit = rewriter.splitBlock( - rewriter.getInsertionBlock(), rewriter.getInsertionPoint()); - rewriter.cloneRegionBefore(localizer.getCopyRegion(), afterLocalInit); - mlir::Block *copyRegionBody = beforeLocalInit->getNextNode(); - - rewriter.eraseOp(copyRegionBody->getTerminator()); - rewriter.mergeBlocks(afterLocalInit, copyRegionBody); - rewriter.mergeBlocks(copyRegionBody, beforeLocalInit, - {localVar, localArg}); - } + assert(region.hasOneBlock() && + "Expected localizer region to have a single block."); + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(rewriter.getInsertionBlock(), + insertionPoint); + mlir::IRMapping mapper; + mapper.map(region.getArguments(), regionArgs); + for (mlir::Operation &op : region.front().without_terminator()) + (void)rewriter.clone(op, mapper); + }; + + if (!localizer.getInitRegion().empty()) + cloneLocalizerRegion(localizer.getInitRegion(), {localVar, localArg}, + rewriter.getInsertionPoint()); + + if (localizer.getLocalitySpecifierType() == + fir::LocalitySpecifierType::LocalInit) + cloneLocalizerRegion(localizer.getCopyRegion(), {localVar, localArg}, + rewriter.getInsertionPoint()); + + if (!localizer.getDeallocRegion().empty()) + cloneLocalizerRegion(localizer.getDeallocRegion(), {localArg}, + rewriter.getInsertionBlock()->end()); rewriter.replaceAllUsesWith(localArg, localAlloc); } diff --git a/flang/test/Transforms/do-concurrent-localizer-dealloc-region.fir b/flang/test/Transforms/do-concurrent-localizer-dealloc-region.fir new file mode 100644 index 0000000000000..b59ffdfb34adf --- /dev/null +++ b/flang/test/Transforms/do-concurrent-localizer-dealloc-region.fir @@ -0,0 +1,126 @@ +// Tests converting `fir.local` ops that have `dealloc` regions. 
+ +// RUN: fir-opt --split-input-file --simplify-fir-operations %s | FileCheck %s + +fir.local {type = local} @_QFlocalizer_with_dealloc_regionEa_private_box_Uxi32 : !fir.box> init { +^bb0(%arg0: !fir.ref>>, %arg1: !fir.ref>>): + %c0 = arith.constant 0 : index + %0 = fir.load %arg0 : !fir.ref>> + %1:3 = fir.box_dims %0, %c0 : (!fir.box>, index) -> (index, index, index) + %2 = fir.shape %1#1 : (index) -> !fir.shape<1> + %3 = fir.allocmem !fir.array, %1#1 {bindc_name = ".tmp", uniq_name = ""} + %4 = fir.declare %3(%2) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> !fir.heap> + %5 = fir.embox %4(%2) : (!fir.heap>, !fir.shape<1>) -> !fir.box> + %6 = fir.shape_shift %1#0, %1#1 : (index, index) -> !fir.shapeshift<1> + %7 = fir.rebox %5(%6) : (!fir.box>, !fir.shapeshift<1>) -> !fir.box> + fir.store %7 to %arg1 : !fir.ref>> + fir.yield(%arg1 : !fir.ref>>) +} dealloc { +^bb0(%arg0: !fir.ref>>): + %c0_i64 = arith.constant 0 : i64 + %0 = fir.load %arg0 : !fir.ref>> + %1 = fir.box_addr %0 : (!fir.box>) -> !fir.ref> + %2 = fir.convert %1 : (!fir.ref>) -> i64 + %3 = arith.cmpi ne, %2, %c0_i64 : i64 + fir.if %3 { + %4 = fir.convert %1 : (!fir.ref>) -> !fir.heap> + fir.freemem %4 : !fir.heap> + } + fir.yield +} + +func.func @_QPlocalizer_with_dealloc_region(%arg0: !fir.ref {fir.bindc_name = "n"}) { + %c42_i32 = arith.constant 42 : i32 + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %0 = fir.alloca !fir.box> + %1 = fir.dummy_scope : !fir.dscope + %2 = fir.declare %arg0 dummy_scope %1 {uniq_name = "_QFlocalizer_with_dealloc_regionEn"} : (!fir.ref, !fir.dscope) -> !fir.ref + %3 = fir.load %2 : !fir.ref + %4 = fir.convert %3 : (i32) -> index + %5 = arith.cmpi sgt, %4, %c0 : index + %6 = arith.select %5, %4, %c0 : index + %7 = fir.alloca !fir.array, %6 {bindc_name = "a", uniq_name = "_QFlocalizer_with_dealloc_regionEa"} + %8 = fir.shape %6 : (index) -> !fir.shape<1> + %9 = fir.declare %7(%8) {uniq_name = "_QFlocalizer_with_dealloc_regionEa"} : (!fir.ref>, 
!fir.shape<1>) -> !fir.ref> + %10 = fir.embox %9(%8) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + fir.store %10 to %0 : !fir.ref>> + fir.do_concurrent { + %11 = fir.alloca i32 {bindc_name = "i"} + %12 = fir.declare %11 {uniq_name = "_QFlocalizer_with_dealloc_regionEi"} : (!fir.ref) -> !fir.ref + fir.do_concurrent.loop (%arg1) = (%c1) to (%4) step (%c1) local(@_QFlocalizer_with_dealloc_regionEa_private_box_Uxi32 %0 -> %arg2 : !fir.ref>>) { + %13 = fir.convert %arg1 : (index) -> i32 + fir.store %13 to %12 : !fir.ref + %14 = fir.declare %arg2 {uniq_name = "_QFlocalizer_with_dealloc_regionEa"} : (!fir.ref>>) -> !fir.ref>> + %15 = fir.load %14 : !fir.ref>> + %16 = fir.load %12 : !fir.ref + %17 = fir.convert %16 : (i32) -> i64 + %18:3 = fir.box_dims %15, %c0 : (!fir.box>, index) -> (index, index, index) + %19 = fir.shift %18#0 : (index) -> !fir.shift<1> + %20 = fir.array_coor %15(%19) %17 : (!fir.box>, !fir.shift<1>, i64) -> !fir.ref + fir.store %c42_i32 to %20 : !fir.ref + } + } + return +} + +// CHECK-LABEL: func.func @_QPlocalizer_with_dealloc_region( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 42 : i32 +// CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i"} +// CHECK: %[[VAL_5:.*]] = fir.declare %[[VAL_4]] {uniq_name = "_QFlocalizer_with_dealloc_regionEi"} : (!fir.ref) -> !fir.ref +// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box> +// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_8:.*]] = fir.declare %[[ARG0]] dummy_scope %[[VAL_7]] {uniq_name = "_QFlocalizer_with_dealloc_regionEn"} : (!fir.ref, !fir.dscope) -> !fir.ref +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_8]] : !fir.ref +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index +// CHECK: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_1]] : index +// CHECK: 
%[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_1]] : index +// CHECK: %[[VAL_13:.*]] = fir.alloca !fir.array, %[[VAL_12]] {bindc_name = "a", uniq_name = "_QFlocalizer_with_dealloc_regionEa"} +// CHECK: %[[VAL_14:.*]] = fir.shape %[[VAL_12]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_15:.*]] = fir.declare %[[VAL_13]](%[[VAL_14]]) {uniq_name = "_QFlocalizer_with_dealloc_regionEa"} : (!fir.ref>, !fir.shape<1>) -> !fir.ref> +// CHECK: %[[VAL_16:.*]] = fir.embox %[[VAL_15]](%[[VAL_14]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +// CHECK: fir.store %[[VAL_16]] to %[[VAL_6]] : !fir.ref>> +// CHECK: fir.do_loop %[[VAL_17:.*]] = %[[VAL_2]] to %[[VAL_10]] step %[[VAL_2]] unordered { + +// Local allocation +// CHECK: %[[VAL_18:.*]] = fir.alloca !fir.box> + +// `init` region body +// CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_6]] : !fir.ref>> +// CHECK: %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_19]], %[[VAL_1]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_21:.*]] = fir.shape %[[VAL_20]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_22:.*]] = fir.allocmem !fir.array, %[[VAL_20]]#1 {bindc_name = ".tmp", uniq_name = ""} +// CHECK: %[[VAL_23:.*]] = fir.declare %[[VAL_22]](%[[VAL_21]]) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> !fir.heap> +// CHECK: %[[VAL_24:.*]] = fir.embox %[[VAL_23]](%[[VAL_21]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_25:.*]] = fir.shape_shift %[[VAL_20]]#0, %[[VAL_20]]#1 : (index, index) -> !fir.shapeshift<1> +// CHECK: %[[VAL_26:.*]] = fir.rebox %[[VAL_24]](%[[VAL_25]]) : (!fir.box>, !fir.shapeshift<1>) -> !fir.box> +// CHECK: fir.store %[[VAL_26]] to %[[VAL_18]] : !fir.ref>> + +// Loop body +// CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_17]] : (index) -> i32 +// CHECK: fir.store %[[VAL_27]] to %[[VAL_5]] : !fir.ref +// CHECK: %[[VAL_28:.*]] = fir.declare %[[VAL_18]] {uniq_name = "_QFlocalizer_with_dealloc_regionEa"} : (!fir.ref>>) -> !fir.ref>> +// CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_28]] 
: !fir.ref>> +// CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_5]] : !fir.ref +// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> i64 +// CHECK: %[[VAL_32:.*]]:3 = fir.box_dims %[[VAL_29]], %[[VAL_1]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_33:.*]] = fir.shift %[[VAL_32]]#0 : (index) -> !fir.shift<1> +// CHECK: %[[VAL_34:.*]] = fir.array_coor %[[VAL_29]](%[[VAL_33]]) %[[VAL_31]] : (!fir.box>, !fir.shift<1>, i64) -> !fir.ref +// CHECK: fir.store %[[VAL_3]] to %[[VAL_34]] : !fir.ref + +// `dealloc` region +// CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_18]] : !fir.ref>> +// CHECK: %[[VAL_36:.*]] = fir.box_addr %[[VAL_35]] : (!fir.box>) -> !fir.ref> +// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (!fir.ref>) -> i64 +// CHECK: %[[VAL_38:.*]] = arith.cmpi ne, %[[VAL_37]], %[[VAL_0]] : i64 +// CHECK: fir.if %[[VAL_38]] { +// CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_36]] : (!fir.ref>) -> !fir.heap> +// CHECK: fir.freemem %[[VAL_39]] : !fir.heap> +// CHECK: } +// CHECK: } +// CHECK: return +// CHECK: } diff --git a/flang/test/Transforms/do-concurrent-localizer-init-region.fir b/flang/test/Transforms/do-concurrent-localizer-init-region.fir new file mode 100644 index 0000000000000..ebb56aec278f6 --- /dev/null +++ b/flang/test/Transforms/do-concurrent-localizer-init-region.fir @@ -0,0 +1,102 @@ +// Tests converting `fir.local` ops that have `init` regions. 
+ +// RUN: fir-opt --split-input-file --simplify-fir-operations %s | FileCheck %s + +fir.local {type = local_init} @_QFlocalizer_with_init_regionEp_firstprivate_box_ptr_Uxi32 : !fir.box>> init { +^bb0(%arg0: !fir.ref>>>, %arg1: !fir.ref>>>): + %c0 = arith.constant 0 : index + %0 = fir.shape %c0 : (index) -> !fir.shape<1> + %1 = fir.zero_bits !fir.ptr> + %2 = fir.embox %1(%0) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> + fir.store %2 to %arg1 : !fir.ref>>> + fir.yield(%arg1 : !fir.ref>>>) +} copy { +^bb0(%arg0: !fir.ref>>>, %arg1: !fir.ref>>>): + %0 = fir.load %arg0 : !fir.ref>>> + fir.store %0 to %arg1 : !fir.ref>>> + fir.yield(%arg1 : !fir.ref>>>) +} + +func.func @_QPlocalizer_with_init_region() { + %c42_i32 = arith.constant 42 : i32 + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFlocalizer_with_init_regionEn"} + %2 = fir.declare %1 {uniq_name = "_QFlocalizer_with_init_regionEn"} : (!fir.ref) -> !fir.ref + %3 = fir.alloca !fir.box>> {bindc_name = "p", uniq_name = "_QFlocalizer_with_init_regionEp"} + %4 = fir.zero_bits !fir.ptr> + %5 = fir.shape %c0 : (index) -> !fir.shape<1> + %6 = fir.embox %4(%5) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> + fir.store %6 to %3 : !fir.ref>>> + %7 = fir.declare %3 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFlocalizer_with_init_regionEp"} : (!fir.ref>>>) -> !fir.ref>>> + %8 = fir.load %2 : !fir.ref + %9 = fir.convert %8 : (i32) -> index + + fir.do_concurrent { + %10 = fir.alloca i32 {bindc_name = "i"} + %11 = fir.declare %10 {uniq_name = "_QFlocalizer_with_init_regionEi"} : (!fir.ref) -> !fir.ref + fir.do_concurrent.loop (%arg0) = (%c1) to (%9) step (%c1) local(@_QFlocalizer_with_init_regionEp_firstprivate_box_ptr_Uxi32 %7 -> %arg1 : !fir.ref>>>) { + %12 = fir.convert %arg0 : (index) -> i32 + fir.store %12 to %11 : !fir.ref + %13 = fir.declare %arg1 {fortran_attrs = #fir.var_attrs, uniq_name = 
"_QFlocalizer_with_init_regionEp"} : (!fir.ref>>>) -> !fir.ref>>> + %14 = fir.load %13 : !fir.ref>>> + %15 = fir.load %11 : !fir.ref + %16 = fir.convert %15 : (i32) -> i64 + %17:3 = fir.box_dims %14, %c0 : (!fir.box>>, index) -> (index, index, index) + %18 = fir.shift %17#0 : (index) -> !fir.shift<1> + %19 = fir.array_coor %14(%18) %16 : (!fir.box>>, !fir.shift<1>, i64) -> !fir.ref + fir.store %c42_i32 to %19 : !fir.ref + } + } + + return +} + +// CHECK-LABEL: func.func @_QPlocalizer_with_init_region() { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 42 : i32 +// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "i"} +// CHECK: %[[VAL_4:.*]] = fir.declare %[[VAL_3]] {uniq_name = "_QFlocalizer_with_init_regionEi"} : (!fir.ref) -> !fir.ref +// CHECK: %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFlocalizer_with_init_regionEn"} +// CHECK: %[[VAL_7:.*]] = fir.declare %[[VAL_6]] {uniq_name = "_QFlocalizer_with_init_regionEn"} : (!fir.ref) -> !fir.ref +// CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box>> {bindc_name = "p", uniq_name = "_QFlocalizer_with_init_regionEp"} +// CHECK: %[[VAL_9:.*]] = fir.zero_bits !fir.ptr> +// CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_0]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_9]](%[[VAL_10]]) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> +// CHECK: fir.store %[[VAL_11]] to %[[VAL_8]] : !fir.ref>>> +// CHECK: %[[VAL_12:.*]] = fir.declare %[[VAL_8]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFlocalizer_with_init_regionEp"} : (!fir.ref>>>) -> !fir.ref>>> +// CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_7]] : !fir.ref +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> index +// CHECK: fir.do_loop %[[VAL_15:.*]] = %[[VAL_1]] to %[[VAL_14]] step %[[VAL_1]] unordered { + +// Local allocation +// CHECK: %[[VAL_16:.*]] = fir.alloca 
!fir.box>> + +// `init` region body +// CHECK: %[[VAL_17:.*]] = fir.shape %[[VAL_0]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_18:.*]] = fir.zero_bits !fir.ptr> +// CHECK: %[[VAL_19:.*]] = fir.embox %[[VAL_18]](%[[VAL_17]]) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> +// CHECK: fir.store %[[VAL_19]] to %[[VAL_16]] : !fir.ref>>> + +// `copy` region body +// CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_12]] : !fir.ref>>> +// CHECK: fir.store %[[VAL_20]] to %[[VAL_16]] : !fir.ref>>> + +// loop body +// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_15]] : (index) -> i32 +// CHECK: fir.store %[[VAL_21]] to %[[VAL_4]] : !fir.ref +// CHECK: %[[VAL_22:.*]] = fir.declare %[[VAL_16]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFlocalizer_with_init_regionEp"} : (!fir.ref>>>) -> !fir.ref>>> +// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref>>> +// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_4]] : !fir.ref +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> i64 +// CHECK: %[[VAL_26:.*]]:3 = fir.box_dims %[[VAL_23]], %[[VAL_0]] : (!fir.box>>, index) -> (index, index, index) +// CHECK: %[[VAL_27:.*]] = fir.shift %[[VAL_26]]#0 : (index) -> !fir.shift<1> +// CHECK: %[[VAL_28:.*]] = fir.array_coor %[[VAL_23]](%[[VAL_27]]) %[[VAL_25]] : (!fir.box>>, !fir.shift<1>, i64) -> !fir.ref +// CHECK: fir.store %[[VAL_2]] to %[[VAL_28]] : !fir.ref +// CHECK: } +// CHECK: return +// CHECK: } +