Skip to content

[flang][acc] Ensure all acc.loop get a default parallelism determination mode #143623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions flang/lib/Lower/OpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2150,6 +2150,70 @@ privatizeIv(Fortran::lower::AbstractConverter &converter,
ivPrivate.push_back(privateValue);
}

/// Make sure the loop being lowered carries a default (device_type `none`)
/// parallelism determination mode: one of `seq`, `independent`, or `auto`.
///
/// If any of the three device-type lists already contains a `none` entry the
/// user has specified the default mode explicitly and nothing is changed.
/// Otherwise exactly one `none` entry is appended to one of the lists, chosen
/// from the enclosing compute construct:
///   - orphaned loop or `acc.parallel` parent -> `independent`
///   - `acc.serial` parent                    -> `seq`, or `auto` when the loop
///                                               already has gang/worker/vector
///   - `acc.kernels` parent                   -> `auto`
///
/// \param converter              Lowering converter; its builder supplies the
///                               current insertion block and the MLIR context.
/// \param loopOp                 The loop whose gang/worker/vector settings
///                               are inspected in the `acc.serial` case.
/// \param seqDeviceTypes         In/out list of device types marked `seq`.
/// \param independentDeviceTypes In/out list of device types marked
///                               `independent`.
/// \param autoDeviceTypes        In/out list of device types marked `auto`.
static void determineDefaultLoopParMode(
    Fortran::lower::AbstractConverter &converter, mlir::acc::LoopOp &loopOp,
    llvm::SmallVector<mlir::Attribute> &seqDeviceTypes,
    llvm::SmallVector<mlir::Attribute> &independentDeviceTypes,
    llvm::SmallVector<mlir::Attribute> &autoDeviceTypes) {
  // Every entry of the device-type lists is expected to be a DeviceTypeAttr,
  // so use `cast` (which asserts on a mismatch) rather than calling a member
  // on an unchecked `dyn_cast` result.
  auto hasDeviceNone = [](mlir::Attribute attr) -> bool {
    return mlir::cast<mlir::acc::DeviceTypeAttr>(attr).getValue() ==
           mlir::acc::DeviceType::None;
  };
  bool hasDefaultSeq = llvm::any_of(seqDeviceTypes, hasDeviceNone);
  bool hasDefaultIndependent =
      llvm::any_of(independentDeviceTypes, hasDeviceNone);
  bool hasDefaultAuto = llvm::any_of(autoDeviceTypes, hasDeviceNone);
  if (hasDefaultSeq || hasDefaultIndependent || hasDefaultAuto)
    return; // Default loop par mode is already specified.

  // The single attribute that gets appended to whichever list is selected.
  mlir::Attribute deviceTypeNone = mlir::acc::DeviceTypeAttr::get(
      converter.getFirOpBuilder().getContext(), mlir::acc::DeviceType::None);

  mlir::Region *currentRegion =
      converter.getFirOpBuilder().getBlock()->getParent();
  mlir::Operation *parentOp = mlir::acc::getEnclosingComputeOp(*currentRegion);
  const bool isOrphanedLoop = !parentOp;

  if (isOrphanedLoop || mlir::isa<mlir::acc::ParallelOp>(parentOp)) {
    // As per OpenACC 3.3 standard section 2.9.6 independent clause:
    // A loop construct with no auto or seq clause is treated as if it has the
    // independent clause when it is an orphaned loop construct or its parent
    // compute construct is a parallel construct.
    independentDeviceTypes.push_back(deviceTypeNone);
    return;
  }

  // From here on the loop is known to be nested in a compute construct.
  if (mlir::isa<mlir::acc::SerialOp>(parentOp)) {
    // Serial construct implies `seq` clause on loop. However, this
    // conflicts with parallelism assignment if already set. Therefore check
    // that first.
    bool hasDefaultGangWorkerOrVector =
        loopOp.hasVector() || loopOp.getVectorValue() || loopOp.hasWorker() ||
        loopOp.getWorkerValue() || loopOp.hasGang() ||
        loopOp.getGangValue(mlir::acc::GangArgType::Num) ||
        loopOp.getGangValue(mlir::acc::GangArgType::Dim) ||
        loopOp.getGangValue(mlir::acc::GangArgType::Static);
    if (!hasDefaultGangWorkerOrVector) {
      seqDeviceTypes.push_back(deviceTypeNone);
    } else {
      // Since the loop has some parallelism assigned - we cannot assign `seq`.
      // However, the `acc.loop` verifier will check that one of seq,
      // independent, or auto is marked. Seems reasonable to mark as auto since
      // the OpenACC spec does say "If not, or if it is unable to make a
      // determination, it must treat the auto clause as if it is a seq clause,
      // and it must ignore any gang, worker, or vector clauses on the loop
      // construct"
      autoDeviceTypes.push_back(deviceTypeNone);
    }
    return;
  }

  // As per OpenACC 3.3 standard section 2.9.7 auto clause:
  // When the parent compute construct is a kernels construct, a loop
  // construct with no independent or seq clause is treated as if it has the
  // auto clause.
  assert(mlir::isa<mlir::acc::KernelsOp>(parentOp) &&
         "Expected kernels construct");
  autoDeviceTypes.push_back(deviceTypeNone);
}

static mlir::acc::LoopOp createLoopOp(
Fortran::lower::AbstractConverter &converter,
mlir::Location currentLocation,
Expand Down Expand Up @@ -2482,6 +2546,9 @@ static mlir::acc::LoopOp createLoopOp(
loopOp.setTileOperandsSegmentsAttr(
builder.getDenseI32ArrayAttr(tileOperandsSegments));

// Determine the loop's default par mode - either seq, independent, or auto.
determineDefaultLoopParMode(converter, loopOp, seqDeviceTypes,
independentDeviceTypes, autoDeviceTypes);
if (!seqDeviceTypes.empty())
loopOp.setSeqAttr(builder.getArrayAttr(seqDeviceTypes));
if (!independentDeviceTypes.empty())
Expand Down
28 changes: 14 additions & 14 deletions flang/test/Lower/OpenACC/acc-kernels-loop.f90
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ subroutine acc_kernels_loop
! CHECK: acc.kernels {
! CHECK: acc.loop private{{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]{{.*}}}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand All @@ -59,7 +59,7 @@ subroutine acc_kernels_loop
! CHECK: acc.kernels combined(loop) {
! CHECK: acc.loop combined(kernels) private{{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]{{.*}}}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand Down Expand Up @@ -490,7 +490,7 @@ subroutine acc_kernels_loop
! CHECK: acc.kernels {{.*}} {
! CHECK: acc.loop {{.*}} gang {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}{{$}}
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand All @@ -503,7 +503,7 @@ subroutine acc_kernels_loop
! CHECK: [[GANGNUM1:%.*]] = arith.constant 8 : i32
! CHECK: acc.loop {{.*}} gang({num=[[GANGNUM1]] : i32}) {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand All @@ -516,7 +516,7 @@ subroutine acc_kernels_loop
! CHECK: [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK: acc.loop {{.*}} gang({num=[[GANGNUM2]] : i32}) {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand All @@ -528,7 +528,7 @@ subroutine acc_kernels_loop
! CHECK: acc.kernels {{.*}} {
! CHECK: acc.loop {{.*}} gang({num=%{{.*}} : i32, static=%{{.*}} : i32})
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand All @@ -540,7 +540,7 @@ subroutine acc_kernels_loop
! CHECK: acc.kernels {{.*}} {
! CHECK: acc.loop {{.*}} vector {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}{{$}}
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand All @@ -553,7 +553,7 @@ subroutine acc_kernels_loop
! CHECK: [[CONSTANT128:%.*]] = arith.constant 128 : i32
! CHECK: acc.loop {{.*}} vector([[CONSTANT128]] : i32) {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand All @@ -566,7 +566,7 @@ subroutine acc_kernels_loop
! CHECK: [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK: acc.loop {{.*}} vector([[VECTORLENGTH]] : i32) {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand All @@ -578,7 +578,7 @@ subroutine acc_kernels_loop
! CHECK: acc.kernels {{.*}} {
! CHECK: acc.loop {{.*}} worker {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}{{$}}
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand All @@ -591,7 +591,7 @@ subroutine acc_kernels_loop
! CHECK: [[WORKER128:%.*]] = arith.constant 128 : i32
! CHECK: acc.loop {{.*}} worker([[WORKER128]] : i32) {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand All @@ -605,7 +605,7 @@ subroutine acc_kernels_loop
! CHECK: acc.kernels {{.*}} {
! CHECK: acc.loop {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true, true>}
! CHECK-NEXT: } attributes {{{.*}}collapse = [2], collapseDeviceType = [#acc.device_type<none>]{{.*}}}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand All @@ -621,9 +621,9 @@ subroutine acc_kernels_loop
! CHECK: acc.loop {{.*}} {
! CHECK: acc.loop {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]{{.*}}}
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]{{.*}}}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand Down
Loading