diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index c19dcbdcdb390..48804c327e1c7 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -120,6 +120,9 @@ class AbstractConverter { const Fortran::semantics::Symbol &sym, mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr) = 0; + virtual void copyVar(mlir::Location loc, mlir::Value dst, + mlir::Value src) = 0; + /// For a given symbol, check if it is present in the inner-most /// level of the symbol map. virtual bool isPresentShallowLookup(Fortran::semantics::Symbol &sym) = 0; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 8006b9b426f4d..04d09c4848491 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -743,6 +743,11 @@ class FirConverter : public Fortran::lower::AbstractConverter { }); } + void copyVar(mlir::Location loc, mlir::Value dst, + mlir::Value src) override final { + copyVarHLFIR(loc, dst, src); + } + void copyHostAssociateVar( const Fortran::semantics::Symbol &sym, mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr) override final { @@ -777,64 +782,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { rhs_sb = &hsb; } - mlir::Location loc = genLocation(sym.name()); - - if (lowerToHighLevelFIR()) { - hlfir::Entity lhs{lhs_sb->getAddr()}; - hlfir::Entity rhs{rhs_sb->getAddr()}; - // Temporary_lhs is set to true in hlfir.assign below to avoid user - // assignment to be used and finalization to be called on the LHS. - // This may or may not be correct but mimics the current behaviour - // without HLFIR. - auto copyData = [&](hlfir::Entity l, hlfir::Entity r) { - // Dereference RHS and load it if trivial scalar. 
- r = hlfir::loadTrivialScalar(loc, *builder, r); - builder->create( - loc, r, l, - /*isWholeAllocatableAssignment=*/false, - /*keepLhsLengthInAllocatableAssignment=*/false, - /*temporary_lhs=*/true); - }; - if (lhs.isAllocatable()) { - // Deep copy allocatable if it is allocated. - // Note that when allocated, the RHS is already allocated with the LHS - // shape for copy on entry in createHostAssociateVarClone. - // For lastprivate, this assumes that the RHS was not reallocated in - // the OpenMP region. - lhs = hlfir::derefPointersAndAllocatables(loc, *builder, lhs); - mlir::Value addr = hlfir::genVariableRawAddress(loc, *builder, lhs); - mlir::Value isAllocated = builder->genIsNotNullAddr(loc, addr); - builder->genIfThen(loc, isAllocated) - .genThen([&]() { - // Copy the DATA, not the descriptors. - copyData(lhs, rhs); - }) - .end(); - } else if (lhs.isPointer()) { - // Set LHS target to the target of RHS (do not copy the RHS - // target data into the LHS target storage). - auto loadVal = builder->create(loc, rhs); - builder->create(loc, loadVal, lhs); - } else { - // Non ALLOCATABLE/POINTER variable. Simple DATA copy. 
- copyData(lhs, rhs); - } - } else { - fir::ExtendedValue lhs = symBoxToExtendedValue(*lhs_sb); - fir::ExtendedValue rhs = symBoxToExtendedValue(*rhs_sb); - mlir::Type symType = genType(sym); - if (auto seqTy = symType.dyn_cast()) { - Fortran::lower::StatementContext stmtCtx; - Fortran::lower::createSomeArrayAssignment(*this, lhs, rhs, localSymbols, - stmtCtx); - stmtCtx.finalizeAndReset(); - } else if (lhs.getBoxOf()) { - fir::factory::CharacterExprHelper{*builder, loc}.createAssign(lhs, rhs); - } else { - auto loadVal = builder->create(loc, fir::getBase(rhs)); - builder->create(loc, loadVal, fir::getBase(lhs)); - } - } + copyVar(sym, *lhs_sb, *rhs_sb); if (copyAssignIP && copyAssignIP->isSet() && sym.test(Fortran::semantics::Symbol::Flag::OmpLastPrivate)) { @@ -1092,6 +1040,79 @@ class FirConverter : public Fortran::lower::AbstractConverter { return true; } + void copyVar(const Fortran::semantics::Symbol &sym, + const Fortran::lower::SymbolBox &lhs_sb, + const Fortran::lower::SymbolBox &rhs_sb) { + mlir::Location loc = genLocation(sym.name()); + if (lowerToHighLevelFIR()) + copyVarHLFIR(loc, lhs_sb.getAddr(), rhs_sb.getAddr()); + else + copyVarFIR(loc, sym, lhs_sb, rhs_sb); + } + + void copyVarHLFIR(mlir::Location loc, mlir::Value dst, mlir::Value src) { + assert(lowerToHighLevelFIR()); + hlfir::Entity lhs{dst}; + hlfir::Entity rhs{src}; + // Temporary_lhs is set to true in hlfir.assign below to avoid user + // assignment to be used and finalization to be called on the LHS. + // This may or may not be correct but mimics the current behaviour + // without HLFIR. + auto copyData = [&](hlfir::Entity l, hlfir::Entity r) { + // Dereference RHS and load it if trivial scalar. + r = hlfir::loadTrivialScalar(loc, *builder, r); + builder->create( + loc, r, l, + /*isWholeAllocatableAssignment=*/false, + /*keepLhsLengthInAllocatableAssignment=*/false, + /*temporary_lhs=*/true); + }; + if (lhs.isAllocatable()) { + // Deep copy allocatable if it is allocated. 
+ // Note that when allocated, the RHS is already allocated with the LHS + // shape for copy on entry in createHostAssociateVarClone. + // For lastprivate, this assumes that the RHS was not reallocated in + // the OpenMP region. + lhs = hlfir::derefPointersAndAllocatables(loc, *builder, lhs); + mlir::Value addr = hlfir::genVariableRawAddress(loc, *builder, lhs); + mlir::Value isAllocated = builder->genIsNotNullAddr(loc, addr); + builder->genIfThen(loc, isAllocated) + .genThen([&]() { + // Copy the DATA, not the descriptors. + copyData(lhs, rhs); + }) + .end(); + } else if (lhs.isPointer()) { + // Set LHS target to the target of RHS (do not copy the RHS + // target data into the LHS target storage). + auto loadVal = builder->create(loc, rhs); + builder->create(loc, loadVal, lhs); + } else { + // Non ALLOCATABLE/POINTER variable. Simple DATA copy. + copyData(lhs, rhs); + } + } + + void copyVarFIR(mlir::Location loc, const Fortran::semantics::Symbol &sym, + const Fortran::lower::SymbolBox &lhs_sb, + const Fortran::lower::SymbolBox &rhs_sb) { + assert(!lowerToHighLevelFIR()); + fir::ExtendedValue lhs = symBoxToExtendedValue(lhs_sb); + fir::ExtendedValue rhs = symBoxToExtendedValue(rhs_sb); + mlir::Type symType = genType(sym); + if (auto seqTy = symType.dyn_cast()) { + Fortran::lower::StatementContext stmtCtx; + Fortran::lower::createSomeArrayAssignment(*this, lhs, rhs, localSymbols, + stmtCtx); + stmtCtx.finalizeAndReset(); + } else if (lhs.getBoxOf()) { + fir::factory::CharacterExprHelper{*builder, loc}.createAssign(lhs, rhs); + } else { + auto loadVal = builder->create(loc, fir::getBase(rhs)); + builder->create(loc, loadVal, fir::getBase(lhs)); + } + } + /// Map a block argument to a result or dummy symbol. This is not the /// definitive mapping. The specification expression have not been lowered /// yet. 
The final mapping will be done using this pre-mapping in diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 7dd25f75d9eb7..7011cfe58f785 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -22,6 +22,7 @@ #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/Todo.h" +#include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Parser/dump-parse-tree.h" #include "flang/Parser/parse-tree.h" @@ -592,6 +593,10 @@ class ClauseProcessor { processAllocate(llvm::SmallVectorImpl &allocatorOperands, llvm::SmallVectorImpl &allocateOperands) const; bool processCopyin() const; + bool processCopyPrivate( + mlir::Location currentLocation, + llvm::SmallVectorImpl &copyPrivateVars, + llvm::SmallVectorImpl &copyPrivateFuncs) const; bool processDepend(llvm::SmallVectorImpl &dependTypeOperands, llvm::SmallVectorImpl &dependOperands) const; bool @@ -1160,6 +1165,102 @@ class ReductionProcessor { } }; +/// Class that extracts information from the specified type. +class TypeInfo { +public: + TypeInfo(mlir::Type ty) { typeScan(ty); } + + // Returns the length of character types. + std::optional getCharLength() const { + return charLen; + } + + // Returns the shape of array types. + const llvm::SmallVector &getShape() const { return shape; } + + // Is the type inside a box? 
+ bool isBox() const { return inBox; } + +private: + void typeScan(mlir::Type type); + + std::optional charLen; + llvm::SmallVector shape; + bool inBox = false; +}; + +void TypeInfo::typeScan(mlir::Type ty) { + if (auto sty = mlir::dyn_cast(ty)) { + assert(shape.empty() && !sty.getShape().empty()); + shape = llvm::SmallVector(sty.getShape()); + typeScan(sty.getEleTy()); + } else if (auto bty = mlir::dyn_cast(ty)) { + inBox = true; + typeScan(bty.getEleTy()); + } else if (auto cty = mlir::dyn_cast(ty)) { + charLen = cty.getLen(); + } else if (auto hty = mlir::dyn_cast(ty)) { + typeScan(hty.getEleTy()); + } else if (auto pty = mlir::dyn_cast(ty)) { + typeScan(pty.getEleTy()); + } +} + +// Create a function that performs a copy between two variables, compatible +// with their types and attributes. +static mlir::func::FuncOp +createCopyFunc(mlir::Location loc, Fortran::lower::AbstractConverter &converter, + mlir::Type varType, fir::FortranVariableFlagsEnum varAttrs) { + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + mlir::ModuleOp module = builder.getModule(); + mlir::Type eleTy = mlir::cast(varType).getEleTy(); + TypeInfo typeInfo(eleTy); + std::string copyFuncName = + fir::getTypeAsString(eleTy, builder.getKindMap(), "_copy"); + + if (auto decl = module.lookupSymbol(copyFuncName)) + return decl; + + // create function + mlir::OpBuilder::InsertionGuard guard(builder); + mlir::OpBuilder modBuilder(module.getBodyRegion()); + llvm::SmallVector argsTy = {varType, varType}; + auto funcType = mlir::FunctionType::get(builder.getContext(), argsTy, {}); + mlir::func::FuncOp funcOp = + modBuilder.create(loc, copyFuncName, funcType); + funcOp.setVisibility(mlir::SymbolTable::Visibility::Private); + builder.createBlock(&funcOp.getRegion(), funcOp.getRegion().end(), argsTy, + {loc, loc}); + builder.setInsertionPointToStart(&funcOp.getRegion().back()); + // generate body + fir::FortranVariableFlagsAttr attrs; + if (varAttrs != fir::FortranVariableFlagsEnum::None) + 
attrs = fir::FortranVariableFlagsAttr::get(builder.getContext(), varAttrs); + llvm::SmallVector typeparams; + if (typeInfo.getCharLength().has_value()) { + mlir::Value charLen = builder.createIntegerConstant( + loc, builder.getCharacterLengthType(), *typeInfo.getCharLength()); + typeparams.push_back(charLen); + } + mlir::Value shape; + if (!typeInfo.isBox() && !typeInfo.getShape().empty()) { + llvm::SmallVector extents; + for (auto extent : typeInfo.getShape()) + extents.push_back( + builder.createIntegerConstant(loc, builder.getIndexType(), extent)); + shape = builder.create(loc, extents); + } + auto declDst = builder.create(loc, funcOp.getArgument(0), + copyFuncName + "_dst", shape, + typeparams, attrs); + auto declSrc = builder.create(loc, funcOp.getArgument(1), + copyFuncName + "_src", shape, + typeparams, attrs); + converter.copyVar(loc, declDst.getBase(), declSrc.getBase()); + builder.create(loc); + return funcOp; +} + static mlir::omp::ScheduleModifier translateScheduleModifier(const Fortran::parser::OmpScheduleModifierType &m) { switch (m.v) { @@ -1740,6 +1841,62 @@ bool ClauseProcessor::processCopyin() const { return hasCopyin; } +bool ClauseProcessor::processCopyPrivate( + mlir::Location currentLocation, + llvm::SmallVectorImpl &copyPrivateVars, + llvm::SmallVectorImpl &copyPrivateFuncs) const { + auto addCopyPrivateVar = [&](Fortran::semantics::Symbol *sym) { + mlir::Value symVal = converter.getSymbolAddress(*sym); + auto declOp = symVal.getDefiningOp(); + if (!declOp) + fir::emitFatalError(currentLocation, + "COPYPRIVATE is supported only in HLFIR mode"); + symVal = declOp.getBase(); + mlir::Type symType = symVal.getType(); + fir::FortranVariableFlagsEnum attrs = + declOp.getFortranAttrs().has_value() + ? *declOp.getFortranAttrs() + : fir::FortranVariableFlagsEnum::None; + mlir::Value cpVar = symVal; + + // CopyPrivate variables must be passed by reference. However, in the case + // of assumed shapes/vla the type is not a !fir.ref, but a !fir.box. 
+ // In these cases to retrieve the appropriate !fir.ref> to + // access the data we need we must perform an alloca and then store to it + // and retrieve the data from the new alloca. + if (mlir::isa(symType)) { + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + auto alloca = builder.create(currentLocation, symType); + builder.create(currentLocation, symVal, alloca); + cpVar = alloca; + } + + copyPrivateVars.push_back(cpVar); + mlir::func::FuncOp funcOp = + createCopyFunc(currentLocation, converter, cpVar.getType(), attrs); + copyPrivateFuncs.push_back(mlir::SymbolRefAttr::get(funcOp)); + }; + + bool hasCopyPrivate = findRepeatableClause( + [&](const ClauseTy::Copyprivate *copyPrivateClause, + const Fortran::parser::CharBlock &) { + const Fortran::parser::OmpObjectList &ompObjectList = + copyPrivateClause->v; + for (const Fortran::parser::OmpObject &ompObject : ompObjectList.v) { + Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject); + if (const auto *commonDetails = + sym->detailsIf()) { + for (const auto &mem : commonDetails->objects()) + addCopyPrivateVar(&*mem); + break; + } + addCopyPrivateVar(sym); + } + }); + + return hasCopyPrivate; +} + bool ClauseProcessor::processDepend( llvm::SmallVectorImpl &dependTypeOperands, llvm::SmallVectorImpl &dependOperands) const { @@ -2481,19 +2638,26 @@ genSingleOp(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OmpClauseList &beginClauseList, const Fortran::parser::OmpClauseList &endClauseList) { llvm::SmallVector allocateOperands, allocatorOperands; + llvm::SmallVector copyPrivateVars; + llvm::SmallVector copyPrivateFuncs; mlir::UnitAttr nowaitAttr; ClauseProcessor cp(converter, beginClauseList); cp.processAllocate(allocatorOperands, allocateOperands); - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_single); - ClauseProcessor(converter, endClauseList).processNowait(nowaitAttr); + ClauseProcessor ecp(converter, endClauseList); + ecp.processNowait(nowaitAttr); + 
ecp.processCopyPrivate(currentLocation, copyPrivateVars, copyPrivateFuncs); return genOpWithBody( converter, eval, genNested, currentLocation, /*outerCombined=*/false, &beginClauseList, allocateOperands, - allocatorOperands, nowaitAttr); + allocatorOperands, copyPrivateVars, + copyPrivateFuncs.empty() + ? nullptr + : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), + copyPrivateFuncs), + nowaitAttr); } static mlir::omp::TaskOp @@ -3367,7 +3531,8 @@ genOMP(Fortran::lower::AbstractConverter &converter, for (const auto &clause : endClauseList.v) { mlir::Location clauseLocation = converter.genLocation(clause.source); - if (!std::get_if(&clause.u)) + if (!std::get_if(&clause.u) && + !std::get_if(&clause.u)) TODO(clauseLocation, "OpenMP Block construct clause"); } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 2c570bc3abeb2..ef5a01c1ca821 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2429,7 +2429,8 @@ void OmpAttributeVisitor::CheckDataCopyingClause( // either 'private' or 'threadprivate' in enclosing context. if (!checkSymbol->test(Symbol::Flag::OmpThreadprivate) && !(HasSymbolInEnclosingScope(symbol, currScope()) && - symbol.test(Symbol::Flag::OmpPrivate))) { + (symbol.test(Symbol::Flag::OmpPrivate) || + symbol.test(Symbol::Flag::OmpFirstPrivate)))) { context_.Say(name.source, "COPYPRIVATE variable '%s' is not PRIVATE or THREADPRIVATE in " "outer context"_err_en_US, diff --git a/flang/test/Integration/OpenMP/copyprivate.f90 b/flang/test/Integration/OpenMP/copyprivate.f90 new file mode 100644 index 0000000000000..9318b743a9529 --- /dev/null +++ b/flang/test/Integration/OpenMP/copyprivate.f90 @@ -0,0 +1,97 @@ +!===----------------------------------------------------------------------===! +! This directory can be used to add Integration tests involving multiple +! stages of the compiler (for eg. from Fortran to LLVM IR). It should not +! 
contain executable tests. We should only add tests here sparingly and only +! if there is no other way to test. Repeat this message in each test that is +! added to this directory and sub-directories. +!===----------------------------------------------------------------------===! + +!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s + +!CHECK-DAG: define void @_copy_box_Uxi32(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_10xi32(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_i64(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_box_Uxi64(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_f32(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_2x3xf32(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_z32(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_10xz32(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_l32(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_5xl32(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_c8x8(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_10xc8x8(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_c16x5(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_rec__QFtest_typesTdt(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_box_heap_Uxi32(ptr %{{.*}}, ptr %{{.*}}) +!CHECK-DAG: define void @_copy_box_ptr_Uxc8x9(ptr %{{.*}}, ptr %{{.*}}) + +!CHECK-LABEL: define void @_copy_i32( +!CHECK-SAME: ptr %[[DST:.*]], ptr %[[SRC:.*]]) { +!CHECK-NEXT: %[[SRC_VAL:.*]] = load i32, ptr %[[SRC]] +!CHECK-NEXT: store i32 %[[SRC_VAL]], ptr %[[DST]] +!CHECK-NEXT: ret void +!CHECK-NEXT: } + +!CHECK-LABEL: define internal void @test_scalar_..omp_par({{.*}}) +!CHECK: %[[I:.*]] = alloca i32, i64 1 +!CHECK: %[[J:.*]] = alloca i32, i64 1 +!CHECK: %[[DID_IT:.*]] = alloca i32 +!CHECK: store i32 0, ptr %[[DID_IT]] +!CHECK: %[[THREAD_NUM1:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC:.*]]) +!CHECK: %[[RET:.*]] = call i32 
@__kmpc_single({{.*}}) +!CHECK: %[[NOT_ZERO:.*]] = icmp ne i32 %[[RET]], 0 +!CHECK: br i1 %[[NOT_ZERO]], label %[[OMP_REGION_BODY:.*]], label %[[OMP_REGION_END:.*]] + +!CHECK: [[OMP_REGION_END]]: +!CHECK: %[[THREAD_NUM2:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC:.*]]) +!CHECK: %[[DID_IT_VAL:.*]] = load i32, ptr %[[DID_IT]] +!CHECK: call void @__kmpc_copyprivate(ptr @[[LOC]], i32 %[[THREAD_NUM2]], i64 0, ptr %[[I]], ptr @_copy_i32, i32 %[[DID_IT_VAL]]) +!CHECK: %[[THREAD_NUM3:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC]]) +!CHECK: %[[DID_IT_VAL2:.*]] = load i32, ptr %[[DID_IT]] +!CHECK: call void @__kmpc_copyprivate(ptr @[[LOC]], i32 %[[THREAD_NUM3]], i64 0, ptr %[[J]], ptr @_copy_i32, i32 %[[DID_IT_VAL2]]) + +!CHECK: [[OMP_REGION_BODY]]: +!CHECK: br label %[[OMP_SINGLE_REGION:.*]] +!CHECK: [[OMP_SINGLE_REGION]]: +!CHECK: store i32 11, ptr %[[I]] +!CHECK: store i32 22, ptr %[[J]] +!CHECK: br label %[[OMP_REGION_CONT3:.*]] +!CHECK: [[OMP_REGION_CONT3:.*]]: +!CHECK: store i32 1, ptr %[[DID_IT]] +!CHECK: call void @__kmpc_end_single(ptr @[[LOC]], i32 %[[THREAD_NUM1]]) +!CHECK: br label %[[OMP_REGION_END]] +subroutine test_scalar() + integer :: i, j + + !$omp parallel private(i, j) + !$omp single + i = 11 + j = 22 + !$omp end single copyprivate(i, j) + !$omp end parallel +end subroutine + +subroutine test_types(a, n) + integer :: a(:), n + integer(4) :: i4, i4a(10) + integer(8) :: i8, i8a(n) + real :: r, ra(2, 3) + complex :: z, za(10) + logical :: l, la(5) + character(kind=1, len=8) :: c1, c1a(10) + character(kind=2, len=5) :: c2 + + type dt + integer :: i + real :: r + end type + type(dt) :: t + + integer, allocatable :: aloc(:) + character(kind=1, len=9), pointer :: ptr(:) + + !$omp parallel private(a, i4, i4a, i8, i8a, r, ra, z, za, l, la, c1, c1a, c2, t, aloc, ptr) + !$omp single + !$omp end single copyprivate(a, i4, i4a, i8, i8a, r, ra, z, za, l, la, c1, c1a, c2, t, aloc, ptr) + !$omp end parallel +end subroutine diff --git 
a/flang/test/Lower/OpenMP/Todo/copyprivate.f90 b/flang/test/Lower/OpenMP/Todo/copyprivate.f90 deleted file mode 100644 index 0d871427ce60f..0000000000000 --- a/flang/test/Lower/OpenMP/Todo/copyprivate.f90 +++ /dev/null @@ -1,13 +0,0 @@ -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s - -! CHECK: not yet implemented: OpenMP Block construct clause -subroutine sb - integer, save :: a - !$omp threadprivate(a) - !$omp parallel - !$omp single - a = 3 - !$omp end single copyprivate(a) - !$omp end parallel -end subroutine diff --git a/flang/test/Lower/OpenMP/copyprivate.f90 b/flang/test/Lower/OpenMP/copyprivate.f90 new file mode 100644 index 0000000000000..9b76a996ef3e1 --- /dev/null +++ b/flang/test/Lower/OpenMP/copyprivate.f90 @@ -0,0 +1,164 @@ +! Test COPYPRIVATE. +! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + +!CHECK-DAG: func private @_copy_i64(%{{.*}}: !fir.ref, %{{.*}}: !fir.ref) +!CHECK-DAG: func private @_copy_f32(%{{.*}}: !fir.ref, %{{.*}}: !fir.ref) +!CHECK-DAG: func private @_copy_f64(%{{.*}}: !fir.ref, %{{.*}}: !fir.ref) +!CHECK-DAG: func private @_copy_z32(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>) +!CHECK-DAG: func private @_copy_z64(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>) +!CHECK-DAG: func private @_copy_l32(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>) +!CHECK-DAG: func private @_copy_l64(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>) +!CHECK-DAG: func private @_copy_c8x3(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>) +!CHECK-DAG: func private @_copy_c8x8(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>) +!CHECK-DAG: func private @_copy_c16x8(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>) + +!CHECK-DAG: func private @_copy_box_Uxi32(%{{.*}}: !fir.ref>>, %{{.*}}: !fir.ref>>) +!CHECK-DAG: func private @_copy_10xi32(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>) +!CHECK-DAG: func private @_copy_3x4xi32(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>) +!CHECK-DAG: func private 
@_copy_10xf32(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>) +!CHECK-DAG: func private @_copy_3x4xz32(%{{.*}}: !fir.ref>>, %{{.*}}: !fir.ref>>) +!CHECK-DAG: func private @_copy_10xl32(%{{.*}}: !fir.ref>>, %{{.*}}: !fir.ref>>) +!CHECK-DAG: func private @_copy_3xc8x8(%{{.*}}: !fir.ref>>, %{{.*}}: !fir.ref>>) +!CHECK-DAG: func private @_copy_3xc16x5(%{{.*}}: !fir.ref>>, %{{.*}}: !fir.ref>>) + +!CHECK-DAG: func private @_copy_rec__QFtest_dtTdt(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>) +!CHECK-DAG: func private @_copy_box_heap_Uxi32(%{{.*}}: !fir.ref>>>, %{{.*}}: !fir.ref>>>) +!CHECK-DAG: func private @_copy_box_heap_i32(%{{.*}}: !fir.ref>>, %{{.*}}: !fir.ref>>) +!CHECK-DAG: func private @_copy_box_ptr_i32(%{{.*}}: !fir.ref>>, %{{.*}}: !fir.ref>>) +!CHECK-DAG: func private @_copy_box_ptr_Uxf32(%{{.*}}: !fir.ref>>>, %{{.*}}: !fir.ref>>>) +!CHECK-DAG: func private @_copy_box_heap_Uxc8x5(%{{.*}}: !fir.ref>>>>, %{{.*}}: !fir.ref>>>>) +!CHECK-DAG: func private @_copy_box_ptr_Uxc8x9(%{{.*}}: !fir.ref>>>>, %{{.*}}: !fir.ref>>>>) + +!CHECK-LABEL: func private @_copy_i32( +!CHECK-SAME: %[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref) { +!CHECK-NEXT: %[[DST:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_copy_i32_dst"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK-NEXT: %[[SRC:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_copy_i32_src"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK-NEXT: %[[SRC_VAL:.*]] = fir.load %[[SRC]]#0 : !fir.ref +!CHECK-NEXT: hlfir.assign %[[SRC_VAL]] to %[[DST]]#0 temporary_lhs : i32, !fir.ref +!CHECK-NEXT: return +!CHECK-NEXT: } + +!CHECK-LABEL: func @_QPtest_tp +!CHECK: omp.parallel +!CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_tpEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[J:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_tpEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[K:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_tpEk"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: omp.single copyprivate(%[[I]]#0 -> @_copy_i32 
: !fir.ref, %[[J]]#0 -> @_copy_i32 : !fir.ref, %[[K]]#0 -> @_copy_f32 : !fir.ref) +subroutine test_tp() + integer, save :: i, j + !$omp threadprivate(i, j) + real :: k + + k = 33.3 + !$omp parallel firstprivate(k) + !$omp single + i = 11 + j = 22 + !$omp end single copyprivate(i, j, k) + !$omp end parallel +end subroutine + +!CHECK-LABEL: func @_QPtest_scalar +!CHECK: omp.parallel +!CHECK: %[[I1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi1"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[I2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi2"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[I3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi3"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[R1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEr1"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[R2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEr2"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[C1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEc1"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[C2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEc2"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[L1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEl1"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[L2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEl2"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[S1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs1"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[S2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs2"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[S3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs3"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) +!CHECK: omp.single copyprivate(%[[I1]]#0 -> @_copy_i32 : !fir.ref, %[[I2]]#0 -> @_copy_i64 : !fir.ref, %[[I3]]#0 -> @_copy_i64 : !fir.ref, %[[R1]]#0 -> @_copy_f32 : 
!fir.ref, %[[R2]]#0 -> @_copy_f64 : !fir.ref, %[[C1]]#0 -> @_copy_z32 : !fir.ref>, %[[C2]]#0 -> @_copy_z64 : !fir.ref>, %[[L1]]#0 -> @_copy_l32 : !fir.ref>, %[[L2]]#0 -> @_copy_l64 : !fir.ref>, %[[S1]]#0 -> @_copy_c8x3 : !fir.ref>, %[[S2]]#0 -> @_copy_c8x8 : !fir.ref>, %[[S3]]#0 -> @_copy_c16x8 : !fir.ref>) +subroutine test_scalar() + integer(4) :: i1 + integer(8) :: i2, i3 + real(4) :: r1 + real(8) :: r2 + complex(4) :: c1 + complex(8) :: c2 + logical(4) :: l1 + logical(8) :: l2 + character(kind=1, len=3) :: s1 + character(kind=1, len=8) :: s2 + character(kind=2, len=8) :: s3 + + !$omp parallel private(i1, i2, i3, r1, r2, c1, c2, l1, l2, s1, s2, s3) + !$omp single + !$omp end single copyprivate(i1, i2, i3, r1, r2, c1, c2, l1, l2, s1, s2, s3) + !$omp end parallel +end subroutine + +!CHECK-LABEL: func @_QPtest_array +!CHECK: omp.parallel +!CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +!CHECK: %[[I1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi1"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[I2:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi2"} : (!fir.ref>, !fir.shape<2>) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[I3:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi3"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +!CHECK: %[[R1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEr1"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[C1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEc1"} : (!fir.ref>>, !fir.shape<2>) -> (!fir.ref>>, !fir.ref>>) +!CHECK: %[[L1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEl1"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +!CHECK: %[[S1:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_arrayEs1"} : (!fir.ref>>, !fir.shape<1>, index) -> (!fir.ref>>, !fir.ref>>) 
+!CHECK: %[[S2:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_arrayEs2"} : (!fir.ref>>, !fir.shape<1>, index) -> (!fir.ref>>, !fir.ref>>) +!CHECK: %[[A_REF:.*]] = fir.alloca !fir.box> +!CHECK: fir.store %[[A]]#0 to %[[A_REF]] : !fir.ref>> +!CHECK: %[[I3_REF:.*]] = fir.alloca !fir.box> +!CHECK: fir.store %[[I3]]#0 to %[[I3_REF]] : !fir.ref>> +!CHECK: omp.single copyprivate(%[[A_REF]] -> @_copy_box_Uxi32 : !fir.ref>>, %[[I1]]#0 -> @_copy_10xi32 : !fir.ref>, %[[I2]]#0 -> @_copy_3x4xi32 : !fir.ref>, %[[I3_REF]] -> @_copy_box_Uxi32 : !fir.ref>>, %[[R1]]#0 -> @_copy_10xf32 : !fir.ref>, %[[C1]]#0 -> @_copy_3x4xz32 : !fir.ref>>, %[[L1]]#0 -> @_copy_10xl32 : !fir.ref>>, %[[S1]]#0 -> @_copy_3xc8x8 : !fir.ref>>, %[[S2]]#0 -> @_copy_3xc16x5 : !fir.ref>>) +subroutine test_array(a, n) + integer :: a(:), n + integer :: i1(10), i2(3, 4), i3(n) + real :: r1(10) + complex :: c1(3, 4) + logical :: l1(10) + character(8) :: s1(3) + character(kind=2, len=5) :: s2(3) + + !$omp parallel private(a, i1, i2, i3, r1, c1, l1, s1, s2) + !$omp single + !$omp end single copyprivate(a, i1, i2, i3, r1, c1, l1, s1, s2) + !$omp end parallel +end subroutine + +!CHECK-LABEL: func @_QPtest_dt +!CHECK: omp.parallel +!CHECK: %[[T:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_dtEt"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +!CHECK: omp.single copyprivate(%[[T]]#0 -> @_copy_rec__QFtest_dtTdt : !fir.ref>) +subroutine test_dt() + type dt + integer :: i + real :: r + end type + type(dt) :: t + + !$omp parallel private(t) + !$omp single + !$omp end single copyprivate(t) + !$omp end parallel +end subroutine + +!CHECK-LABEL: func @_QPtest_attr +!CHECK: omp.parallel +!CHECK: %[[I1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_attrEi1"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +!CHECK: %[[I2:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_attrEi2"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +!CHECK: %[[I3:.*]]:2 = 
hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_attrEi3"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +!CHECK: %[[R1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_attrEr1"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +!CHECK: %[[C1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_attrEc1"} : (!fir.ref>>>>) -> (!fir.ref>>>>, !fir.ref>>>>) +!CHECK: %[[C2:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_attrEc2"} : (!fir.ref>>>>) -> (!fir.ref>>>>, !fir.ref>>>>) +!CHECK: omp.single copyprivate(%[[I1]]#0 -> @_copy_box_heap_Uxi32 : !fir.ref>>>, %[[I2:.*]]#0 -> @_copy_box_heap_i32 : !fir.ref>>, %[[I3]]#0 -> @_copy_box_ptr_i32 : !fir.ref>>, %[[R1]]#0 -> @_copy_box_ptr_Uxf32 : !fir.ref>>>, %[[C1]]#0 -> @_copy_box_heap_Uxc8x5 : !fir.ref>>>>, %[[C2]]#0 -> @_copy_box_ptr_Uxc8x9 : !fir.ref>>>>) +subroutine test_attr() + integer, allocatable :: i1(:) + integer, allocatable :: i2 + integer, pointer :: i3 + real, pointer :: r1(:) + character(kind=1, len=5), allocatable :: c1(:) + character(kind=1, len=9), pointer :: c2(:) + + !$omp parallel private(i1, i2, i3, r1, c1, c2) + !$omp single + !$omp end single copyprivate(i1, i2, i3, r1, c1, c2) + !$omp end parallel +end subroutine diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 669104307fa0e..ab92c172c75ae 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1819,12 +1819,16 @@ class OpenMPIRBuilder { /// \param FiniCB Callback to finalize variable copies. /// \param IsNowait If false, a barrier is emitted. /// \param DidIt Local variable used as a flag to indicate 'single' thread + /// \param CPVars copyprivate variables. + /// \param CPFuncs copy functions to use for each copyprivate variable. /// /// \returns The insertion position *after* the single call. 
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, - llvm::Value *DidIt); + llvm::Value *DidIt, + ArrayRef CPVars = {}, + ArrayRef CPFuncs = {}); /// Generator for '#omp master' /// diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index f6cf358119fb7..7abac0f660ef8 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3992,7 +3992,8 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt) { + FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt, + ArrayRef CPVars, ArrayRef CPFuncs) { if (!updateToLocation(Loc)) return Loc.IP; @@ -4015,17 +4016,33 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single); Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); + auto FiniCBWrapper = [&](InsertPointTy IP) { + FiniCB(IP); + + if (DidIt) + Builder.CreateStore(Builder.getInt32(1), DidIt); + }; + // generates the following: // if (__kmpc_single()) { // .... single region ... // __kmpc_end_single // } + // __kmpc_copyprivate // __kmpc_barrier - EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, + EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper, /*Conditional*/ true, /*hasFinalize*/ true); - if (!IsNowait) + + if (DidIt) { + for (size_t I = 0, E = CPVars.size(); I < E; ++I) + // NOTE BufSize is currently unused, so just pass 0. 
+ createCopyPrivate(LocationDescription(Builder.saveIP(), Loc.DL), + /*BufSize=*/ConstantInt::get(Int64, 0), CPVars[I], + CPFuncs[I], DidIt); + // NOTE __kmpc_copyprivate already inserts a barrier + } else if (!IsNowait) createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, /* CheckCancelFlag */ false); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index e79d0bb2f65ae..0eb1039aa442c 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -3464,6 +3464,117 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) { EXPECT_EQ(ExitBarrier, nullptr); } +TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + AllocaInst *PrivAI = nullptr; + + BasicBlock *EntryBB = nullptr; + BasicBlock *ThenBB = nullptr; + + Value *CPVar = Builder.CreateAlloca(F->arg_begin()->getType()); + Builder.CreateStore(F->arg_begin(), CPVar); + + FunctionType *CopyFuncTy = FunctionType::get( + Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getPtrTy()}, false); + Function *CopyFunc = + Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M); + + Value *DidIt = Builder.CreateAlloca(Type::getInt32Ty(Builder.getContext())); + + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + if (AllocaIP.isSet()) + Builder.restoreIP(AllocaIP); + else + Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); + PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); + Builder.CreateStore(F->arg_begin(), PrivAI); + + llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); + llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); + 
EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); + + Builder.restoreIP(CodeGenIP); + + // collect some info for checks later + ThenBB = Builder.GetInsertBlock(); + EntryBB = ThenBB->getUniquePredecessor(); + + // simple instructions for body + Value *PrivLoad = + Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); + Builder.CreateICmpNE(F->arg_begin(), PrivLoad); + }; + + auto FiniCB = [&](InsertPointTy IP) { + BasicBlock *IPBB = IP.getBlock(); + EXPECT_NE(IPBB->end(), IP.getPoint()); + }; + + Builder.restoreIP(OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, + /*IsNowait*/ false, DidIt, {CPVar}, + {CopyFunc})); + Value *EntryBBTI = EntryBB->getTerminator(); + EXPECT_NE(EntryBBTI, nullptr); + EXPECT_TRUE(isa(EntryBBTI)); + BranchInst *EntryBr = cast(EntryBB->getTerminator()); + EXPECT_TRUE(EntryBr->isConditional()); + EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); + BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); + EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); + + CmpInst *CondInst = cast(EntryBr->getCondition()); + EXPECT_TRUE(isa(CondInst->getOperand(0))); + + CallInst *SingleEntryCI = cast(CondInst->getOperand(0)); + EXPECT_EQ(SingleEntryCI->arg_size(), 2U); + EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single"); + EXPECT_TRUE(isa(SingleEntryCI->getArgOperand(0))); + + CallInst *SingleEndCI = nullptr; + for (auto &FI : *ThenBB) { + Instruction *Cur = &FI; + if (isa(Cur)) { + SingleEndCI = cast(Cur); + if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single") + break; + SingleEndCI = nullptr; + } + } + EXPECT_NE(SingleEndCI, nullptr); + EXPECT_EQ(SingleEndCI->arg_size(), 2U); + EXPECT_TRUE(isa(SingleEndCI->getArgOperand(0))); + EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); + + CallInst *CopyPrivateCI = nullptr; + bool FoundBarrier = false; + for (auto &FI : *ExitBB) { + Instruction *Cur = &FI; + if (auto *CI = dyn_cast(Cur)) { + if (CI->getCalledFunction()->getName() == 
"__kmpc_barrier") + FoundBarrier = true; + else if (CI->getCalledFunction()->getName() == "__kmpc_copyprivate") + CopyPrivateCI = CI; + } + } + EXPECT_FALSE(FoundBarrier); + EXPECT_NE(CopyPrivateCI, nullptr); + EXPECT_EQ(CopyPrivateCI->arg_size(), 6U); + EXPECT_TRUE(isa(CopyPrivateCI->getArgOperand(3))); + EXPECT_EQ(CopyPrivateCI->getArgOperand(3), CPVar); + EXPECT_TRUE(isa(CopyPrivateCI->getArgOperand(4))); + EXPECT_EQ(CopyPrivateCI->getArgOperand(4), CopyFunc); + EXPECT_TRUE(isa(CopyPrivateCI->getArgOperand(5))); + LoadInst *DidItLI = cast(CopyPrivateCI->getArgOperand(5)); + EXPECT_EQ(DidItLI->getOperand(0), DidIt); +} + TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) { OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize(); diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index d614f2666a85a..088327c35b44f 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -387,10 +387,16 @@ def SingleOp : OpenMP_Op<"single", [AttrSizedOperandSegments]> { master thread), in the context of its implicit task. The other threads in the team, which do not execute the block, wait at an implicit barrier at the end of the single construct unless a nowait clause is specified. + + If copyprivate variables and functions are specified, then each thread + variable is updated with the variable value of the thread that executed + the single region, using the specified copy functions. 
}]; let arguments = (ins Variadic:$allocate_vars, Variadic:$allocators_vars, + Variadic:$copyprivate_vars, + OptionalAttr:$copyprivate_funcs, UnitAttr:$nowait); let regions = (region AnyRegion:$region); @@ -402,6 +408,10 @@ def SingleOp : OpenMP_Op<"single", [AttrSizedOperandSegments]> { $allocators_vars, type($allocators_vars) ) `)` |`nowait` $nowait + |`copyprivate` `(` + custom( + $copyprivate_vars, type($copyprivate_vars), $copyprivate_funcs + ) `)` ) $region attr-dict }]; let hasVerifier = 1; diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 2d3be76c65e81..b64391fb248ba 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -505,6 +505,107 @@ static LogicalResult verifyReductionVarList(Operation *op, return success(); } +//===----------------------------------------------------------------------===// +// Parser, printer and verifier for CopyPrivateVarList +//===----------------------------------------------------------------------===// + +/// copyprivate-entry-list ::= copyprivate-entry +/// | copyprivate-entry-list `,` copyprivate-entry +/// copyprivate-entry ::= ssa-id `->` symbol-ref `:` type +static ParseResult parseCopyPrivateVarList( + OpAsmParser &parser, + SmallVectorImpl &operands, + SmallVectorImpl &types, ArrayAttr ©PrivateSymbols) { + SmallVector copyPrivateFuncsVec; + if (failed(parser.parseCommaSeparatedList([&]() { + if (parser.parseOperand(operands.emplace_back()) || + parser.parseArrow() || + parser.parseAttribute(copyPrivateFuncsVec.emplace_back()) || + parser.parseColonType(types.emplace_back())) + return failure(); + return success(); + }))) + return failure(); + SmallVector copyPrivateFuncs(copyPrivateFuncsVec.begin(), + copyPrivateFuncsVec.end()); + copyPrivateSymbols = ArrayAttr::get(parser.getContext(), copyPrivateFuncs); + return success(); +} + +/// Print CopyPrivate clause +static void 
printCopyPrivateVarList(OpAsmPrinter &p, Operation *op, + OperandRange copyPrivateVars, + TypeRange copyPrivateTypes, + std::optional copyPrivateFuncs) { + assert(copyPrivateFuncs.has_value() || copyPrivateVars.empty()); + for (unsigned i = 0, e = copyPrivateVars.size(); i < e; ++i) { + if (i != 0) + p << ", "; + p << copyPrivateVars[i] << " -> " << (*copyPrivateFuncs)[i] << " : " + << copyPrivateTypes[i]; + } +} + +/// Verifies CopyPrivate Clause +static LogicalResult +verifyCopyPrivateVarList(Operation *op, OperandRange copyPrivateVars, + std::optional copyPrivateFuncs) { + if (!copyPrivateVars.empty()) { + if (!copyPrivateFuncs || copyPrivateFuncs->size() != copyPrivateVars.size()) + return op->emitOpError() << "expected as many copyPrivate functions as " + "copyPrivate variables"; + } else { + if (copyPrivateFuncs) + return op->emitOpError() << "unexpected copyPrivate functions"; + return success(); + } + + for (auto args : llvm::zip(copyPrivateVars, *copyPrivateFuncs)) { + auto symbolRef = llvm::cast(std::get<1>(args)); + std::optional> + funcOp; + if (mlir::func::FuncOp mlirFuncOp = + SymbolTable::lookupNearestSymbolFrom(op, + symbolRef)) + funcOp = mlirFuncOp; + else if (mlir::LLVM::LLVMFuncOp llvmFuncOp = + SymbolTable::lookupNearestSymbolFrom( + op, symbolRef)) + funcOp = llvmFuncOp; + + auto getNumArguments = [&] { + return std::visit([](auto &f) { return f.getNumArguments(); }, *funcOp); + }; + + auto getArgumentType = [&](unsigned i) { + return std::visit([i](auto &f) { return f.getArgumentTypes()[i]; }, + *funcOp); + }; + + if (!funcOp) + return op->emitOpError() << "expected symbol reference " << symbolRef + << " to point to a copy function"; + + if (getNumArguments() != 2) + return op->emitOpError() + << "expected copy function " << symbolRef << " to have 2 operands"; + + Type argTy = getArgumentType(0); + if (argTy != getArgumentType(1)) + return op->emitOpError() << "expected copy function " << symbolRef + << " arguments to have the same type"; + + 
Type varType = std::get<0>(args).getType(); + if (argTy != varType) + return op->emitOpError() + << "expected copy function arguments' type (" << argTy + << ") to be the same as copyprivate variable's type (" << varType + << ")"; + } + + return success(); +} + //===----------------------------------------------------------------------===// // Parser, printer and verifier for DependVarList //===----------------------------------------------------------------------===// @@ -1072,7 +1173,8 @@ LogicalResult SingleOp::verify() { return emitError( "expected equal sizes for allocate and allocator variables"); - return success(); + return verifyCopyPrivateVarList(*this, getCopyprivateVars(), + getCopyprivateFuncs()); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 23e101f1e4527..964a1aeb5a00d 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -656,8 +656,26 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, moduleTranslation, bodyGenStatus); }; auto finiCB = [&](InsertPointTy codeGenIP) {}; + + // Handle copyprivate + Operation::operand_range cpVars = singleOp.getCopyprivateVars(); + std::optional cpFuncs = singleOp.getCopyprivateFuncs(); + llvm::SmallVector llvmCPVars; + llvm::SmallVector llvmCPFuncs; + for (size_t i = 0, e = cpVars.size(); i < e; ++i) { + llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i])); + auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom( + singleOp, cast((*cpFuncs)[i])); + llvmCPFuncs.push_back( + moduleTranslation.lookupFunction(llvmFuncOp.getName())); + } + llvm::Value *didIt = nullptr; + if (!llvmCPVars.empty()) + didIt = builder.CreateAlloca(llvm::Type::getInt32Ty(builder.getContext())); + 
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle( - ompLoc, bodyCB, finiCB, singleOp.getNowait(), /*DidIt=*/nullptr)); + ompLoc, bodyCB, finiCB, singleOp.getNowait(), didIt, llvmCPVars, + llvmCPFuncs)); return bodyGenStatus; } diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 2b0e86ddd22bb..2089cbccc9c2d 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -1284,7 +1284,53 @@ func.func @omp_single(%data_var : memref) -> () { // expected-error @below {{expected equal sizes for allocate and allocator variables}} "omp.single" (%data_var) ({ omp.barrier - }) {operandSegmentSizes = array} : (memref) -> () + }) {operandSegmentSizes = array} : (memref) -> () + return +} + +// ----- + +func.func @omp_single_copyprivate(%data_var : memref) -> () { + // expected-error @below {{expected symbol reference @copy_func to point to a copy function}} + omp.single copyprivate(%data_var -> @copy_func : memref) { + omp.barrier + } + return +} + +// ----- + +func.func private @copy_func(memref) + +func.func @omp_single_copyprivate(%data_var : memref) -> () { + // expected-error @below {{expected copy function @copy_func to have 2 operands}} + omp.single copyprivate(%data_var -> @copy_func : memref) { + omp.barrier + } + return +} + +// ----- + +func.func private @copy_func(memref, memref) + +func.func @omp_single_copyprivate(%data_var : memref) -> () { + // expected-error @below {{expected copy function @copy_func arguments to have the same type}} + omp.single copyprivate(%data_var -> @copy_func : memref) { + omp.barrier + } + return +} + +// ----- + +func.func private @copy_func(memref, memref) + +func.func @omp_single_copyprivate(%data_var : memref) -> () { + // expected-error @below {{expected copy function arguments' type ('memref') to be the same as copyprivate variable's type ('memref')}} + omp.single copyprivate(%data_var -> @copy_func : memref) { + omp.barrier + } 
return } diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 3d4f6435572f7..af7879652a1a9 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -1577,6 +1577,23 @@ func.func @omp_single_multiple_blocks() { return } +func.func private @copy_i32(memref, memref) + +// CHECK-LABEL: func @omp_single_copyprivate +func.func @omp_single_copyprivate(%data_var: memref) { + omp.parallel { + // CHECK: omp.single copyprivate(%{{.*}} -> @copy_i32 : memref) { + omp.single copyprivate(%data_var -> @copy_i32 : memref) { + "test.payload"() : () -> () + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.terminator + omp.terminator + } + return +} + // CHECK-LABEL: @omp_task // CHECK-SAME: (%[[bool_var:.*]]: i1, %[[i64_var:.*]]: i64, %[[i32_var:.*]]: i32, %[[data_var:.*]]: memref) func.func @omp_task(%bool_var: i1, %i64_var: i64, %i32_var: i32, %data_var: memref) { diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 29baa84e7e19d..8a3d5d6407659 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -2165,6 +2165,38 @@ llvm.func @single_nowait(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // ----- +llvm.func @copy_i32(!llvm.ptr, !llvm.ptr) +llvm.func @copy_f32(!llvm.ptr, !llvm.ptr) + +// CHECK-LABEL: @single_copyprivate +// CHECK-SAME: (ptr %[[ip:.*]], ptr %[[fp:.*]]) +llvm.func @single_copyprivate(%ip: !llvm.ptr, %fp: !llvm.ptr) { + // CHECK: call i32 @__kmpc_single + omp.single copyprivate(%ip -> @copy_i32 : !llvm.ptr, %fp -> @copy_f32 : !llvm.ptr) { + // CHECK: %[[i:.*]] = load i32, ptr %[[ip]] + %i = llvm.load %ip : !llvm.ptr -> i32 + // CHECK: %[[i2:.*]] = add i32 %[[i]], %[[i]] + %i2 = llvm.add %i, %i : i32 + // CHECK: store i32 %[[i2]], ptr %[[ip]] + llvm.store %i2, %ip : i32, !llvm.ptr + // CHECK: %[[f:.*]] = load float, ptr %[[fp]] + %f = llvm.load %fp : !llvm.ptr -> f32 + // CHECK: %[[f2:.*]] = fadd 
float %[[f]], %[[f]] + %f2 = llvm.fadd %f, %f : f32 + // CHECK: store float %[[f2]], ptr %[[fp]] + llvm.store %f2, %fp : f32, !llvm.ptr + // CHECK: call void @__kmpc_end_single + // CHECK: call void @__kmpc_copyprivate({{.*}}, ptr %[[ip]], ptr @copy_i32, {{.*}}) + // CHECK: call void @__kmpc_copyprivate({{.*}}, ptr %[[fp]], ptr @copy_f32, {{.*}}) + // CHECK-NOT: call void @__kmpc_barrier + omp.terminator + } + // CHECK: ret void + llvm.return +} + +// ----- + // CHECK: @_QFsubEx = internal global i32 undef // CHECK: @_QFsubEx.cache = common global ptr null