From fb91129401f61b332fc1147e5a81d553abd7658a Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:25:33 +0000 Subject: [PATCH 01/11] Add fuse directive patch --- clang/include/clang-c/Index.h | 4 + clang/include/clang/AST/RecursiveASTVisitor.h | 3 + clang/include/clang/AST/StmtOpenMP.h | 99 +- .../clang/Basic/DiagnosticSemaKinds.td | 8 + clang/include/clang/Basic/StmtNodes.td | 1 + clang/include/clang/Sema/SemaOpenMP.h | 27 + .../include/clang/Serialization/ASTBitCodes.h | 1 + clang/lib/AST/StmtOpenMP.cpp | 25 + clang/lib/AST/StmtPrinter.cpp | 5 + clang/lib/AST/StmtProfile.cpp | 4 + clang/lib/Basic/OpenMPKinds.cpp | 2 +- clang/lib/CodeGen/CGStmt.cpp | 3 + clang/lib/CodeGen/CGStmtOpenMP.cpp | 8 + clang/lib/CodeGen/CodeGenFunction.h | 1 + clang/lib/Sema/SemaExceptionSpec.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 600 +++++++ clang/lib/Sema/TreeTransform.h | 11 + clang/lib/Serialization/ASTReaderStmt.cpp | 11 + clang/lib/Serialization/ASTWriterStmt.cpp | 6 + clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 1 + clang/test/OpenMP/fuse_ast_print.cpp | 278 +++ clang/test/OpenMP/fuse_codegen.cpp | 1511 +++++++++++++++++ clang/test/OpenMP/fuse_messages.cpp | 76 + clang/tools/libclang/CIndex.cpp | 7 + clang/tools/libclang/CXCursor.cpp | 3 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 4 + .../runtime/test/transform/fuse/foreach.cpp | 192 +++ openmp/runtime/test/transform/fuse/intfor.c | 50 + .../runtime/test/transform/fuse/iterfor.cpp | 194 +++ .../fuse/parallel-wsloop-collapse-foreach.cpp | 208 +++ .../fuse/parallel-wsloop-collapse-intfor.c | 45 + 31 files changed, 3387 insertions(+), 2 deletions(-) create mode 100644 clang/test/OpenMP/fuse_ast_print.cpp create mode 100644 clang/test/OpenMP/fuse_codegen.cpp create mode 100644 clang/test/OpenMP/fuse_messages.cpp create mode 100644 openmp/runtime/test/transform/fuse/foreach.cpp create mode 100644 openmp/runtime/test/transform/fuse/intfor.c create mode 100644 openmp/runtime/test/transform/fuse/iterfor.cpp create mode 
100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp create mode 100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index e4cb4327fbaac..148b89ab9cfa4 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2162,6 +2162,10 @@ enum CXCursorKind { */ CXCursor_OMPStripeDirective = 310, + /** OpenMP fuse directive + */ + CXCursor_OMPFuseDirective = 318, + /** OpenACC Compute Construct. */ CXCursor_OpenACCComputeConstruct = 320, diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 5cb2f57edffe4..918216e8df4aa 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3090,6 +3090,9 @@ DEF_TRAVERSE_STMT(OMPUnrollDirective, DEF_TRAVERSE_STMT(OMPReverseDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPFuseDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPInterchangeDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index e2fd2114026f7..cb8bb91f4768c 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -962,6 +962,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Number of loops generated by this loop transformation. unsigned NumGeneratedLoops = 0; + /// Number of top level canonical loop nests generated by this loop + /// transformation + unsigned NumGeneratedLoopNests = 0; protected: explicit OMPLoopTransformationDirective(StmtClass SC, @@ -973,6 +976,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Set the number of loops generated by this loop transformation. 
void setNumGeneratedLoops(unsigned Num) { NumGeneratedLoops = Num; } + /// Set the number of top level canonical loop nests generated by this loop + /// transformation + void setNumGeneratedLoopNests(unsigned Num) { NumGeneratedLoopNests = Num; } public: /// Return the number of associated (consumed) loops. @@ -981,6 +987,10 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Return the number of loops generated by this loop transformation. unsigned getNumGeneratedLoops() const { return NumGeneratedLoops; } + /// Return the number of top level canonical loop nests generated by this loop + /// transformation + unsigned getNumGeneratedLoopNests() const { return NumGeneratedLoopNests; } + /// Get the de-sugared statements after the loop transformation. /// /// Might be nullptr if either the directive generates no loops and is handled @@ -995,7 +1005,8 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { Stmt::StmtClass C = T->getStmtClass(); return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass || C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass || - C == OMPStripeDirectiveClass; + C == OMPStripeDirectiveClass || + C == OMPFuseDirectiveClass; } }; @@ -5562,6 +5573,7 @@ class OMPTileDirective final : public OMPLoopTransformationDirective { llvm::omp::OMPD_tile, StartLoc, EndLoc, NumLoops) { setNumGeneratedLoops(2 * NumLoops); + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { @@ -5793,6 +5805,7 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective { llvm::omp::OMPD_reverse, StartLoc, EndLoc, NumLoops) { setNumGeneratedLoops(NumLoops); + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { @@ -5865,6 +5878,7 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { llvm::omp::OMPD_interchange, StartLoc, EndLoc, NumLoops) { setNumGeneratedLoops(NumLoops); + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { @@ 
-5915,6 +5929,89 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { } }; +/// Represents the '#pragma omp fuse' loop transformation directive +/// +/// \code{c} +/// #pragma omp fuse +/// { +/// for(int i = 0; i < m1; ++i) {...} +/// for(int j = 0; j < m2; ++j) {...} +/// ... +/// } +/// \endcode + +class OMPFuseDirective final : public OMPLoopTransformationDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + + // Offsets of child members. + enum { + PreInitsOffset = 0, + TransformedStmtOffset, + }; + + explicit OMPFuseDirective(SourceLocation StartLoc, SourceLocation EndLoc, + unsigned NumLoops) + : OMPLoopTransformationDirective(OMPFuseDirectiveClass, + llvm::omp::OMPD_fuse, StartLoc, EndLoc, + NumLoops) { + // This default initialization assumes simple loop fusion. + // If a 'looprange' clause is specified, these values must be explicitly set + setNumGeneratedLoopNests(1); + setNumGeneratedLoops(NumLoops); + } + + void setPreInits(Stmt *PreInits) { + Data->getChildren()[PreInitsOffset] = PreInits; + } + + void setTransformedStmt(Stmt *S) { + Data->getChildren()[TransformedStmtOffset] = S; + } + +public: + /// Create a new AST node representation for '#pragma omp fuse' + /// + /// \param C Context of the AST + /// \param StartLoc Location of the introducer (e.g. the 'omp' token) + /// \param EndLoc Location of the directive's end (e.g. the tok::eod) + /// \param Clauses The directive's clauses + /// \param NumLoops Number of total affected loops + /// \param NumLoopNests Number of affected top level canonical loops + /// (number of items in the 'looprange' clause if present) + /// \param AssociatedStmt The outermost associated loop + /// \param TransformedStmt The loop nest after fusion, or nullptr in + /// dependent contexts + /// \param PreInits Helper preinits statements for the loop nest + static OMPFuseDirective *Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, + ArrayRef Clauses, + 
unsigned NumLoops, unsigned NumLoopNests, + Stmt *AssociatedStmt, Stmt *TransformedStmt, + Stmt *PreInits); + + /// Build an empty '#pragma omp fuse' AST node for deserialization + /// + /// \param C Context of the AST + /// \param NumClauses Number of clauses to allocate + /// \param NumLoops Number of associated loops to allocate + static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, + unsigned NumLoops); + + /// Gets the associated loops after the transformation. This is the de-sugared + /// replacement or nullptr in dependent contexts. + Stmt *getTransformedStmt() const { + return Data->getChildren()[TransformedStmtOffset]; + } + + /// Return preinits statement. + Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPFuseDirectiveClass; + } +}; + /// This represents '#pragma omp scan' directive. /// /// \code diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 979ff60b73b75..fe9ca29038a1f 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11612,6 +11612,14 @@ def note_omp_implicit_dsa : Note< "implicitly determined as %0">; def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; +def warn_omp_different_loop_ind_var_types : Warning < + "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">; +def err_omp_not_canonical_loop : Error < + "loop after '#pragma omp %0' is not in canonical form">; +def err_omp_not_a_loop_sequence : Error < + "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">; +def err_omp_empty_loop_sequence : Error < + "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or 
loop-generating construct">; def err_omp_not_for : Error< "%select{statement after '#pragma omp %1' must be a for loop|" "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">; diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index c9c173f5c7469..45d1a813e4b1f 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -233,6 +233,7 @@ def OMPStripeDirective : StmtNode; def OMPUnrollDirective : StmtNode; def OMPReverseDirective : StmtNode; def OMPInterchangeDirective : StmtNode; +def OMPFuseDirective : StmtNode; def OMPForDirective : StmtNode; def OMPForSimdDirective : StmtNode; def OMPSectionsDirective : StmtNode; diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 7b169f56b6807..ea21377a8db9c 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -457,6 +457,13 @@ class SemaOpenMP : public SemaBase { Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); + + /// Called on well-formed '#pragma omp fuse' after parsing of its + /// clauses and the associated statement. + StmtResult ActOnOpenMPFuseDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp for' after parsing /// of the associated statement. StmtResult @@ -1481,6 +1488,26 @@ class SemaOpenMP : public SemaBase { SmallVectorImpl &LoopHelpers, Stmt *&Body, SmallVectorImpl> &OriginalInits); + /// Analyzes and checks a loop sequence for use by a loop transformation + /// + /// \param Kind The loop transformation directive kind. + /// \param LoopSeqSize [out] Number of top level canonical loops + /// \param NumLoops [out] Number of total canonical loops + /// \param LoopHelpers [out] The multiple loop analyses results. + /// \param ForStmts [out] The multiple Stmt of each For loop. 
+ /// \param OriginalInits [out] The multiple collection of statements and + /// declarations that must have been executed/declared + /// before entering the loop. + /// \param Context The AST context used to compare induction variable types + /// \return Whether there was an absence of errors or not + bool checkTransformableLoopSequence( + OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, + unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + ASTContext &Context); + /// Helper to keep information about the current `omp begin/end declare /// variant` nesting. struct OMPDeclareVariantScope { diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 9d265f27b8e31..83b73554d693c 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1948,6 +1948,7 @@ enum StmtCode { STMT_OMP_UNROLL_DIRECTIVE, STMT_OMP_REVERSE_DIRECTIVE, STMT_OMP_INTERCHANGE_DIRECTIVE, + STMT_OMP_FUSE_DIRECTIVE, STMT_OMP_FOR_DIRECTIVE, STMT_OMP_FOR_SIMD_DIRECTIVE, STMT_OMP_SECTIONS_DIRECTIVE, diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 2eeb5e45ab511..276e43ec9f7d5 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -456,6 +456,8 @@ OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc, auto *Dir = createDirective( C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc); Dir->setNumGeneratedLoops(NumGeneratedLoops); + // The number of generated loops and loop nests during unroll matches + Dir->setNumGeneratedLoopNests(NumGeneratedLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); return Dir; @@ -508,6 +510,29 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, SourceLocation(), SourceLocation(), NumLoops); } +OMPFuseDirective *OMPFuseDirective::Create( + const ASTContext &C, SourceLocation StartLoc, 
SourceLocation EndLoc, + ArrayRef Clauses, unsigned NumLoops, unsigned NumLoopNests, + Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits) { + + OMPFuseDirective *Dir = createDirective( + C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc, + NumLoops); + Dir->setTransformedStmt(TransformedStmt); + Dir->setPreInits(PreInits); + Dir->setNumGeneratedLoopNests(NumLoopNests); + Dir->setNumGeneratedLoops(NumLoops); + return Dir; +} + +OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned NumLoops) { + return createEmptyDirective( + C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, + SourceLocation(), SourceLocation(), NumLoops); +} + OMPForSimdDirective * OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 28317911d825b..4f57c63154da0 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -790,6 +790,11 @@ void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) { PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPFuseDirective(OMPFuseDirective *Node) { + Indent() << "#pragma omp fuse"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) { Indent() << "#pragma omp for"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index c61450e19f1b6..c5d1d5b48508e 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -1026,6 +1026,10 @@ void StmtProfiler::VisitOMPInterchangeDirective( VisitOMPLoopTransformationDirective(S); } +void StmtProfiler::VisitOMPFuseDirective(const OMPFuseDirective *S) { + VisitOMPLoopTransformationDirective(S); +} + void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) { VisitOMPLoopDirective(S); } diff 
--git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index a451fc7c01841..d172450512f13 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -702,7 +702,7 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse || - DKind == OMPD_interchange || DKind == OMPD_stripe; + DKind == OMPD_interchange || DKind == OMPD_stripe || DKind == OMPD_fuse; } bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) { diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 8742f8e0fc04a..aa12d62d1b865 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -234,6 +234,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { case Stmt::OMPInterchangeDirectiveClass: EmitOMPInterchangeDirective(cast(*S)); break; + case Stmt::OMPFuseDirectiveClass: + EmitOMPFuseDirective(cast(*S)); + break; case Stmt::OMPForDirectiveClass: EmitOMPForDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index d9195d749e056..cf03d5d3d88a3 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -198,6 +198,8 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { } else if (const auto *Interchange = dyn_cast(&S)) { PreInits = Interchange->getPreInits(); + } else if (const auto *Fuse = dyn_cast(&S)) { + PreInits = Fuse->getPreInits(); } else { llvm_unreachable("Unknown loop-based directive kind."); } @@ -2922,6 +2924,12 @@ void CodeGenFunction::EmitOMPInterchangeDirective( EmitStmt(S.getTransformedStmt()); } +void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) { + // Emit the de-sugared statement + OMPTransformDirectiveScopeRAII FuseScope(*this, &S); + EmitStmt(S.getTransformedStmt()); +} 
+ void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index a5ab9df01dba9..fe753e5b688b1 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3855,6 +3855,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPUnrollDirective(const OMPUnrollDirective &S); void EmitOMPReverseDirective(const OMPReverseDirective &S); void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S); + void EmitOMPFuseDirective(const OMPFuseDirective &S); void EmitOMPForDirective(const OMPForDirective &S); void EmitOMPForSimdDirective(const OMPForSimdDirective &S); void EmitOMPScopeDirective(const OMPScopeDirective &S); diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 0a6cea8869c14..3eb59156c04af 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1493,6 +1493,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPUnrollDirectiveClass: case Stmt::OMPReverseDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPSingleDirectiveClass: case Stmt::OMPTargetDataDirectiveClass: case Stmt::OMPTargetDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 00f4658180807..84ac9587bd54d 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4404,6 +4404,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, case OMPD_unroll: case OMPD_reverse: case OMPD_interchange: + case OMPD_fuse: case OMPD_assume: break; default: @@ -6221,6 +6222,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); break; + case OMPD_fuse: + Res = + 
ActOnOpenMPFuseDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); + break; case OMPD_for: Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); @@ -14230,6 +14235,8 @@ bool SemaOpenMP::checkTransformableLoopNest( DependentPreInits = Dir->getPreInits(); else if (auto *Dir = dyn_cast(Transform)) DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); else llvm_unreachable("Unhandled loop transformation"); @@ -14240,6 +14247,265 @@ bool SemaOpenMP::checkTransformableLoopNest( return Result; } +class NestedLoopCounterVisitor + : public clang::RecursiveASTVisitor { +public: + explicit NestedLoopCounterVisitor() : NestedLoopCount(0) {} + + bool VisitForStmt(clang::ForStmt *FS) { + ++NestedLoopCount; + return true; + } + + bool VisitCXXForRangeStmt(clang::CXXForRangeStmt *FRS) { + ++NestedLoopCount; + return true; + } + + unsigned getNestedLoopCount() const { return NestedLoopCount; } + +private: + unsigned NestedLoopCount; +}; + +bool SemaOpenMP::checkTransformableLoopSequence( + OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, + unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + ASTContext &Context) { + + // Checks whether the given statement is a compound statement + VarsWithInheritedDSAType TmpDSA; + if (!isa(AStmt)) { + Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; + } + // Callback for updating pre-inits in case there are even more + // loop-sequence-generating-constructs inside of the main compound stmt + auto OnTransformationCallback = + [&OriginalInits](OMPLoopBasedDirective *Transform) { + Stmt *DependentPreInits; + if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else if (auto *Dir 
= dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else + llvm_unreachable("Unhandled loop transformation"); + + appendFlattenedStmtList(OriginalInits.back(), DependentPreInits); + }; + + // Number of top level canonical loop nests observed (And acts as index) + LoopSeqSize = 0; + // Number of total observed loops + NumLoops = 0; + + // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows + // the grammar: + // + // canonical-loop-sequence: + // { + // loop-sequence+ + // } + // where loop-sequence can be any of the following: + // 1. canonical-loop-sequence + // 2. loop-nest + // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective) + // + // To recognise and traverse this structure the following helper functions + // have been defined. handleLoopSequence serves as the recursive entry point + // and tries to match the input AST to the canonical loop sequence grammar + // structure + + // Helper functions to validate canonical loop sequence grammar is valid + auto isLoopSequenceDerivation = [](auto *Child) { + return isa(Child) || isa(Child) || + isa(Child); + }; + auto isLoopGeneratingStmt = [](auto *Child) { + return isa(Child); + }; + + // Helper Lambda to handle storing initialization and body statements for both + // ForStmt and CXXForRangeStmt and checks for any possible mismatch between + // induction variables types + QualType BaseInductionVarType; + auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType, + Kind, this, &Context](Stmt *LoopStmt) { + if (auto *For = dyn_cast(LoopStmt)) { + OriginalInits.back().push_back(For->getInit()); + ForStmts.push_back(For); + // Extract induction variable + if (auto *InitStmt = dyn_cast_or_null(For->getInit())) { + if (auto *InitDecl = 
dyn_cast(InitStmt->getSingleDecl())) { + QualType InductionVarType = InitDecl->getType().getCanonicalType(); + + // Compare with first loop type + if (BaseInductionVarType.isNull()) { + BaseInductionVarType = InductionVarType; + } else if (!Context.hasSameType(BaseInductionVarType, + InductionVarType)) { + Diag(InitDecl->getBeginLoc(), + diag::warn_omp_different_loop_ind_var_types) + << getOpenMPDirectiveName(Kind) << BaseInductionVarType + << InductionVarType; + } + } + } + + } else { + assert(isa(LoopStmt) && + "Expected canonical for or range-based for loops."); + auto *CXXFor = dyn_cast(LoopStmt); + OriginalInits.back().push_back(CXXFor->getBeginStmt()); + ForStmts.push_back(CXXFor); + } + }; + // Helper lambda functions to encapsulate the processing of different + // derivations of the canonical loop sequence grammar + // + // Modularized code for handling loop generation and transformations + auto handleLoopGeneration = [&storeLoopStatements, &LoopHelpers, + &OriginalInits, &LoopSeqSize, &NumLoops, Kind, + &TmpDSA, &OnTransformationCallback, + this](Stmt *Child) { + auto LoopTransform = dyn_cast(Child); + Stmt *TransformedStmt = LoopTransform->getTransformedStmt(); + unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests(); + + // Handle the case where transformed statement is not available due to + // dependent contexts + if (!TransformedStmt) { + if (NumGeneratedLoopNests > 0) + return true; + // Unroll full + else { + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + } + // Handle loop transformations with multiple loop nests + // Unroll full + if (NumGeneratedLoopNests <= 0) { + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + // Future loop transformations that generate multiple canonical loops + } else if (NumGeneratedLoopNests > 1) { + llvm_unreachable("Multiple canonical loop generating transformations " + 
"like loop splitting are not yet supported"); + } + + // Process the transformed loop statement + Child = TransformedStmt; + OriginalInits.emplace_back(); + LoopHelpers.emplace_back(); + OnTransformationCallback(LoopTransform); + + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, + TmpDSA, LoopHelpers[LoopSeqSize]); + + if (!IsCanonical) { + Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop) + << getOpenMPDirectiveName(Kind); + return false; + } + storeLoopStatements(TransformedStmt); + NumLoops += LoopTransform->getNumGeneratedLoops(); + return true; + }; + + // Modularized code for handling regular canonical loops + auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, + &LoopSeqSize, &NumLoops, Kind, &TmpDSA, + this](Stmt *Child) { + OriginalInits.emplace_back(); + LoopHelpers.emplace_back(); + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, + TmpDSA, LoopHelpers[LoopSeqSize]); + + if (!IsCanonical) { + Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop) + << getOpenMPDirectiveName(Kind); + return false; + } + storeLoopStatements(Child); + // TraverseStmt() only returns a success flag; the number of loops in this + // nest has to be read back from a visitor created just for it. + NestedLoopCounterVisitor NLCV; + NLCV.TraverseStmt(Child); + NumLoops += NLCV.getNestedLoopCount(); + return true; + }; + + // Helper function to process a Loop Sequence Recursively + auto handleLoopSequence = [&](Stmt *LoopSeqStmt, + auto &handleLoopSequenceCallback) -> bool { + for (auto *Child : LoopSeqStmt->children()) { + if (!Child) + continue; + + // Skip over non-loop-sequence statements + if (!isLoopSequenceDerivation(Child)) { + Child = Child->IgnoreContainers(); + + // Ignore empty compound statement + if (!Child) + continue; + + // In the case of a nested loop sequence ignoring containers would not + // be enough, a recursive traversal of the loop sequence is required + if (isa(Child)) { + if (!handleLoopSequenceCallback(Child, handleLoopSequenceCallback)) + return false; + // Already been treated, skip this child + continue; + } + } + 
// Regular loop sequence handling + if (isLoopSequenceDerivation(Child)) { + if (isLoopGeneratingStmt(Child)) { + if (!handleLoopGeneration(Child)) { + return false; + } + } else { + if (!handleRegularLoop(Child)) { + return false; + } + } + ++LoopSeqSize; + } else { + // Report error for invalid statement inside canonical loop sequence + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + } + return true; + }; + + // Recursive entry point to process the main loop sequence + if (!handleLoopSequence(AStmt, handleLoopSequence)) { + return false; + } + + if (LoopSeqSize <= 0) { + Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; + } + return true; +} + /// Add preinit statements that need to be propageted from the selected loop. static void addLoopPreInits(ASTContext &Context, OMPLoopBasedDirective::HelperExprs &LoopHelper, @@ -15499,6 +15765,340 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( buildPreInits(Context, PreInits)); } +StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); + DeclContext *CurrContext = SemaRef.CurContext; + Scope *CurScope = SemaRef.getCurScope(); + CaptureVars CopyTransformer(SemaRef); + + // Ensure the structured block is not empty + if (!AStmt) { + return StmtError(); + } + // Validate that the potential loop sequence is transformable for fusion + // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops + SmallVector LoopHelpers; + SmallVector LoopStmts; + SmallVector> OriginalInits; + + unsigned NumLoops; + // TODO: Support looprange clause using LoopSeqSize + unsigned LoopSeqSize; + if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops, + LoopHelpers, LoopStmts, OriginalInits, + Context)) { + return StmtError(); + } + + // Defer transformation in 
dependent contexts + if (CurrContext->isDependentContext()) { + return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumLoops, 1, AStmt, nullptr, nullptr); + } + assert(LoopHelpers.size() == LoopSeqSize && + "Expecting loop iteration space dimensionality to match number of " + "affected loops"); + assert(OriginalInits.size() == LoopSeqSize && + "Expecting loop iteration space dimensionality to match number of " + "affected loops"); + + // PreInits hold a sequence of variable declarations that must be executed + // before the fused loop begins. These include bounds, strides, and other + // helper variables required for the transformation. + SmallVector PreInits; + + // Select the type with the largest bit width among all induction variables + QualType IVType = LoopHelpers[0].IterationVarRef->getType(); + for (unsigned int I = 1; I < LoopSeqSize; ++I) { + QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType(); + if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) { + IVType = CurrentIVType; + } + } + uint64_t IVBitWidth = Context.getIntWidth(IVType); + + // Create pre-init declarations for all loops lower bounds, upper bounds, + // strides and num-iterations + SmallVector LBVarDecls; + SmallVector STVarDecls; + SmallVector NIVarDecls; + SmallVector UBVarDecls; + SmallVector IVVarDecls; + + // Helper lambda to create variables for bounds, strides, and other + // expressions. Generates both the variable declaration and the corresponding + // initialization statement. 
+ auto CreateHelperVarAndStmt = + [&SemaRef = this->SemaRef, &Context, &CopyTransformer, + &IVType](Expr *ExprToCopy, const std::string &BaseName, unsigned I, + bool NeedsNewVD = false) { + Expr *TransformedExpr = + AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy)); + if (!TransformedExpr) + return std::pair(nullptr, StmtError()); + + auto Name = (Twine(".omp.") + BaseName + std::to_string(I)).str(); + + VarDecl *VD; + if (NeedsNewVD) { + VD = buildVarDecl(SemaRef, SourceLocation(), IVType, Name); + SemaRef.AddInitializerToDecl(VD, TransformedExpr, false); + + } else { + // Create a unique variable name + DeclRefExpr *DRE = cast(TransformedExpr); + VD = cast(DRE->getDecl()); + VD->setDeclName(&SemaRef.PP.getIdentifierTable().get(Name)); + } + // Create the corresponding declaration statement + StmtResult DeclStmt = new (Context) class DeclStmt( + DeclGroupRef(VD), SourceLocation(), SourceLocation()); + return std::make_pair(VD, DeclStmt); + }; + + // Process each single loop to generate and collect declarations + // and statements for all helper expressions + for (unsigned int I = 0; I < LoopSeqSize; ++I) { + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], + PreInits); + + auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", I); + auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", I); + auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", I); + auto [NIVD, NIDStmt] = + CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", I, true); + auto [IVVD, IVDStmt] = + CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", I); + + if (!LBVD || !STVD || !NIVD || !IVVD) + return StmtError(); + + UBVarDecls.push_back(UBVD); + LBVarDecls.push_back(LBVD); + STVarDecls.push_back(STVD); + NIVarDecls.push_back(NIVD); + IVVarDecls.push_back(IVVD); + + PreInits.push_back(UBDStmt.get()); + PreInits.push_back(LBDStmt.get()); + PreInits.push_back(STDStmt.get()); + 
PreInits.push_back(NIDStmt.get()); + PreInits.push_back(IVDStmt.get()); + } + + auto MakeVarDeclRef = [&SemaRef = this->SemaRef](VarDecl *VD) { + return buildDeclRefExpr(SemaRef, VD, VD->getType(), VD->getLocation(), + false); + }; + + // Next, the final fused loop is created. It has the following shape + // (considering the selected loops): + // + // for (fuse.index = 0; fuse.index < max(ni0, ni1, ..., nik); ++fuse.index) { + // if (fuse.index < ni0){ + // iv0 = lb0 + st0 * fuse.index; + // original.index0 = iv0 + // body(0); + // } + // if (fuse.index < ni1){ + // iv1 = lb1 + st1 * fuse.index; + // original.index1 = iv1 + // body(1); + // } + // + // ... + // + // if (fuse.index < nik){ + // ivk = lbk + stk * fuse.index; + // original.indexk = ivk + // body(k); + // } + + // } + + // 1. Create the initialized fuse index + const std::string IndexName = Twine(".omp.fuse.index").str(); + Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0), + IVType, SourceLocation()); + VarDecl *IndexDecl = + buildVarDecl(SemaRef, {}, IVType, IndexName, nullptr, nullptr); + SemaRef.AddInitializerToDecl(IndexDecl, InitVal, false); + StmtResult InitStmt = new (Context) + DeclStmt(DeclGroupRef(IndexDecl), SourceLocation(), SourceLocation()); + + if (!InitStmt.isUsable()) + return StmtError(); + + auto MakeIVRef = [&SemaRef = this->SemaRef, IndexDecl, IVType, + Loc = InitVal->getExprLoc()]() { + return buildDeclRefExpr(SemaRef, IndexDecl, IVType, Loc, false); + }; + + // 2. Iteratively compute the max number of logical iterations Max(NI_1, NI_2, + // ..., NI_k) + // + // This loop accumulates the maximum value across multiple expressions, + // ensuring each step constructs a unique AST node for correctness.
By using + // intermediate temporary variables and conditional operators, we maintain + // distinct nodes and avoid duplicating subtrees. For instance, max(a,b,c): + // omp.temp.1 = a + // omp.temp.2 = max(omp.temp.1, b) + // omp.fuse.max = max(omp.temp.2, c) + + ExprResult MaxExpr; + for (unsigned I = 0; I < LoopSeqSize; ++I) { + DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[I]); + QualType NITy = NIRef->getType(); + + if (MaxExpr.isUnset()) { + // Initialize MaxExpr with the first NI expression + MaxExpr = NIRef; + } else { + // Create a new accumulator variable t_i = MaxExpr + std::string TempName = (Twine(".omp.temp.") + Twine(I)).str(); + VarDecl *TempDecl = + buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr); + TempDecl->setInit(MaxExpr.get()); + DeclRefExpr *TempRef = + buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false); + DeclRefExpr *TempRef2 = + buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false); + // Add a DeclStmt to PreInits to ensure the variable is declared. + StmtResult TempStmt = new (Context) + DeclStmt(DeclGroupRef(TempDecl), SourceLocation(), SourceLocation()); + + if (!TempStmt.isUsable()) + return StmtError(); + PreInits.push_back(TempStmt.get()); + + // Build MaxExpr <- (MaxExpr > NIRef ? MaxExpr : NIRef) + ExprResult Comparison = + SemaRef.BuildBinOp(nullptr, SourceLocation(), BO_GT, TempRef, NIRef); + // Handle any errors in Comparison creation + if (!Comparison.isUsable()) + return StmtError(); + + DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[I]); + // Update MaxExpr using a conditional expression to hold the max value + MaxExpr = new (Context) ConditionalOperator( + Comparison.get(), SourceLocation(), TempRef2, SourceLocation(), + NIRef2->getExprStmt(), NITy, VK_LValue, OK_Ordinary); + + if (!MaxExpr.isUsable()) + return StmtError(); + } + } + if (!MaxExpr.isUsable()) + return StmtError(); + + // 3.
Declare the max variable + const std::string MaxName = Twine(".omp.fuse.max").str(); + VarDecl *MaxDecl = + buildVarDecl(SemaRef, {}, IVType, MaxName, nullptr, nullptr); + MaxDecl->setInit(MaxExpr.get()); + DeclRefExpr *MaxRef = buildDeclRefExpr(SemaRef, MaxDecl, IVType, {}, false); + StmtResult MaxStmt = new (Context) + DeclStmt(DeclGroupRef(MaxDecl), SourceLocation(), SourceLocation()); + + if (MaxStmt.isInvalid()) + return StmtError(); + PreInits.push_back(MaxStmt.get()); + + // 4. Create condition Expr: index < n_max + ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, + MakeIVRef(), MaxRef); + if (!CondExpr.isUsable()) + return StmtError(); + // 5. Increment Expr: ++index + ExprResult IncrExpr = + SemaRef.BuildUnaryOp(CurScope, SourceLocation(), UO_PreInc, MakeIVRef()); + if (!IncrExpr.isUsable()) + return StmtError(); + + // 6. Build the Fused Loop Body + // The final fused loop iterates over the maximum logical range. Inside the + // loop, each original loop's index is calculated dynamically, and its body + // is executed conditionally. 
+ // + // Each sub-loop's body is guarded by a conditional statement to ensure + // it executes only within its logical iteration range: + // + // if (fuse.index < ni_k){ + // iv_k = lb_k + st_k * fuse.index; + // original.index = iv_k + // body(k); + // } + + CompoundStmt *FusedBody = nullptr; + SmallVector FusedBodyStmts; + for (unsigned I = 0; I < LoopSeqSize; ++I) { + + // Assignment of the original sub-loop index to compute the logical index + // IV_k = LB_k + omp.fuse.index * ST_k + + ExprResult IdxExpr = + SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul, + MakeVarDeclRef(STVarDecls[I]), MakeIVRef()); + if (!IdxExpr.isUsable()) + return StmtError(); + IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add, + MakeVarDeclRef(LBVarDecls[I]), IdxExpr.get()); + + if (!IdxExpr.isUsable()) + return StmtError(); + IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign, + MakeVarDeclRef(IVVarDecls[I]), IdxExpr.get()); + if (!IdxExpr.isUsable()) + return StmtError(); + + // Update the original i_k = IV_k + SmallVector BodyStmts; + BodyStmts.push_back(IdxExpr.get()); + llvm::append_range(BodyStmts, LoopHelpers[I].Updates); + + if (auto *SourceCXXFor = dyn_cast(LoopStmts[I])) + BodyStmts.push_back(SourceCXXFor->getLoopVarStmt()); + + Stmt *Body = (isa(LoopStmts[I])) + ?
cast(LoopStmts[I])->getBody() + : cast(LoopStmts[I])->getBody(); + + BodyStmts.push_back(Body); + + CompoundStmt *CombinedBody = + CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + ExprResult Condition = + SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(), + MakeVarDeclRef(NIVarDecls[I])); + + if (!Condition.isUsable()) + return StmtError(); + + IfStmt *IfStatement = IfStmt::Create( + Context, SourceLocation(), IfStatementKind::Ordinary, nullptr, nullptr, + Condition.get(), SourceLocation(), SourceLocation(), CombinedBody, + SourceLocation(), nullptr); + + FusedBodyStmts.push_back(IfStatement); + } + FusedBody = CompoundStmt::Create(Context, FusedBodyStmts, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + + // 7. Construct the final fused loop + ForStmt *FusedForStmt = new (Context) + ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, IncrExpr.get(), + FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(), + IncrExpr.get()->getEndLoc()); + + return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops, + 1, AStmt, FusedForStmt, + buildPreInits(Context, PreInits)); +} + OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 3e33fb73e01b4..45f556f22c511 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9675,6 +9675,17 @@ StmtResult TreeTransform::TransformOMPInterchangeDirective( return Res; } +template +StmtResult +TreeTransform::TransformOMPFuseDirective(OMPFuseDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); + return Res; +} + template 
StmtResult TreeTransform::TransformOMPForDirective(OMPForDirective *D) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 44cfb83ad2db4..291bd8ea4bf18 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2445,6 +2445,7 @@ void ASTStmtReader::VisitOMPLoopTransformationDirective( OMPLoopTransformationDirective *D) { VisitOMPLoopBasedDirective(D); D->setNumGeneratedLoops(Record.readUInt32()); + D->setNumGeneratedLoopNests(Record.readUInt32()); } void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) { @@ -2467,6 +2468,10 @@ void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { VisitOMPLoopTransformationDirective(D); } +void ASTStmtReader::VisitOMPFuseDirective(OMPFuseDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); D->setHasCancel(Record.readBool()); @@ -3608,6 +3613,12 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { S = OMPReverseDirective::CreateEmpty(Context, NumLoops); break; } + case STMT_OMP_FUSE_DIRECTIVE: { + unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; + S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops); + break; + } case STMT_OMP_INTERCHANGE_DIRECTIVE: { unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index a6e320c7f3eb0..5bf1ecfb968e8 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2449,6 +2449,7 @@ void ASTStmtWriter::VisitOMPLoopTransformationDirective( OMPLoopTransformationDirective *D) { VisitOMPLoopBasedDirective(D); Record.writeUInt32(D->getNumGeneratedLoops()); + Record.writeUInt32(D->getNumGeneratedLoopNests()); } void 
ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) { @@ -2476,6 +2477,11 @@ void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE; } +void ASTStmtWriter::VisitOMPFuseDirective(OMPFuseDirective *D) { + VisitOMPLoopTransformationDirective(D); + Code = serialization::STMT_OMP_FUSE_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); Record.writeBool(D->hasCancel()); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index c77ef26da568d..7218d7e62acdd 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1814,6 +1814,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPStripeDirectiveClass: case Stmt::OMPTileDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPInteropDirectiveClass: case Stmt::OMPDispatchDirectiveClass: case Stmt::OMPMaskedDirectiveClass: diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp new file mode 100644 index 0000000000000..43ce815dab024 --- /dev/null +++ b/clang/test/OpenMP/fuse_ast_print.cpp @@ -0,0 +1,278 @@ +// Check no warnings/errors +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Check AST and unparsing +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu 
-fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code +extern "C" void body(...); + +// PRINT-LABEL: void foo1( +// DUMP-LABEL: FunctionDecl {{.*}} foo1 +void foo1() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } + +} + +// PRINT-LABEL: void foo2( +// DUMP-LABEL: FunctionDecl {{.*}} foo2 +void foo2() { + // PRINT: #pragma omp unroll partial(4) + // DUMP: OMPUnrollDirective + // DUMP-NEXT: OMPPartialClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 4 + // DUMP-NEXT: IntegerLiteral {{.*}} 4 + #pragma omp unroll partial(4) + // PRINT: #pragma omp fuse + // DUMP-NEXT: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + +} + +//PRINT-LABEL: void foo3( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo3 +template +void foo3() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: #pragma omp 
unroll partial(Factor1) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(Factor1) + // PRINT: for (int i = 0; i < 12; i += 1) + // DUMP: ForStmt + for (int i = 0; i < 12; i += 1) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: #pragma omp unroll partial(Factor2) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(Factor2) + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } +} + +// Also test instantiating the template. +void tfoo3() { + foo3<4,2>(); +} + +//PRINT-LABEL: void foo4( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo4 +template +void foo4(int start, int end) { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (T i = start; i < end; i += Step) + // DUMP: ForStmt + for (T i = start; i < end; i += Step) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + + // PRINT: for (T j = end; j > start; j -= Step) + // DUMP: ForStmt + for (T j = end; j > start; j -= Step) { + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + + } +} + +// Also test instantiating the template. 
+void tfoo4() { + foo4(0, 64); +} + + + +// PRINT-LABEL: void foo5( +// DUMP-LABEL: FunctionDecl {{.*}} foo5 +void foo5() { + double arr[128], arr2[128]; + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT-NEXT: for (auto &&a : arr) + // DUMP-NEXT: CXXForRangeStmt + for (auto &&a: arr) + // PRINT: body(a) + // DUMP: CallExpr + body(a); + // PRINT: for (double v = 42; auto &&b : arr) + // DUMP: CXXForRangeStmt + for (double v = 42; auto &&b: arr) + // PRINT: body(b, v); + // DUMP: CallExpr + body(b, v); + // PRINT: for (auto &&c : arr2) + // DUMP: CXXForRangeStmt + for (auto &&c: arr2) + // PRINT: body(c) + // DUMP: CallExpr + body(c); + + } + +} + +// PRINT-LABEL: void foo6( +// DUMP-LABEL: FunctionDecl {{.*}} foo6 +void foo6() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i <= 10; ++i) + // DUMP: ForStmt + for (int i = 0; i <= 10; ++i) + body(i); + // PRINT: for (int j = 0; j < 100; ++j) + // DUMP: ForStmt + for(int j = 0; j < 100; ++j) + body(j); + } + // PRINT: #pragma omp unroll partial(4) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(4) + // PRINT: for (int k = 0; k < 250; ++k) + // DUMP: ForStmt + for (int k = 0; k < 250; ++k) + body(k); + } +} + +// PRINT-LABEL: void foo7( +// DUMP-LABEL: FunctionDecl {{.*}} foo7 +void foo7() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 
0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + } + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + } + } + } + } + +} + + + + + +#endif \ No newline at end of file diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp new file mode 100644 index 0000000000000..6c1e21092da43 --- /dev/null +++ b/clang/test/OpenMP/fuse_codegen.cpp @@ -0,0 +1,1511 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5 +// expected-no-diagnostics + +// Check code generation +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2 + +#ifndef HEADER +#define HEADER + +//placeholder for loop body code. +extern "C" void body(...) 
{} + +extern "C" void foo1(int start1, int end1, int step1, int start2, int end2, int step2) { + int i,j; + #pragma omp fuse + { + for(i = start1; i < end1; i += step1) body(i); + for(j = start2; j < end2; j += step2) body(j); + } + +} + +template +void foo2(T start, T end, T step){ + T i,j,k; + #pragma omp fuse + { + for(i = start; i < end; i += step) body(i); + for(j = end; j > start; j -= step) body(j); + for(k = start+step; k < end+step; k += step) body(k); + } +} + +extern "C" void tfoo2() { + foo2(0, 64, 4); +} + +extern "C" void foo3() { + double arr[256]; + #pragma omp fuse + { + #pragma omp fuse + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + } + for(int c = 42; auto &&v: arr) body(c,v); + for(int cc = 37; auto &&vv: arr) body(cc, vv); + } +} + + +#endif +// CHECK1-LABEL: define dso_local void @body( +// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo1( +// CHECK1-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// 
CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] +// CHECK1-NEXT: [[TMP17:%.*]] = 
load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP16]], label 
%[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP35]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] +// CHECK1: [[IF_THEN22]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]] +// CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK1-NEXT: br label %[[IF_END27]] +// CHECK1: [[IF_END27]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @tfoo2( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_( +// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// 
CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// 
CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]] +// CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 +// CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]] +// 
CHECK1: [[COND_TRUE30]]: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: br label %[[COND_END32:.*]] +// CHECK1: [[COND_FALSE31]]: +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: br label %[[COND_END32]] +// CHECK1: [[COND_END32]]: +// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ] +// CHECK1-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]] +// CHECK1-NEXT: [[ADD38:%.*]] = add 
i32 [[TMP49]], [[MUL37]] +// CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] +// CHECK1: [[IF_THEN40]]: +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]] +// CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]] +// CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP61]]) +// CHECK1-NEXT: br label %[[IF_END45]] +// CHECK1: [[IF_END45]]: +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK1: [[IF_THEN47]]: +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]] +// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]] +// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]] +// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP70]]) +// CHECK1-NEXT: br label %[[IF_END52]] +// CHECK1: [[IF_END52]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo3( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, 
align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END224:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr 
[[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4 +// 
CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8 +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1 +// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1 +// CHECK1-NEXT: 
[[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1 +// CHECK1-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK1-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8 +// CHECK1-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr 
[[TMP22]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]] +// CHECK1-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8 +// CHECK1-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1 +// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1 +// CHECK1-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1 +// CHECK1-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1 +// CHECK1-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK1-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1 +// CHECK1-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]] +// CHECK1: [[COND_TRUE44]]: +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK1-NEXT: br label %[[COND_END46:.*]] +// CHECK1: [[COND_FALSE45]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: br label %[[COND_END46]] +// CHECK1: [[COND_END46]]: +// CHECK1-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ] +// CHECK1-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// 
CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]] +// CHECK1: [[COND_TRUE50]]: +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: br label %[[COND_END52:.*]] +// CHECK1: [[COND_FALSE51]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: br label %[[COND_END52]] +// CHECK1: [[COND_END52]]: +// CHECK1-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ] +// CHECK1-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8 +// CHECK1-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 +// CHECK1-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4 +// CHECK1-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4 +// CHECK1-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]] +// CHECK1-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]] +// 
CHECK1-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32 +// CHECK1-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4 +// CHECK1-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1 +// CHECK1-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]] +// CHECK1-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN64]]: +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]] +// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]] +// CHECK1-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1 +// CHECK1-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]] +// CHECK1-NEXT: store i32 [[ADD68]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP48]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]] +// CHECK1: [[IF_THEN70]]: +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]] +// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]] +// CHECK1-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2 +// CHECK1-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]] +// CHECK1-NEXT: store i32 [[ADD74]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP55]]) +// CHECK1-NEXT: br label %[[IF_END75]] +// CHECK1: [[IF_END75]]: +// CHECK1-NEXT: br label %[[IF_END76]] +// CHECK1: [[IF_END76]]: +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]] +// CHECK1: [[IF_THEN78]]: +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]] +// CHECK1-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8 +// CHECK1-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1 +// CHECK1-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]] +// CHECK1-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP63]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP64]], double noundef [[TMP66]]) +// CHECK1-NEXT: br label %[[IF_END83]] +// CHECK1: [[IF_END83]]: +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]] +// CHECK1-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]] +// CHECK1: [[IF_THEN85]]: +// CHECK1-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]] +// CHECK1-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]] +// CHECK1-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1 +// CHECK1-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]] +// CHECK1-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8 +// CHECK1-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP75]], double noundef [[TMP77]]) +// CHECK1-NEXT: br label %[[IF_END90]] +// CHECK1: [[IF_END90]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @body( +// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo1( +// CHECK2-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, 
align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 
[[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], 
[[TMP28]] +// CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]] +// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]] +// CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP35]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] +// CHECK2: [[IF_THEN22]]: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]] +// CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]] +// CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK2-NEXT: br label %[[IF_END27]] +// CHECK2: [[IF_END27]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo3( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, 
align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END224:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr 
[[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4 +// 
CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8 +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1 +// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1 +// CHECK2-NEXT: 
[[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1 +// CHECK2-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK2-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8 +// CHECK2-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr 
[[TMP22]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]] +// CHECK2-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8 +// CHECK2-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1 +// CHECK2-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1 +// CHECK2-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1 +// CHECK2-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1 +// CHECK2-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK2-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK2-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1 +// CHECK2-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 +// CHECK2-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]] +// CHECK2: [[COND_TRUE44]]: +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK2-NEXT: br label %[[COND_END46:.*]] +// CHECK2: [[COND_FALSE45]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: br label %[[COND_END46]] +// CHECK2: [[COND_END46]]: +// CHECK2-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ] +// CHECK2-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// 
CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]] +// CHECK2-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]] +// CHECK2: [[COND_TRUE50]]: +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: br label %[[COND_END52:.*]] +// CHECK2: [[COND_FALSE51]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: br label %[[COND_END52]] +// CHECK2: [[COND_END52]]: +// CHECK2-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ] +// CHECK2-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8 +// CHECK2-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 +// CHECK2-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4 +// CHECK2-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4 +// CHECK2-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK2-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]] +// CHECK2-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]] +// 
CHECK2-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32 +// CHECK2-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4 +// CHECK2-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1 +// CHECK2-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]] +// CHECK2-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN64]]: +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]] +// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]] +// CHECK2-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1 +// CHECK2-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]] +// CHECK2-NEXT: store i32 [[ADD68]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP48]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]] +// CHECK2-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]] +// CHECK2: [[IF_THEN70]]: +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]] +// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]] +// CHECK2-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2 +// CHECK2-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]] +// CHECK2-NEXT: store i32 [[ADD74]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP55]]) +// CHECK2-NEXT: br label %[[IF_END75]] +// CHECK2: [[IF_END75]]: +// CHECK2-NEXT: br label %[[IF_END76]] +// CHECK2: [[IF_END76]]: +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]] +// CHECK2: [[IF_THEN78]]: +// CHECK2-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8 +// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8 +// CHECK2-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]] +// CHECK2-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]] +// CHECK2-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8 +// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK2-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8 +// CHECK2-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1 +// CHECK2-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]] +// CHECK2-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP63]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP64]], double noundef [[TMP66]]) +// CHECK2-NEXT: br label %[[IF_END83]] +// CHECK2: [[IF_END83]]: +// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]] +// CHECK2-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]] +// CHECK2: [[IF_THEN85]]: +// CHECK2-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK2-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]] +// CHECK2-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]] +// CHECK2-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1 +// CHECK2-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]] +// CHECK2-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8 +// CHECK2-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8 +// CHECK2-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK2-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP75]], double noundef [[TMP77]]) +// CHECK2-NEXT: br label %[[IF_END90]] +// CHECK2: [[IF_END90]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @tfoo2( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_( +// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: 
[[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], 
align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] +// CHECK2-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr 
[[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]] +// CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 +// CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1 +// CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], 
label %[[COND_FALSE31:.*]] +// CHECK2: [[COND_TRUE30]]: +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: br label %[[COND_END32:.*]] +// CHECK2: [[COND_FALSE31]]: +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: br label %[[COND_END32]] +// CHECK2: [[COND_END32]]: +// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ] +// CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]] +// 
CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]] +// CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] +// CHECK2: [[IF_THEN40]]: +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]] +// CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]] +// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]] +// CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP61]]) +// CHECK2-NEXT: br label %[[IF_END45]] +// CHECK2: [[IF_END45]]: +// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]] +// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK2: [[IF_THEN47]]: +// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]] +// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 +// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK2-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]] +// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]] +// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP70]]) +// CHECK2-NEXT: br label %[[IF_END52]] +// CHECK2: [[IF_END52]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +//. 
+// CHECK1: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +//. +// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +//. diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp new file mode 100644 index 0000000000000..50dedfd2c0dc6 --- /dev/null +++ b/clang/test/OpenMP/fuse_messages.cpp @@ -0,0 +1,76 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s + +void func() { + + // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}} + #pragma omp fuse + ; + + // expected-error@+2 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + {int bar = 0;} + + // expected-error@+4 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + { + for(int i = 0; i < 10; ++i); + int x = 2; + } + + // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}} + #pragma omp fuse + #pragma omp for + for (int i = 0; i < 7; ++i) + ; + + { + // expected-error@+2 {{expected statement}} + #pragma omp fuse + } + + // expected-warning@+1 {{extra tokens at the end of '#pragma omp fuse' are ignored}} + #pragma omp fuse foo + { + for (int i = 0; i < 7; ++i) + ; + } + + + // expected-error@+1 {{unexpected OpenMP clause 'final' in directive '#pragma omp fuse'}} + #pragma omp fuse final(0) + { + for (int i = 0; i < 7; ++i) + ; + } + + //expected-error@+4 {{loop after '#pragma omp fuse' is not in canonical form}} + //expected-error@+3 
{{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}} + #pragma omp fuse + { + for(int i = 0; i < 10; i*=2) { + ; + } + } + + //expected-error@+2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}} + #pragma omp fuse + {} + + //expected-error@+3 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + { + #pragma omp unroll full + for(int i = 0; i < 10; ++i); + + for(int j = 0; j < 10; ++j); + } + + //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'unsigned int'}} + //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'long long'}} + #pragma omp fuse + { + for(int i = 0; i < 10; ++i); + for(unsigned int j = 0; j < 10; ++j); + for(long long k = 0; k < 100; ++k); + } +} \ No newline at end of file diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 3068621d9c004..3afa59b2f2d6c 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2211,6 +2211,7 @@ class EnqueueVisitor : public ConstStmtVisitor, void VisitOMPUnrollDirective(const OMPUnrollDirective *D); void VisitOMPReverseDirective(const OMPReverseDirective *D); void VisitOMPInterchangeDirective(const OMPInterchangeDirective *D); + void VisitOMPFuseDirective(const OMPFuseDirective *D); void VisitOMPForDirective(const OMPForDirective *D); void VisitOMPForSimdDirective(const OMPForSimdDirective *D); void VisitOMPSectionsDirective(const OMPSectionsDirective *D); @@ -3369,6 +3370,10 @@ void EnqueueVisitor::VisitOMPInterchangeDirective( VisitOMPLoopTransformationDirective(D); } +void EnqueueVisitor::VisitOMPFuseDirective(const OMPFuseDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) { 
VisitOMPLoopDirective(D); } @@ -6323,6 +6328,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OMPReverseDirective"); case CXCursor_OMPInterchangeDirective: return cxstring::createRef("OMPInterchangeDirective"); + case CXCursor_OMPFuseDirective: + return cxstring::createRef("OMPFuseDirective"); case CXCursor_OMPForDirective: return cxstring::createRef("OMPForDirective"); case CXCursor_OMPForSimdDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index a6301daa672c3..a6d032fa302b1 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -687,6 +687,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPInterchangeDirectiveClass: K = CXCursor_OMPInterchangeDirective; break; + case Stmt::OMPFuseDirectiveClass: + K = CXCursor_OMPFuseDirective; + break; case Stmt::OMPForDirectiveClass: K = CXCursor_OMPForDirective; break; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index a87111cb5a11d..6352be8069e9e 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -855,6 +855,10 @@ def OMP_For : Directive<[Spelling<"for">]> { let category = CA_Executable; let languages = [L_C]; } +def OMP_Fuse : Directive<[Spelling<"fuse">]> { + let association = AS_Loop; + let category = CA_Executable; +} def OMP_Interchange : Directive<[Spelling<"interchange">]> { let allowedOnceClauses = [ VersionedClause, diff --git a/openmp/runtime/test/transform/fuse/foreach.cpp b/openmp/runtime/test/transform/fuse/foreach.cpp new file mode 100644 index 0000000000000..cabf4bf8a511d --- /dev/null +++ b/openmp/runtime/test/transform/fuse/foreach.cpp @@ -0,0 +1,192 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + 
Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator 
advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp fuse + { + for (Reporter a{"C"}; auto &&v : Reporter("A")) + printf("v=%d\n", v); + for (Reporter aa{"D"}; auto &&vv : Reporter("B")) + printf("vv=%d\n", vv); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: [C] ctor +// CHECK-NEXT: [A] ctor +// CHECK-NEXT: [A] end() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] iterator distance: 3 +// CHECK-NEXT: [D] ctor +// CHECK-NEXT: [B] ctor +// CHECK-NEXT: [B] end() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] iterator distance: 3 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: v=0 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: vv=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: v=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: vv=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: v=2 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] 
iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: vv=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] dtor +// CHECK-NEXT: [D] dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] dtor +// CHECK-NEXT: [C] dtor +// CHECK-NEXT: done + + +#endif diff --git a/openmp/runtime/test/transform/fuse/intfor.c b/openmp/runtime/test/transform/fuse/intfor.c new file mode 100644 index 0000000000000..b8171b4df7042 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/intfor.c @@ -0,0 +1,50 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp fuse + { + for (int i = 5; i <= 25; i += 5) + printf("i=%d\n", i); + for (int j = 10; j < 100; j += 10) + printf("j=%d\n", j); + for (int k = 10; k > 0; --k) + printf("k=%d\n", k); + } + printf("done\n"); + return EXIT_SUCCESS; +} +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: i=5 +// CHECK-NEXT: j=10 +// CHECK-NEXT: k=10 +// CHECK-NEXT: i=10 +// CHECK-NEXT: j=20 +// CHECK-NEXT: k=9 +// CHECK-NEXT: i=15 +// CHECK-NEXT: j=30 +// CHECK-NEXT: k=8 +// CHECK-NEXT: i=20 +// CHECK-NEXT: j=40 +// CHECK-NEXT: k=7 +// CHECK-NEXT: i=25 +// CHECK-NEXT: j=50 +// CHECK-NEXT: k=6 +// CHECK-NEXT: j=60 +// CHECK-NEXT: k=5 +// CHECK-NEXT: j=70 +// CHECK-NEXT: k=4 +// CHECK-NEXT: j=80 +// CHECK-NEXT: k=3 +// CHECK-NEXT: j=90 +// CHECK-NEXT: k=2 +// CHECK-NEXT: k=1 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/fuse/iterfor.cpp b/openmp/runtime/test/transform/fuse/iterfor.cpp new file mode 100644 index 0000000000000..552484b2981c4 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/iterfor.cpp @@ -0,0 +1,194 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s 
--match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + bool operator!=(const Iterator &that) const { + owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos); + return this->pos != that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + 
owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); + Reporter C("C"); + Reporter D("D"); +#pragma omp fuse + { + for (auto it = C.begin(); it != C.end(); ++it) + printf("v=%d\n", *it); + + for (auto it = D.begin(); it != D.end(); ++it) + printf("vv=%d\n", *it); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK: [C] ctor +// CHECK-NEXT: [D] ctor +// CHECK-NEXT: [C] begin() +// CHECK-NEXT: [C] begin() +// CHECK-NEXT: [C] end() +// CHECK-NEXT: [C] iterator distance: 3 +// CHECK-NEXT: [D] begin() +// CHECK-NEXT: [D] begin() +// CHECK-NEXT: [D] end() +// CHECK-NEXT: [D] iterator distance: 3 +// CHECK-NEXT: [C] iterator advance: 0 += 0 +// CHECK-NEXT: [C] iterator move assign +// CHECK-NEXT: [C] iterator deref: 0 +// CHECK-NEXT: v=0 +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] iterator advance: 0 += 0 +// CHECK-NEXT: [D] iterator move assign +// CHECK-NEXT: [D] iterator deref: 0 +// CHECK-NEXT: vv=0 +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator advance: 0 += 1 +// CHECK-NEXT: [C] iterator move assign +// CHECK-NEXT: [C] iterator deref: 1 +// CHECK-NEXT: v=1 +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] iterator advance: 0 += 1 +// CHECK-NEXT: [D] iterator move assign +// CHECK-NEXT: [D] iterator 
deref: 1 +// CHECK-NEXT: vv=1 +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator advance: 0 += 2 +// CHECK-NEXT: [C] iterator move assign +// CHECK-NEXT: [C] iterator deref: 2 +// CHECK-NEXT: v=2 +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] iterator advance: 0 += 2 +// CHECK-NEXT: [D] iterator move assign +// CHECK-NEXT: [D] iterator deref: 2 +// CHECK-NEXT: vv=2 +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: done +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] dtor +// CHECK-NEXT: [C] dtor diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp new file mode 100644 index 0000000000000..e9f76713fe3e0 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp @@ -0,0 +1,208 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + 
Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) 
const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(2) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp fuse + { + for (Reporter c{"init-stmt"}; auto &&v : Reporter("range")) + printf("i=%d v=%d\n", i, v); + for (int vv = 0; vv < 3; ++vv) + printf("i=%d vv=%d\n", i, vv); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [init-stmt] ctor +// CHECK-NEXT: [range] ctor +// CHECK-NEXT: [range] end() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] iterator distance: 3 +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=0 v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=0 vv=0 +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=0 v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=0 vv=1 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=0 v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=0 vv=2 +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=1 v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=1 vv=0 +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=1 v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=1 vv=1 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=1 v=2 
+// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=1 vv=2 +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=2 v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=2 vv=0 +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=2 v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=2 vv=1 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=2 v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=2 vv=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] dtor +// CHECK-NEXT: [init-stmt] dtor +// CHECK-NEXT: done + diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c new file mode 100644 index 0000000000000..272908e72c429 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c @@ -0,0 +1,45 @@ +// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(2) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp fuse + { + for (int j = 0; j < 3; ++j) + printf("i=%d j=%d\n", i, j); + for (int k = 0; k < 3; ++k) + printf("i=%d k=%d\n", i, k); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK: i=0 j=0 +// CHECK-NEXT: i=0 k=0 +// CHECK-NEXT: i=0 j=1 +// CHECK-NEXT: i=0 k=1 +// CHECK-NEXT: i=0 j=2 +// CHECK-NEXT: i=0 k=2 +// CHECK-NEXT: i=1 j=0 +// CHECK-NEXT: i=1 k=0 +// CHECK-NEXT: i=1 j=1 +// CHECK-NEXT: i=1 k=1 +// CHECK-NEXT: i=1 j=2 +// CHECK-NEXT: 
i=1 k=2 +// CHECK-NEXT: i=2 j=0 +// CHECK-NEXT: i=2 k=0 +// CHECK-NEXT: i=2 j=1 +// CHECK-NEXT: i=2 k=1 +// CHECK-NEXT: i=2 j=2 +// CHECK-NEXT: i=2 k=2 +// CHECK-NEXT: done From 34ac92ada84eeca9573d0b005f24d73738f46626 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:28:04 +0000 Subject: [PATCH 02/11] Add looprange clause --- clang/include/clang/AST/OpenMPClause.h | 100 ++++++ clang/include/clang/AST/RecursiveASTVisitor.h | 8 + clang/include/clang/AST/StmtOpenMP.h | 9 +- .../clang/Basic/DiagnosticSemaKinds.td | 5 + clang/include/clang/Parse/Parser.h | 3 + clang/include/clang/Sema/SemaOpenMP.h | 6 + clang/lib/AST/OpenMPClause.cpp | 35 ++ clang/lib/AST/StmtOpenMP.cpp | 7 +- clang/lib/AST/StmtProfile.cpp | 7 + clang/lib/Basic/OpenMPKinds.cpp | 2 + clang/lib/Parse/ParseOpenMP.cpp | 36 ++ clang/lib/Sema/SemaOpenMP.cpp | 155 +++++++-- clang/lib/Sema/TreeTransform.h | 33 ++ clang/lib/Serialization/ASTReader.cpp | 11 + clang/lib/Serialization/ASTReaderStmt.cpp | 4 +- clang/lib/Serialization/ASTWriter.cpp | 8 + clang/test/OpenMP/fuse_ast_print.cpp | 67 ++++ clang/test/OpenMP/fuse_codegen.cpp | 320 +++++++++++++++++- clang/test/OpenMP/fuse_messages.cpp | 112 +++++- clang/tools/libclang/CIndex.cpp | 5 + llvm/include/llvm/Frontend/OpenMP/ClauseT.h | 16 +- llvm/include/llvm/Frontend/OpenMP/OMP.td | 6 + 22 files changed, 919 insertions(+), 36 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 6fd16bc0f03be..8f937cdef9cd0 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1143,6 +1143,106 @@ class OMPFullClause final : public OMPNoChildClause { static OMPFullClause *CreateEmpty(const ASTContext &C); }; +/// This class represents the 'looprange' clause in the +/// '#pragma omp fuse' directive +/// +/// \code {c} +/// #pragma omp fuse looprange(1,2) +/// { +/// for(int i = 0; i < 64; ++i) +/// for(int j = 0; j < 256; j+=2) +/// for(int k = 127; k >= 0; --k) 
+/// \endcode +class OMPLoopRangeClause final : public OMPClause { + friend class OMPClauseReader; + + explicit OMPLoopRangeClause() + : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {} + + /// Location of '(' + SourceLocation LParenLoc; + + /// Location of 'first' + SourceLocation FirstLoc; + + /// Location of 'count' + SourceLocation CountLoc; + + /// Expr associated with 'first' argument + Expr *First = nullptr; + + /// Expr associated with 'count' argument + Expr *Count = nullptr; + + /// Set 'first' + void setFirst(Expr *First) { this->First = First; } + + /// Set 'count' + void setCount(Expr *Count) { this->Count = Count; } + + /// Set location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + + /// Set location of 'first' argument + void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } + + /// Set location of 'count' argument + void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } + +public: + /// Build an AST node for a 'looprange' clause + /// + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param FirstLoc Location of the 'first' argument. + /// \param CountLoc Location of the 'count' argument. + static OMPLoopRangeClause * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, + SourceLocation EndLoc, Expr *First, Expr *Count); + + /// Build an empty 'looprange' node for deserialization + /// + /// \param C Context of the AST.
+ static OMPLoopRangeClause *CreateEmpty(const ASTContext &C); + + /// Returns the location of '(' + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns the location of 'first' + SourceLocation getFirstLoc() const { return FirstLoc; } + + /// Returns the location of 'count' + SourceLocation getCountLoc() const { return CountLoc; } + + /// Returns the argument 'first' or nullptr if not set + Expr *getFirst() const { return cast_or_null(First); } + + /// Returns the argument 'count' or nullptr if not set + Expr *getCount() const { return cast_or_null(Count); } + + child_range children() { + return child_range(reinterpret_cast(&First), + reinterpret_cast(&Count) + 1); + } + + const_child_range children() const { + auto Children = const_cast(this)->children(); + return const_child_range(Children.begin(), Children.end()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_looprange; + } +}; + /// Representation of the 'partial' clause of the '#pragma omp unroll' /// directive. 
/// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 918216e8df4aa..10e44e69dd5da 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3410,6 +3410,14 @@ bool RecursiveASTVisitor::VisitOMPFullClause(OMPFullClause *C) { return true; } +template +bool RecursiveASTVisitor::VisitOMPLoopRangeClause( + OMPLoopRangeClause *C) { + TRY_TO(TraverseStmt(C->getFirst())); + TRY_TO(TraverseStmt(C->getCount())); + return true; +} + template bool RecursiveASTVisitor::VisitOMPPartialClause(OMPPartialClause *C) { TRY_TO(TraverseStmt(C->getFactor())); diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index cb8bb91f4768c..f5115afd0753e 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -5572,7 +5572,9 @@ class OMPTileDirective final : public OMPLoopTransformationDirective { : OMPLoopTransformationDirective(OMPTileDirectiveClass, llvm::omp::OMPD_tile, StartLoc, EndLoc, NumLoops) { + // Tiling doubles the original number of loops setNumGeneratedLoops(2 * NumLoops); + // Produces a single top-level canonical loop nest setNumGeneratedLoopNests(1); } @@ -5804,6 +5806,7 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective { : OMPLoopTransformationDirective(OMPReverseDirectiveClass, llvm::omp::OMPD_reverse, StartLoc, EndLoc, NumLoops) { + // Reverse produces a single top-level canonical loop nest setNumGeneratedLoops(NumLoops); setNumGeneratedLoopNests(1); } @@ -5877,6 +5880,8 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { : OMPLoopTransformationDirective(OMPInterchangeDirectiveClass, llvm::omp::OMPD_interchange, StartLoc, EndLoc, NumLoops) { + // Interchange produces a single top-level canonical loop + // nest, with the exact same amount of total loops setNumGeneratedLoops(NumLoops); setNumGeneratedLoopNests(1); } @@ -5995,8 
+6000,10 @@ class OMPFuseDirective final : public OMPLoopTransformationDirective { /// \param C Context of the AST /// \param NumClauses Number of clauses to allocate /// \param NumLoops Number of associated loops to allocate + /// \param NumLoopNests Number of top level loops to allocate static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, - unsigned NumLoops); + unsigned NumLoops, + unsigned NumLoopNests); /// Gets the associated loops after the transformation. This is the de-sugared /// replacement or nulltpr in dependent contexts. diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index fe9ca29038a1f..002aa7a774fbe 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11620,6 +11620,11 @@ def err_omp_not_a_loop_sequence : Error < "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">; def err_omp_empty_loop_sequence : Error < "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">; +def err_omp_invalid_looprange : Error < + "loop range in '#pragma omp %0' exceeds the number of available loops: " + "range end '%1' is greater than the total number of loops '%2'">; +def warn_omp_redundant_fusion : Warning < + "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">; def err_omp_not_for : Error< "%select{statement after '#pragma omp %1' must be a for loop|" "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index a47e23ffbd357..08bee0078b5ff 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -6735,6 +6735,9 @@ class Parser : public CodeCompletionHandler { OpenMPClauseKind Kind, bool ParseOnly); + /// Parses the 
'looprange' clause of a '#pragma omp fuse' directive. + OMPClause *ParseOpenMPLoopRangeClause(); + /// Parses the 'sizes' clause of a '#pragma omp tile' directive. OMPClause *ParseOpenMPSizesClause(); diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index ea21377a8db9c..0c28aaf6ab21a 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -922,6 +922,12 @@ class SemaOpenMP : public SemaBase { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + + /// Called on well-formed 'looprange' clause after parsing its arguments. + OMPClause * + ActOnOpenMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc); /// Called on well-formed 'ordered' clause. OMPClause * ActOnOpenMPOrderedClause(SourceLocation StartLoc, SourceLocation EndLoc, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 0e5052b944162..0b5808eb100e4 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1024,6 +1024,26 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) { return new (C) OMPPartialClause(); } +OMPLoopRangeClause * +OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc, + Expr *First, Expr *Count) { + OMPLoopRangeClause *Clause = CreateEmpty(C); + Clause->setLocStart(StartLoc); + Clause->setLParenLoc(LParenLoc); + Clause->setLocEnd(EndLoc); + Clause->setFirstLoc(FirstLoc); + Clause->setCountLoc(CountLoc); + Clause->setFirst(First); + Clause->setCount(Count); + return Clause; +} + +OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) { + return new (C) OMPLoopRangeClause(); +} + OMPAllocateClause *OMPAllocateClause::Create( const ASTContext &C, SourceLocation
StartLoc, SourceLocation LParenLoc, Expr *Allocator, Expr *Alignment, SourceLocation ColonLoc, @@ -1888,6 +1908,21 @@ void OMPClausePrinter::VisitOMPPartialClause(OMPPartialClause *Node) { } } +void OMPClausePrinter::VisitOMPLoopRangeClause(OMPLoopRangeClause *Node) { + OS << "looprange"; + + Expr *First = Node->getFirst(); + Expr *Count = Node->getCount(); + + if (First && Count) { + OS << "("; + First->printPretty(OS, nullptr, Policy, 0); + OS << ","; + Count->printPretty(OS, nullptr, Policy, 0); + OS << ")"; + } +} + void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) { OS << "allocator("; Node->getAllocator()->printPretty(OS, nullptr, Policy, 0); diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 276e43ec9f7d5..c5a6732cc2217 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -527,10 +527,13 @@ OMPFuseDirective *OMPFuseDirective::Create( OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, - unsigned NumLoops) { - return createEmptyDirective( + unsigned NumLoops, + unsigned NumLoopNests) { + OMPFuseDirective *Dir = createEmptyDirective( C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, SourceLocation(), SourceLocation(), NumLoops); + Dir->setNumGeneratedLoopNests(NumLoopNests); + return Dir; } OMPForSimdDirective * diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index c5d1d5b48508e..34ed3f22f6eb7 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -511,6 +511,13 @@ void OMPClauseProfiler::VisitOMPPartialClause(const OMPPartialClause *C) { Profiler->VisitExpr(Factor); } +void OMPClauseProfiler::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) { + if (const Expr *First = C->getFirst()) + Profiler->VisitExpr(First); + if (const Expr *Count = C->getCount()) + Profiler->VisitExpr(Count); +} + void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { if 
(C->getAllocator()) Profiler->VisitStmt(C->getAllocator()); diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index d172450512f13..18330181f1509 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -248,6 +248,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, case OMPC_affinity: case OMPC_when: case OMPC_append_args: + case OMPC_looprange: break; default: break; @@ -583,6 +584,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_affinity: case OMPC_when: case OMPC_append_args: + case OMPC_looprange: break; default: break; diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 78d3503d8eb68..2d6d624c1ecc8 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3057,6 +3057,39 @@ OMPClause *Parser::ParseOpenMPSizesClause() { OpenLoc, CloseLoc); } +OMPClause *Parser::ParseOpenMPLoopRangeClause() { + SourceLocation ClauseNameLoc = ConsumeToken(); + SourceLocation FirstLoc, CountLoc; + + BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end); + if (T.consumeOpen()) { + Diag(Tok, diag::err_expected) << tok::l_paren; + return nullptr; + } + + FirstLoc = Tok.getLocation(); + ExprResult FirstVal = ParseConstantExpression(); + if (!FirstVal.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + ExpectAndConsume(tok::comma); + + CountLoc = Tok.getLocation(); + ExprResult CountVal = ParseConstantExpression(); + if (!CountVal.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + T.consumeClose(); + + return Actions.OpenMP().ActOnOpenMPLoopRangeClause( + FirstVal.get(), CountVal.get(), ClauseNameLoc, T.getOpenLocation(), + FirstLoc, CountLoc, T.getCloseLocation()); +} + OMPClause *Parser::ParseOpenMPPermutationClause() { SourceLocation ClauseNameLoc, OpenLoc, CloseLoc; SmallVector ArgExprs; @@ -3485,6 +3518,9 @@ OMPClause 
*Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, } Clause = ParseOpenMPClause(CKind, WrongDirective); break; + case OMPC_looprange: + Clause = ParseOpenMPLoopRangeClause(); + break; default: break; } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 84ac9587bd54d..3ec3f2ad31e78 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14326,7 +14326,6 @@ bool SemaOpenMP::checkTransformableLoopSequence( // and tries to match the input AST to the canonical loop sequence grammar // structure - auto NLCV = NestedLoopCounterVisitor(); // Helper functions to validate canonical loop sequence grammar is valid auto isLoopSequenceDerivation = [](auto *Child) { return isa(Child) || isa(Child) || @@ -14429,7 +14428,7 @@ bool SemaOpenMP::checkTransformableLoopSequence( // Modularized code for handling regular canonical loops auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, - &LoopSeqSize, &NumLoops, Kind, &TmpDSA, &NLCV, + &LoopSeqSize, &NumLoops, Kind, &TmpDSA, this](Stmt *Child) { OriginalInits.emplace_back(); LoopHelpers.emplace_back(); @@ -14442,8 +14441,11 @@ bool SemaOpenMP::checkTransformableLoopSequence( << getOpenMPDirectiveName(Kind); return false; } + storeLoopStatements(Child); - NumLoops += NLCV.TraverseStmt(Child); + auto NLCV = NestedLoopCounterVisitor(); + NLCV.TraverseStmt(Child); + NumLoops += NLCV.getNestedLoopCount(); return true; }; @@ -15769,6 +15771,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); DeclContext *CurrContext = SemaRef.CurContext; Scope *CurScope = SemaRef.getCurScope(); @@ -15785,7 +15788,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, SmallVector> OriginalInits; unsigned NumLoops; - // TODO: Support looprange clause using LoopSeqSize unsigned LoopSeqSize; if (!checkTransformableLoopSequence(OMPD_fuse, 
AStmt, LoopSeqSize, NumLoops, LoopHelpers, LoopStmts, OriginalInits, @@ -15794,10 +15796,67 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, } // Defer transformation in dependent contexts + // The NumLoopNests argument is set to a placeholder (0) + // because a dependent context could prevent determining its true value if (CurrContext->isDependentContext()) { return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, - NumLoops, 1, AStmt, nullptr, nullptr); + NumLoops, 0, AStmt, nullptr, nullptr); } + + // Handle clauses, which can be any of the following: [looprange, apply] + const OMPLoopRangeClause *LRC = + OMPExecutableDirective::getSingleClause(Clauses); + + // The clause arguments are invalidated if any error arises + // such as non-constant or non-positive arguments + if (LRC && (!LRC->getFirst() || !LRC->getCount())) + return StmtError(); + + // Delayed semantic check of LoopRange constraint + // Evaluates the loop range arguments and returns the first and count values + auto EvaluateLoopRangeArguments = [&Context](Expr *First, Expr *Count, + uint64_t &FirstVal, + uint64_t &CountVal) { + llvm::APSInt FirstInt = First->EvaluateKnownConstInt(Context); + llvm::APSInt CountInt = Count->EvaluateKnownConstInt(Context); + FirstVal = FirstInt.getZExtValue(); + CountVal = CountInt.getZExtValue(); + }; + + // Checks if the loop range is valid + auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal, + unsigned NumLoops) -> bool { + return FirstVal + CountVal - 1 <= NumLoops; + }; + uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize; + + if (LRC) { + EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal, + CountVal); + if (CountVal == 1) + SemaRef.Diag(LRC->getCountLoc(), diag::warn_omp_redundant_fusion) + << getOpenMPDirectiveName(OMPD_fuse); + + if (!ValidLoopRange(FirstVal, CountVal, LoopSeqSize)) { + SemaRef.Diag(LRC->getFirstLoc(), diag::err_omp_invalid_looprange) + << getOpenMPDirectiveName(OMPD_fuse) 
<< (FirstVal + CountVal - 1) + << LoopSeqSize; + return StmtError(); + } + + LastVal = FirstVal + CountVal - 1; + } + + // Complete fusion generates a single canonical loop nest + // However looprange clause generates several loop nests + unsigned NumLoopNests = LRC ? LoopSeqSize - CountVal + 1 : 1; + + // Emit a warning for redundant loop fusion when the sequence contains only + // one loop. + if (LoopSeqSize == 1) + SemaRef.Diag(AStmt->getBeginLoc(), diag::warn_omp_redundant_fusion) + << getOpenMPDirectiveName(OMPD_fuse); + assert(LoopHelpers.size() == LoopSeqSize && "Expecting loop iteration space dimensionality to match number of " "affected loops"); @@ -15811,8 +15870,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, SmallVector PreInits; // Select the type with the largest bit width among all induction variables - QualType IVType = LoopHelpers[0].IterationVarRef->getType(); - for (unsigned int I = 1; I < LoopSeqSize; ++I) { + QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType(); + for (unsigned int I = FirstVal; I < LastVal; ++I) { QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType(); if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) { IVType = CurrentIVType; @@ -15861,20 +15920,21 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Process each single loop to generate and collect declarations // and statements for all helper expressions - for (unsigned int I = 0; I < LoopSeqSize; ++I) { + for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], PreInits); - auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", I); - auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", I); - auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", I); + auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", J); + auto [LBVD, LBDStmt] = 
CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", J); + auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", J); auto [NIVD, NIDStmt] = - CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", I, true); + CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", J, true); auto [IVVD, IVDStmt] = - CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", I); + CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", J); if (!LBVD || !STVD || !NIVD || !IVVD) - return StmtError(); + assert(LBVD && STVD && NIVD && IVVD && + "OpenMP Fuse Helper variables creation failed"); UBVarDecls.push_back(UBVD); LBVarDecls.push_back(LBVD); @@ -15949,8 +16009,9 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // omp.fuse.max = max(omp.temp1, omp.temp0) ExprResult MaxExpr; - for (unsigned I = 0; I < LoopSeqSize; ++I) { - DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[I]); + // I walks the loop sequence; J indexes the helpers of the fused loops + for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { + DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[J]); QualType NITy = NIRef->getType(); if (MaxExpr.isUnset()) { @@ -15958,7 +16019,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, MaxExpr = NIRef; } else { // Create a new acummulator variable t_i = MaxExpr - std::string TempName = (Twine(".omp.temp.") + Twine(I)).str(); + std::string TempName = (Twine(".omp.temp.") + Twine(J)).str(); VarDecl *TempDecl = buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr); TempDecl->setInit(MaxExpr.get()); @@ -15981,7 +16042,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, if (!Comparison.isUsable()) return StmtError(); - DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[I]); + DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[J]); // Update MaxExpr using a conditional expression to hold the max value MaxExpr = new (Context) ConditionalOperator( Comparison.get(), SourceLocation(), TempRef2, SourceLocation(), @@ -16034,23 +16095,21 @@ StmtResult
SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, CompoundStmt *FusedBody = nullptr; SmallVector FusedBodyStmts; - for (unsigned I = 0; I < LoopSeqSize; ++I) { - + for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { // Assingment of the original sub-loop index to compute the logical index // IV_k = LB_k + omp.fuse.index * ST_k - ExprResult IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul, - MakeVarDeclRef(STVarDecls[I]), MakeIVRef()); + MakeVarDeclRef(STVarDecls[J]), MakeIVRef()); if (!IdxExpr.isUsable()) return StmtError(); IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add, - MakeVarDeclRef(LBVarDecls[I]), IdxExpr.get()); + MakeVarDeclRef(LBVarDecls[J]), IdxExpr.get()); if (!IdxExpr.isUsable()) return StmtError(); IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign, - MakeVarDeclRef(IVVarDecls[I]), IdxExpr.get()); + MakeVarDeclRef(IVVarDecls[J]), IdxExpr.get()); if (!IdxExpr.isUsable()) return StmtError(); @@ -16065,7 +16124,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, Stmt *Body = (isa(LoopStmts[I])) ? cast(LoopStmts[I])->getBody() : cast(LoopStmts[I])->getBody(); - BodyStmts.push_back(Body); CompoundStmt *CombinedBody = @@ -16073,7 +16131,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, SourceLocation(), SourceLocation()); ExprResult Condition = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(), - MakeVarDeclRef(NIVarDecls[I])); + MakeVarDeclRef(NIVarDecls[J])); if (!Condition.isUsable()) return StmtError(); @@ -16094,8 +16152,26 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(), IncrExpr.get()->getEndLoc()); + // In the case of looprange, the result of fuse won't simply + // be a single loop (ForStmt), but rather a loop sequence + // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop + // and the post-fusion loops, preserving its original order. 
+ Stmt *FusionStmt = FusedForStmt; + if (LRC) { + SmallVector FinalLoops; + // Gather all the pre-fusion loops + for (unsigned I = 0; I < FirstVal - 1; ++I) + FinalLoops.push_back(LoopStmts[I]); + // Gather the fused loop + FinalLoops.push_back(FusedForStmt); + // Gather all the post-fusion loops + for (unsigned I = FirstVal + CountVal - 1; I < LoopSeqSize; ++I) + FinalLoops.push_back(LoopStmts[I]); + FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + } return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops, - 1, AStmt, FusedForStmt, + NumLoopNests, AStmt, FusionStmt, buildPreInits(Context, PreInits)); } @@ -17218,6 +17294,31 @@ OMPClause *SemaOpenMP::ActOnOpenMPPartialClause(Expr *FactorExpr, FactorExpr); } +OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause( + Expr *First, Expr *Count, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc) { + + // OpenMP [6.0, Restrictions] + // First and Count must be integer expressions with positive value + ExprResult FirstVal = + VerifyPositiveIntegerConstantInClause(First, OMPC_looprange); + if (FirstVal.isInvalid()) + First = nullptr; + + ExprResult CountVal = + VerifyPositiveIntegerConstantInClause(Count, OMPC_looprange); + if (CountVal.isInvalid()) + Count = nullptr; + + // OpenMP [6.0, Restrictions] + // first + count - 1 must not evaluate to a value greater than the + // loop sequence length of the associated canonical loop sequence. 
+ // This check must be performed afterwards due to the delayed + // parsing and computation of the associated loop sequence + return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc, + FirstLoc, CountLoc, EndLoc, First, Count); +} + OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 45f556f22c511..30204faf59b7b 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1775,6 +1775,14 @@ class TreeTransform { LParenLoc, EndLoc); } + OMPClause * + RebuildOMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc) { + return getSema().OpenMP().ActOnOpenMPLoopRangeClause( + First, Count, StartLoc, LParenLoc, FirstLoc, CountLoc, EndLoc); + } + /// Build a new OpenMP 'allocator' clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. 
@@ -10578,6 +10586,31 @@ TreeTransform::TransformOMPPartialClause(OMPPartialClause *C) { C->getEndLoc()); } +template +OMPClause * +TreeTransform::TransformOMPLoopRangeClause(OMPLoopRangeClause *C) { + ExprResult F = getDerived().TransformExpr(C->getFirst()); + if (F.isInvalid()) + return nullptr; + + ExprResult Cn = getDerived().TransformExpr(C->getCount()); + if (Cn.isInvalid()) + return nullptr; + + Expr *First = F.get(); + Expr *Count = Cn.get(); + + bool Changed = (First != C->getFirst()) || (Count != C->getCount()); + + // If no changes and AlwaysRebuild() is false, return the original clause + if (!Changed && !getDerived().AlwaysRebuild()) + return C; + + return RebuildOMPLoopRangeClause(First, Count, C->getBeginLoc(), + C->getLParenLoc(), C->getFirstLoc(), + C->getCountLoc(), C->getEndLoc()); +} + template OMPClause * TreeTransform::TransformOMPCollapseClause(OMPCollapseClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a3fbc3d25acab..d5e7c287c23a4 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11089,6 +11089,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_partial: C = OMPPartialClause::CreateEmpty(Context); break; + case llvm::omp::OMPC_looprange: + C = OMPLoopRangeClause::CreateEmpty(Context); + break; case llvm::omp::OMPC_allocator: C = new (Context) OMPAllocatorClause(); break; @@ -11490,6 +11493,14 @@ void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) { C->setLParenLoc(Record.readSourceLocation()); } +void OMPClauseReader::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) { + C->setFirst(Record.readSubExpr()); + C->setCount(Record.readSubExpr()); + C->setLParenLoc(Record.readSourceLocation()); + C->setFirstLoc(Record.readSourceLocation()); + C->setCountLoc(Record.readSourceLocation()); +} + void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) { C->setAllocator(Record.readExpr()); 
C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 291bd8ea4bf18..b424b5aa7b0c6 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -3616,7 +3616,9 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { case STMT_OMP_FUSE_DIRECTIVE: { unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; - S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops); + unsigned NumLoopNests = Record[ASTStmtReader::NumStmtFields + 2]; + S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops, + NumLoopNests); break; } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index af7229d748872..c99ffab64c6e6 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7807,6 +7807,14 @@ void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) { Record.AddSourceLocation(C->getLParenLoc()); } +void OMPClauseWriter::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) { + Record.AddStmt(C->getFirst()); + Record.AddStmt(C->getCount()); + Record.AddSourceLocation(C->getLParenLoc()); + Record.AddSourceLocation(C->getFirstLoc()); + Record.AddSourceLocation(C->getCountLoc()); +} + void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) { Record.AddStmt(C->getAllocator()); Record.AddSourceLocation(C->getLParenLoc()); diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp index 43ce815dab024..ac4f0d38a9c68 100644 --- a/clang/test/OpenMP/fuse_ast_print.cpp +++ b/clang/test/OpenMP/fuse_ast_print.cpp @@ -271,6 +271,73 @@ void foo7() { } +// PRINT-LABEL: void foo8( +// DUMP-LABEL: FunctionDecl {{.*}} foo8 +void foo8() { + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse 
looprange(2,2) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } + +} + +//PRINT-LABEL: void foo9( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo9 +//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} F +//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} C +template +void foo9() { + // PRINT: #pragma omp fuse looprange(F,C) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(F,C) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + + } +} + +// Also test instantiating the template. 
+void tfoo9() { + foo9<1, 2>(); +} + diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp index 6c1e21092da43..d9500bed3ce31 100644 --- a/clang/test/OpenMP/fuse_codegen.cpp +++ b/clang/test/OpenMP/fuse_codegen.cpp @@ -53,6 +53,18 @@ extern "C" void foo3() { } } +extern "C" void foo4() { + double arr[256]; + + #pragma omp fuse looprange(2,2) + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + for(int k = 0; k < 64; ++k) body(k); + for(int c = 42; auto &&v: arr) body(c,v); + } +} + #endif // CHECK1-LABEL: define dso_local void @body( @@ -777,6 +789,157 @@ extern "C" void foo3() { // CHECK1-NEXT: ret void // // +// CHECK1-LABEL: define dso_local void @foo4( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// 
CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK1-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128 +// CHECK1-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP6]]) +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND2:.*]] +// CHECK1: [[FOR_COND2]]: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]] +// CHECK1: [[FOR_BODY4]]: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP16]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]] +// CHECK1: [[IF_THEN9]]: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]] +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP23]]) +// CHECK1-NEXT: br label %[[IF_END14]] +// CHECK1: [[IF_END14]]: +// CHECK1-NEXT: br label %[[FOR_INC15:.*]] +// CHECK1: [[FOR_INC15]]: +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1: [[FOR_END17]]: +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND19:.*]] +// CHECK1: [[FOR_COND19]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]] +// CHECK1: [[FOR_BODY21]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP29]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP30]], double noundef [[TMP32]]) +// CHECK1-NEXT: br label %[[FOR_INC22:.*]] +// CHECK1: [[FOR_INC22]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1 +// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND19]] +// CHECK1: [[FOR_END23]]: +// CHECK1-NEXT: ret void +// +// // CHECK2-LABEL: define dso_local void @body( // CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: [[ENTRY:.*:]] @@ -1259,6 +1422,157 @@ extern "C" void foo3() { // CHECK2-NEXT: ret void // // +// CHECK2-LABEL: define dso_local void @foo4( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca 
ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK2-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128 +// CHECK2-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP6]]) +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND2:.*]] +// CHECK2: [[FOR_COND2]]: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]] +// CHECK2: [[FOR_BODY4]]: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP16]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]] +// CHECK2: [[IF_THEN9]]: +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]] +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK2-NEXT: store i32 [[ADD13]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP23]]) +// CHECK2-NEXT: br label %[[IF_END14]] +// CHECK2: [[IF_END14]]: +// CHECK2-NEXT: br label %[[FOR_INC15:.*]] +// CHECK2: [[FOR_INC15]]: +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK2-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2: [[FOR_END17]]: +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND19:.*]] +// CHECK2: [[FOR_COND19]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]] +// CHECK2-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]] +// CHECK2: [[FOR_BODY21]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP29]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP30]], double noundef [[TMP32]]) +// CHECK2-NEXT: br label %[[FOR_INC22:.*]] +// CHECK2: [[FOR_INC22]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1 +// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND19]] +// CHECK2: [[FOR_END23]]: +// CHECK2-NEXT: ret void +// +// // CHECK2-LABEL: define dso_local void @tfoo2( // CHECK2-SAME: ) #[[ATTR0]] { // CHECK2-NEXT: [[ENTRY:.*:]] @@ -1494,7 +1808,7 @@ extern "C" void foo3() { // CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK2: [[FOR_END]]: // CHECK2-NEXT: ret void // @@ -1503,9 +1817,13 @@ extern "C" void foo3() { // CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"} // CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} // CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} //. // CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} // CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"} // CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} // CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} //. 
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp index 50dedfd2c0dc6..2a2491d008a0b 100644 --- a/clang/test/OpenMP/fuse_messages.cpp +++ b/clang/test/OpenMP/fuse_messages.cpp @@ -33,6 +33,8 @@ void func() { { for (int i = 0; i < 7; ++i) ; + for(int j = 0; j < 100; ++j); + } @@ -41,6 +43,8 @@ void func() { { for (int i = 0; i < 7; ++i) ; + for(int j = 0; j < 100; ++j); + } //expected-error@+4 {{loop after '#pragma omp fuse' is not in canonical form}} @@ -50,6 +54,7 @@ void func() { for(int i = 0; i < 10; i*=2) { ; } + for(int j = 0; j < 100; ++j); } //expected-error@+2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}} @@ -73,4 +78,109 @@ void func() { for(unsigned int j = 0; j < 10; ++j); for(long long k = 0; k < 100; ++k); } -} \ No newline at end of file + + //expected-warning@+2 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse + { + for(int i = 0; i < 10; ++i); + } + + //expected-warning@+1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(1, 1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(1, -1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(1, 0) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + const int x = 1; + constexpr int y = 4; + //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(x,y) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + 
for(int k = 0; k < 50; ++k); + } + + //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '420' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(1,420) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } +} + +// In a template context, but expression itself not instantiation-dependent +template +static void templated_func() { + + //expected-warning@+1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(2,1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + + //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '5' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(3,3) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + +} + +template +static void templated_func_value_dependent() { + + //expected-warning@+1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(V,1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } +} + +template +static void templated_func_type_dependent() { + constexpr T s = 1; + + //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(s,s-1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } +} + + +void template_inst() { + // expected-note@+1 {{in instantiation of function template specialization 'templated_func' requested here}} + templated_func(); + // expected-note@+1 {{in instantiation of function template specialization 'templated_func_value_dependent<1>' requested here}} + templated_func_value_dependent<1>(); + // 
expected-note@+1 {{in instantiation of function template specialization 'templated_func_type_dependent' requested here}} + templated_func_type_dependent(); + +} + + diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 3afa59b2f2d6c..deb6fdd68e6d3 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2417,6 +2417,11 @@ void OMPClauseEnqueue::VisitOMPPartialClause(const OMPPartialClause *C) { Visitor->AddStmt(C->getFactor()); } +void OMPClauseEnqueue::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) { + Visitor->AddStmt(C->getFirst()); + Visitor->AddStmt(C->getCount()); +} + void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { Visitor->AddStmt(C->getAllocator()); } diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index de888ff86fe91..a2f258bbf745b 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -1233,6 +1233,15 @@ struct WriteT { using EmptyTrait = std::true_type; }; +// V6: [6.4.7] Looprange clause +template struct LoopRangeT { + using Begin = E; + using End = E; + + using TupleTrait = std::true_type; + std::tuple t; +}; + // --- template @@ -1263,9 +1272,10 @@ using TupleClausesT = DefaultmapT, DeviceT, DistScheduleT, DoacrossT, FromT, GrainsizeT, IfT, InitT, InReductionT, - LastprivateT, LinearT, MapT, - NumTasksT, OrderT, ReductionT, - ScheduleT, TaskReductionT, ToT>; + LastprivateT, LinearT, LoopRangeT, + MapT, NumTasksT, OrderT, + ReductionT, ScheduleT, + TaskReductionT, ToT>; template using UnionClausesT = std::variant>; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 6352be8069e9e..989b35a7caa2a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -274,6 +274,9 @@ def OMPC_Linear : Clause<[Spelling<"linear">]> { def OMPC_Link : 
Clause<[Spelling<"link">]> { let flangClass = "OmpObjectList"; } +def OMPC_LoopRange : Clause<[Spelling<"looprange">]> { + let clangClass = "OMPLoopRangeClause"; +} def OMPC_Map : Clause<[Spelling<"map">]> { let clangClass = "OMPMapClause"; let flangClass = "OmpMapClause"; @@ -856,6 +859,9 @@ def OMP_For : Directive<[Spelling<"for">]> { let languages = [L_C]; } def OMP_Fuse : Directive<[Spelling<"fuse">]> { + let allowedOnceClauses = [ + VersionedClause + ]; let association = AS_Loop; let category = CA_Executable; } From c76888b9dd1f516512d2d64afa4766adaad4de1e Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:30:39 +0000 Subject: [PATCH 03/11] Added fuse to documentation --- clang/docs/OpenMPSupport.rst | 2 ++ clang/docs/ReleaseNotes.rst | 1 + 2 files changed, 3 insertions(+) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 986aaabe1eed4..12325e3294a64 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -376,6 +376,8 @@ implementation. 
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | loop stripe transformation | :good:`done` | https://github.com/llvm/llvm-project/pull/119891 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop fuse transformation | :good:`done` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | work distribute construct | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | task_iteration | :none:`unclaimed` | :none:`unclaimed` | | diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index dd748ab06873d..c75cb25a4db73 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1128,6 +1128,7 @@ OpenMP Support - Fixed mapping of arrays of structs containing nested structs with user defined mappers, by using compiler-generated default mappers for the outer structs for such maps. +- Added support for 'omp fuse' directive. 
Improvements ^^^^^^^^^^^^ From 1c054673983282e7e6afef0f11c2a7fbe39181d7 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:43:41 +0000 Subject: [PATCH 04/11] Refactored preinits handling and improved coverage --- clang/docs/OpenMPSupport.rst | 2 +- clang/include/clang/AST/StmtOpenMP.h | 5 +- clang/include/clang/Sema/SemaOpenMP.h | 96 +- clang/lib/AST/StmtOpenMP.cpp | 13 + clang/lib/Basic/OpenMPKinds.cpp | 3 +- clang/lib/CodeGen/CGExpr.cpp | 2 + clang/lib/CodeGen/CodeGenFunction.h | 4 + clang/lib/Sema/SemaOpenMP.cpp | 588 ++++--- clang/test/OpenMP/fuse_ast_print.cpp | 55 + clang/test/OpenMP/fuse_codegen.cpp | 2117 +++++++++++++++---------- 10 files changed, 1862 insertions(+), 1023 deletions(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 12325e3294a64..8d62208e55f75 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -376,7 +376,7 @@ implementation. +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | loop stripe transformation | :good:`done` | https://github.com/llvm/llvm-project/pull/119891 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| loop fuse transformation | :good:`done` | :none:`unclaimed` | | +| loop fuse transformation | :good:`prototyped` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | work distribute construct | :none:`unclaimed` | :none:`unclaimed` | | 
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index f5115afd0753e..6425f6616a558 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -1005,8 +1005,7 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { Stmt::StmtClass C = T->getStmtClass(); return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass || C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass || - C == OMPStripeDirectiveClass || - C == OMPFuseDirectiveClass; + C == OMPStripeDirectiveClass || C == OMPFuseDirectiveClass; } }; @@ -5653,6 +5652,8 @@ class OMPStripeDirective final : public OMPLoopTransformationDirective { llvm::omp::OMPD_stripe, StartLoc, EndLoc, NumLoops) { setNumGeneratedLoops(2 * NumLoops); + // Similar to Tile, it only generates a single top level loop nest + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 0c28aaf6ab21a..547ea95c6cd5d 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -1494,16 +1494,96 @@ class SemaOpenMP : public SemaBase { SmallVectorImpl &LoopHelpers, Stmt *&Body, SmallVectorImpl> &OriginalInits); - /// Analyzes and checks a loop sequence for use by a loop transformation + /// @brief Categories of loops encountered during semantic OpenMP loop + /// analysis + /// + /// This enumeration identifies the structural category of a loop or sequence + /// of loops analyzed in the context of OpenMP transformations and directives. + /// This categorization helps differentiate between original source loops + /// and the structures resulting from applying OpenMP loop transformations. 
+ enum class OMPLoopCategory { + + /// @var OMPLoopCategory::RegularLoop + /// Represents a standard canonical loop nest found in the + /// original source code or an intact loop after transformations + /// (i.e Post/Pre loops of a loopranged fusion) + RegularLoop, + + /// @var OMPLoopCategory::TransformSingleLoop + /// Represents the resulting loop structure when an OpenMP loop + // transformation, generates a single, top-level loop + TransformSingleLoop, + + /// @var OMPLoopCategory::TransformLoopSequence + /// Represents the resulting loop structure when an OpenMP loop + /// transformation + /// generates a sequence of two or more canonical loop nests + TransformLoopSequence + }; + + /// The main recursive process of `checkTransformableLoopSequence` that + /// performs grammatical parsing of a canonical loop sequence. It extracts + /// key information, such as the number of top-level loops, loop statements, + /// helper expressions, and other relevant loop-related data, all in a single + /// execution to avoid redundant traversals. This analysis flattens inner + /// Loop Sequences + /// + /// \param LoopSeqStmt The AST of the original statement. + /// \param LoopSeqSize [out] Number of top level canonical loops. + /// \param NumLoops [out] Number of total canonical loops (nested too). + /// \param LoopHelpers [out] The multiple loop analyses results. + /// \param ForStmts [out] The multiple Stmt of each For loop. 
+ /// \param OriginalInits [out] The raw original initialization statements + /// of each belonging to a loop of the loop sequence + /// \param TransformPreInits [out] The multiple collection of statements and + /// declarations that must have been executed/declared + /// before entering the loop (each belonging to a + /// particular loop transformation, nullptr otherwise) + /// \param LoopSequencePreInits [out] Additional general collection of loop + /// transformation related statements and declarations + /// not bounded to a particular loop that must be + /// executed before entering the loop transformation + /// \param LoopCategories [out] A sequence of OMPLoopCategory values, + /// one for each loop or loop transformation node + /// successfully analyzed. + /// \param Context + /// \param Kind The loop transformation directive kind. + /// \return Whether the original statement is both syntactically and + /// semantically correct according to OpenMP 6.0 canonical loop + /// sequence definition. + bool analyzeLoopSequence( + Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context, + OpenMPDirectiveKind Kind); + + /// Validates and checks whether a loop sequence can be transformed according + /// to the given directive, providing necessary setup and initialization + /// (Driver function) before recursion using `analyzeLoopSequence`. /// /// \param Kind The loop transformation directive kind. - /// \param NumLoops [out] Number of total canonical loops - /// \param LoopSeqSize [out] Number of top level canonical loops + /// \param AStmt The AST of the original statement + /// \param LoopSeqSize [out] Number of top level canonical loops. 
+ /// \param NumLoops [out] Number of total canonical loops (nested too) /// \param LoopHelpers [out] The multiple loop analyses results. - /// \param LoopStmts [out] The multiple Stmt of each For loop. - /// \param OriginalInits [out] The multiple collection of statements and + /// \param ForStmts [out] The multiple Stmt of each For loop. + /// \param OriginalInits [out] The raw original initialization statements + /// of each belonging to a loop of the loop sequence + /// \param TransformsPreInits [out] The multiple collection of statements and /// declarations that must have been executed/declared - /// before entering the loop. + /// before entering the loop (each belonging to a + /// particular loop transformation, nullptr otherwise) + /// \param LoopSequencePreInits [out] Additional general collection of loop + /// transformation related statements and declarations + /// not bounded to a particular loop that must be + /// executed before entering the loop transformation + /// \param LoopCategories [out] A sequence of OMPLoopCategory values, + /// one for each loop or loop transformation node + /// successfully analyzed. /// \param Context /// \return Whether there was an absence of errors or not bool checkTransformableLoopSequence( @@ -1512,7 +1592,9 @@ class SemaOpenMP : public SemaBase { SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, SmallVectorImpl> &OriginalInits, - ASTContext &Context); + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context); /// Helper to keep information about the current `omp begin/end declare /// variant` nesting. 
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index c5a6732cc2217..f527e6361b5e5 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -457,6 +457,8 @@ OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc, C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc); Dir->setNumGeneratedLoops(NumGeneratedLoops); // The number of generated loops and loop nests during unroll matches + // given that unroll only generates top level canonical loop nests + // so each generated loop is a top level canonical loop nest Dir->setNumGeneratedLoopNests(NumGeneratedLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); @@ -520,6 +522,17 @@ OMPFuseDirective *OMPFuseDirective::Create( NumLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); + // The number of top level canonical nests could + // not match the total number of generated loops + // Example: + // Before fusion: + // for (int i = 0; i < N; ++i) + // for (int j = 0; j < M; ++j) + // A[i][j] = i + j; + // + // for (int k = 0; k < P; ++k) + // B[k] = k * 2; + // Here, NumLoopNests = 2, but NumLoops = 3. 
Dir->setNumGeneratedLoopNests(NumLoopNests); Dir->setNumGeneratedLoops(NumLoops); return Dir; diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 18330181f1509..53a9f80e6d3b7 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -704,7 +704,8 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse || - DKind == OMPD_interchange || DKind == OMPD_stripe || DKind == OMPD_fuse; + DKind == OMPD_interchange || DKind == OMPD_stripe || + DKind == OMPD_fuse; } bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 6cb348ffdf55f..08049d4d4e37d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3257,6 +3257,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // No other cases for now. } else { + llvm::dbgs() << "THE DAMN DECLREFEXPR HASN'T BEEN ENTERED IN LOCALDECLMAP\n"; + VD->dumpColor(); llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?"); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index fe753e5b688b1..bfe24213ed377 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -5363,6 +5363,10 @@ class CodeGenFunction : public CodeGenTypeCache { /// Set the address of a local variable. 
void setAddrOfLocalVar(const VarDecl *VD, Address Addr) { + if (LocalDeclMap.count(VD)) { + llvm::errs() << "Warning: VarDecl already exists in map: "; + VD->dumpColor(); + } assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!"); LocalDeclMap.insert({VD, Addr}); } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 3ec3f2ad31e78..3ce256f3ec23b 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -22,6 +22,7 @@ #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DynamicRecursiveASTVisitor.h" #include "clang/AST/OpenMPClause.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtVisitor.h" @@ -47,6 +48,7 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/IR/Assumptions.h" #include +#include using namespace clang; using namespace llvm::omp; @@ -14194,6 +14196,45 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } +// Overloaded base case function +template +static bool tryHandleAs(T *t, F &&) { + return false; +} + +/** + * Tries to recursively cast `t` to one of the given types and invokes `f` if successful. + * + * @tparam Class The first type to check. + * @tparam Rest The remaining types to check. + * @tparam T The base type of `t`. + * @tparam F The callable type for the function to invoke upon a successful cast. + * @param t The object to be checked. + * @param f The function to invoke if `t` matches `Class`. + * @return `true` if `t` matched any type and `f` was called, otherwise `false`. + */ +template +static bool tryHandleAs(T *t, F &&f) { + if (Class *c = dyn_cast(t)) { + f(c); + return true; + } else { + return tryHandleAs(t, std::forward(f)); + } +} + +// Updates OriginalInits by checking Transform against loop transformation +// directives and appending their pre-inits if a match is found. 
+static void updatePreInits(OMPLoopBasedDirective *Transform, + SmallVectorImpl> &PreInits) { + if (!tryHandleAs( + Transform, [&PreInits](auto *Dir) { + appendFlattenedStmtList(PreInits.back(), Dir->getPreInits()); + })) + llvm_unreachable("Unhandled loop transformation"); +} + bool SemaOpenMP::checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, @@ -14224,121 +14265,106 @@ bool SemaOpenMP::checkTransformableLoopNest( return false; }, [&OriginalInits](OMPLoopBasedDirective *Transform) { - Stmt *DependentPreInits; - if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else - llvm_unreachable("Unhandled loop transformation"); - - appendFlattenedStmtList(OriginalInits.back(), DependentPreInits); + updatePreInits(Transform, OriginalInits); }); assert(OriginalInits.back().empty() && "No preinit after innermost loop"); OriginalInits.pop_back(); return Result; } -class NestedLoopCounterVisitor - : public clang::RecursiveASTVisitor { +// Counts the total number of nested loops, including the outermost loop (the +// original loop). PRECONDITION of this visitor is that it must be invoked from +// the original loop to be analyzed. The traversal is stop for Decl's and +// Expr's given that they may contain inner loops that must not be counted. 
+// +// Example AST structure for the code: +// +// int main() { +// #pragma omp fuse +// { +// for (int i = 0; i < 100; i++) { <-- Outer loop +// []() { +// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +// }; +// for(int j = 0; j < 5; ++j) {} <-- Inner loop +// } +// for (int r = 0; i < 100; i++) { <-- Outer loop +// struct LocalClass { +// void bar() { +// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +// } +// }; +// for(int k = 0; k < 10; ++k) {} <-- Inner loop +// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP +// } +// } +// } +// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops +class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { +private: + unsigned NestedLoopCount = 0; + public: - explicit NestedLoopCounterVisitor() : NestedLoopCount(0) {} + explicit NestedLoopCounterVisitor() {} - bool VisitForStmt(clang::ForStmt *FS) { - ++NestedLoopCount; - return true; + unsigned getNestedLoopCount() const { return NestedLoopCount; } + + bool VisitForStmt(ForStmt *FS) override { + ++NestedLoopCount; + return true; } - bool VisitCXXForRangeStmt(clang::CXXForRangeStmt *FRS) { - ++NestedLoopCount; - return true; + bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override { + ++NestedLoopCount; + return true; } - unsigned getNestedLoopCount() const { return NestedLoopCount; } + bool TraverseStmt(Stmt *S) override { + if (!S) + return true; -private: - unsigned NestedLoopCount; + // Skip traversal of all expressions, including special cases like + // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions + // may contain inner statements (and even loops), but they are not part + // of the syntactic body of the surrounding loop structure. 
+ // Therefore must not be counted + if (isa(S)) + return true; + + // Only recurse into CompoundStmt (block {}) and loop bodies + if (isa(S) || isa(S) || + isa(S)) { + return DynamicRecursiveASTVisitor::TraverseStmt(S); + } + + // Stop traversal of the rest of statements, that break perfect + // loop nesting, such as control flow (IfStmt, SwitchStmt...) + return true; + } + + bool TraverseDecl(Decl *D) override { + // Stop in the case of finding a declaration, it is not important + // in order to find nested loops (Possible CXXRecordDecl, RecordDecl, + // FunctionDecl...) + return true; + } }; -bool SemaOpenMP::checkTransformableLoopSequence( - OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, - unsigned &NumLoops, +bool SemaOpenMP::analyzeLoopSequence( + Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, SmallVectorImpl> &OriginalInits, - ASTContext &Context) { + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context, + OpenMPDirectiveKind Kind) { - // Checks whether the given statement is a compound statement VarsWithInheritedDSAType TmpDSA; - if (!isa(AStmt)) { - Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) - << getOpenMPDirectiveName(Kind); - return false; - } - // Callback for updating pre-inits in case there are even more - // loop-sequence-generating-constructs inside of the main compound stmt - auto OnTransformationCallback = - [&OriginalInits](OMPLoopBasedDirective *Transform) { - Stmt *DependentPreInits; - if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) 
- DependentPreInits = Dir->getPreInits(); - else - llvm_unreachable("Unhandled loop transformation"); - - appendFlattenedStmtList(OriginalInits.back(), DependentPreInits); - }; - - // Number of top level canonical loop nests observed (And acts as index) - LoopSeqSize = 0; - // Number of total observed loops - NumLoops = 0; - - // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows - // the grammar: - // - // canonical-loop-sequence: - // { - // loop-sequence+ - // } - // where loop-sequence can be any of the following: - // 1. canonical-loop-sequence - // 2. loop-nest - // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective) - // - // To recognise and traverse this structure the following helper functions - // have been defined. handleLoopSequence serves as the recurisve entry point - // and tries to match the input AST to the canonical loop sequence grammar - // structure - - // Helper functions to validate canonical loop sequence grammar is valid - auto isLoopSequenceDerivation = [](auto *Child) { - return isa(Child) || isa(Child) || - isa(Child); - }; - auto isLoopGeneratingStmt = [](auto *Child) { - return isa(Child); - }; - + QualType BaseInductionVarType; // Helper Lambda to handle storing initialization and body statements for both // ForStmt and CXXForRangeStmt and checks for any possible mismatch between // induction variables types - QualType BaseInductionVarType; auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType, this, &Context](Stmt *LoopStmt) { if (auto *For = dyn_cast(LoopStmt)) { @@ -14361,33 +14387,35 @@ bool SemaOpenMP::checkTransformableLoopSequence( } } } - } else { - assert(isa(LoopStmt) && - "Expected canonical for or range-based for loops."); - auto *CXXFor = dyn_cast(LoopStmt); + auto *CXXFor = cast(LoopStmt); OriginalInits.back().push_back(CXXFor->getBeginStmt()); ForStmts.push_back(CXXFor); } }; + // Helper lambda functions to encapsulate the processing of different // 
derivations of the canonical loop sequence grammar // // Modularized code for handling loop generation and transformations - auto handleLoopGeneration = [&storeLoopStatements, &LoopHelpers, - &OriginalInits, &LoopSeqSize, &NumLoops, Kind, - &TmpDSA, &OnTransformationCallback, - this](Stmt *Child) { + auto analyzeLoopGeneration = [&storeLoopStatements, &LoopHelpers, + &OriginalInits, &TransformsPreInits, + &LoopCategories, &LoopSeqSize, &NumLoops, Kind, + &TmpDSA, &ForStmts, &Context, + &LoopSequencePreInits, this](Stmt *Child) { auto LoopTransform = dyn_cast(Child); Stmt *TransformedStmt = LoopTransform->getTransformedStmt(); unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests(); - + unsigned NumGeneratedLoops = LoopTransform->getNumGeneratedLoops(); // Handle the case where transformed statement is not available due to // dependent contexts if (!TransformedStmt) { - if (NumGeneratedLoopNests > 0) + if (NumGeneratedLoopNests > 0) { + LoopSeqSize += NumGeneratedLoopNests; + NumLoops += NumGeneratedLoops; return true; - // Unroll full + } + // Unroll full (0 loops produced) else { Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); @@ -14400,38 +14428,56 @@ bool SemaOpenMP::checkTransformableLoopSequence( Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); return false; - // Future loop transformations that generate multiple canonical loops - } else if (NumGeneratedLoopNests > 1) { - llvm_unreachable("Multiple canonical loop generating transformations " - "like loop splitting are not yet supported"); } + // Loop transformatons such as split or loopranged fuse + else if (NumGeneratedLoopNests > 1) { + // Get the preinits related to this loop sequence generating + // loop transformation (i.e loopranged fuse, split...) 
+ LoopSequencePreInits.emplace_back(); + // These preinits differ slightly from regular inits/pre-inits related + // to single loop generating loop transformations (interchange, unroll) + // given that they are not bounded to a particular loop nest + // so they need to be treated independently + updatePreInits(LoopTransform, LoopSequencePreInits); + return analyzeLoopSequence(TransformedStmt, LoopSeqSize, NumLoops, + LoopHelpers, ForStmts, OriginalInits, + TransformsPreInits, LoopSequencePreInits, + LoopCategories, Context, Kind); + } + // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) + else { + // Process the transformed loop statement + OriginalInits.emplace_back(); + TransformsPreInits.emplace_back(); + LoopHelpers.emplace_back(); + LoopCategories.push_back(OMPLoopCategory::TransformSingleLoop); + + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, TransformedStmt, SemaRef, + *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]); + + if (!IsCanonical) { + Diag(TransformedStmt->getBeginLoc(), diag::err_omp_not_canonical_loop) + << getOpenMPDirectiveName(Kind); + return false; + } + storeLoopStatements(TransformedStmt); + updatePreInits(LoopTransform, TransformsPreInits); - // Process the transformed loop statement - Child = TransformedStmt; - OriginalInits.emplace_back(); - LoopHelpers.emplace_back(); - OnTransformationCallback(LoopTransform); - - unsigned IsCanonical = - checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, - TmpDSA, LoopHelpers[LoopSeqSize]); - - if (!IsCanonical) { - Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop) - << getOpenMPDirectiveName(Kind); - return false; + NumLoops += NumGeneratedLoops; + ++LoopSeqSize; + return true; } - storeLoopStatements(TransformedStmt); - NumLoops += LoopTransform->getNumGeneratedLoops(); - return true; }; // Modularized code for handling regular canonical loops - auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, - &LoopSeqSize, 
&NumLoops, Kind, &TmpDSA, - this](Stmt *Child) { + auto analyzeRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, + &LoopSeqSize, &NumLoops, Kind, &TmpDSA, + &LoopCategories, this](Stmt *Child) { OriginalInits.emplace_back(); LoopHelpers.emplace_back(); + LoopCategories.push_back(OMPLoopCategory::RegularLoop); + unsigned IsCanonical = checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]); @@ -14449,57 +14495,114 @@ bool SemaOpenMP::checkTransformableLoopSequence( return true; }; - // Helper function to process a Loop Sequence Recursively - auto handleLoopSequence = [&](Stmt *LoopSeqStmt, - auto &handleLoopSequenceCallback) -> bool { - for (auto *Child : LoopSeqStmt->children()) { - if (!Child) - continue; + // Helper functions to validate canonical loop sequence grammar is valid + auto isLoopSequenceDerivation = [](auto *Child) { + return isa(Child) || isa(Child) || + isa(Child); + }; + auto isLoopGeneratingStmt = [](auto *Child) { + return isa(Child); + }; + - // Skip over non-loop-sequence statements - if (!isLoopSequenceDerivation(Child)) { - Child = Child->IgnoreContainers(); + // High level grammar validation + for (auto *Child : LoopSeqStmt->children()) { - // Ignore empty compound statement if (!Child) - continue; + continue; - // In the case of a nested loop sequence ignoring containers would not - // be enough, a recurisve transversal of the loop sequence is required - if (isa(Child)) { - if (!handleLoopSequenceCallback(Child, handleLoopSequenceCallback)) - return false; - // Already been treated, skip this children - continue; + // Skip over non-loop-sequence statements + if (!isLoopSequenceDerivation(Child)) { + Child = Child->IgnoreContainers(); + + // Ignore empty compound statement + if (!Child) + continue; + + // In the case of a nested loop sequence ignoring containers would not + // be enough, a recurisve transversal of the loop sequence is required + if (isa(Child)) { + if 
(!analyzeLoopSequence(Child, LoopSeqSize, NumLoops, LoopHelpers, + ForStmts, OriginalInits, TransformsPreInits, + LoopSequencePreInits, LoopCategories, Context, + Kind)) + return false; + // Already been treated, skip this children + continue; + } + } + // Regular loop sequence handling + if (isLoopSequenceDerivation(Child)) { + if (isLoopGeneratingStmt(Child)) { + if (!analyzeLoopGeneration(Child)) { + return false; } + // analyzeLoopGeneration updates Loop Sequence size accordingly + + } else { + if (!analyzeRegularLoop(Child)) { + return false; + } + // Update the Loop Sequence size by one + ++LoopSeqSize; } - // Regular loop sequence handling - if (isLoopSequenceDerivation(Child)) { - if (isLoopGeneratingStmt(Child)) { - if (!handleLoopGeneration(Child)) { - return false; - } } else { - if (!handleRegularLoop(Child)) { - return false; - } + // Report error for invalid statement inside canonical loop sequence + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; } - ++LoopSeqSize; - } else { - // Report error for invalid statement inside canonical loop sequence - Diag(Child->getBeginLoc(), diag::err_omp_not_for) - << 0 << getOpenMPDirectiveName(Kind); + } + return true; +} + +bool SemaOpenMP::checkTransformableLoopSequence( + OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, + unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context) { + + // Checks whether the given statement is a compound statement + if (!isa(AStmt)) { + Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) + << getOpenMPDirectiveName(Kind); return false; - } - } - return true; - }; + } + // Number of top level canonical loop nests observed (And acts as index) + LoopSeqSize = 0; + // Number of total observed loops + NumLoops = 
0; + + // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows + // the grammar: + // + // canonical-loop-sequence: + // { + // loop-sequence+ + // } + // where loop-sequence can be any of the following: + // 1. canonical-loop-sequence + // 2. loop-nest + // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective) + // + // To recognise and traverse this structure the following helper functions + // have been defined. analyzeLoopSequence serves as the recurisve entry point + // and tries to match the input AST to the canonical loop sequence grammar + // structure. This function will perform both a semantic and syntactical + // analysis of the given statement according to OpenMP 6.0 definition of + // the aforementioned canonical loop sequence // Recursive entry point to process the main loop sequence - if (!handleLoopSequence(AStmt, handleLoopSequence)) { - return false; + if (!analyzeLoopSequence(AStmt, LoopSeqSize, NumLoops, LoopHelpers, ForStmts, + OriginalInits, TransformsPreInits, + LoopSequencePreInits, LoopCategories, Context, + Kind)) { + return false; } - if (LoopSeqSize <= 0) { Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) << getOpenMPDirectiveName(Kind); @@ -14531,9 +14634,7 @@ static void addLoopPreInits(ASTContext &Context, RangeEnd->getBeginLoc(), RangeEnd->getEndLoc())); } - llvm::append_range(PreInits, OriginalInit); - // List of OMPCapturedExprDecl, for __begin, __end, and NumIterations if (auto *PI = cast_or_null(LoopHelper.PreInits)) { PreInits.push_back(new (Context) DeclStmt( @@ -15214,7 +15315,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *LoopStmt = nullptr; collectLoopStmts(AStmt, {LoopStmt}); - // Determine the PreInit declarations. 
+ // Determine the PreInit declarations.e SmallVector PreInits; addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits); @@ -15781,28 +15882,35 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, if (!AStmt) { return StmtError(); } + + unsigned NumLoops = 1; + unsigned LoopSeqSize = 1; + + // Defer transformation in dependent contexts + // The NumLoopNests argument is set to a placeholder 1 (even though + // using looprange fuse could yield up to 3 top level loop nests) + // because a dependent context could prevent determining its true value + if (CurrContext->isDependentContext()) { + return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumLoops, LoopSeqSize, AStmt, nullptr, + nullptr); + } + // Validate that the potential loop sequence is transformable for fusion // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops SmallVector LoopHelpers; SmallVector LoopStmts; SmallVector> OriginalInits; - - unsigned NumLoops; - unsigned LoopSeqSize; + SmallVector> TransformsPreInits; + SmallVector> LoopSequencePreInits; + SmallVector LoopCategories; if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops, LoopHelpers, LoopStmts, OriginalInits, - Context)) { + TransformsPreInits, LoopSequencePreInits, + LoopCategories, Context)) { return StmtError(); } - // Defer transformation in dependent contexts - // The NumLoopNests argument is set to a placeholder (0) - // because a dependent context could prevent determining its true value - if (CurrContext->isDependentContext()) { - return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, - NumLoops, 0, AStmt, nullptr, nullptr); - } - // Handle clauses, which can be any of the following: [looprange, apply] const OMPLoopRangeClause *LRC = OMPExecutableDirective::getSingleClause(Clauses); @@ -15864,11 +15972,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, "Expecting loop iteration space dimensionality to match 
number of " "affected loops"); - // PreInits hold a sequence of variable declarations that must be executed - // before the fused loop begins. These include bounds, strides, and other - // helper variables required for the transformation. - SmallVector PreInits; - // Select the type with the largest bit width among all induction variables QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType(); for (unsigned int I = FirstVal; I < LastVal; ++I) { @@ -15880,7 +15983,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, uint64_t IVBitWidth = Context.getIntWidth(IVType); // Create pre-init declarations for all loops lower bounds, upper bounds, - // strides and num-iterations + // strides and num-iterations for every top level loop in the fusion SmallVector LBVarDecls; SmallVector STVarDecls; SmallVector NIVarDecls; @@ -15918,12 +16021,62 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, return std::make_pair(VD, DeclStmt); }; + // PreInits hold a sequence of variable declarations that must be executed + // before the fused loop begins. These include bounds, strides, and other + // helper variables required for the transformation. Other loop transforms + // also contain their own preinits + SmallVector PreInits; + // Iterator to keep track of loop transformations + unsigned int TransformIndex = 0; + + // Update the general preinits using the preinits generated by loop sequence + // generating loop transformations. These preinits differ slightly from + // single-loop transformation preinits, as they can be detached from a + // specific loop inside the multiple generated loop nests. This happens + // because certain helper variables, like '.omp.fuse.max', are introduced to + // handle fused iteration spaces and may not be directly tied to a single + // original loop. The preinit structure must ensure that hidden variables + // like '.omp.fuse.max' are still properly handled. 
+ // Transformations that apply this concept: Loopranged Fuse, Split + if (!LoopSequencePreInits.empty()) { + for (const auto &LTPreInits : LoopSequencePreInits) { + if (!LTPreInits.empty()) { + llvm::append_range(PreInits, LTPreInits); + } + } + } + // Process each single loop to generate and collect declarations - // and statements for all helper expressions + // and statements for all helper expressions related to + // particular single loop nests + + // Also, in the case of the fused loops, we keep track of their original + // inits by appending them to their preinits statement, and in the case of + // transformations, also append their preinits (which contain the original + // loop initialization statement or other statements) + + // Firstly we need to update TransformIndex to match the beginning of the + // looprange section + for (unsigned int I = 0; I < FirstVal - 1; ++I) { + if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) + ++TransformIndex; + } for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { - addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], - PreInits); + if (LoopCategories[I] == OMPLoopCategory::RegularLoop) { + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], + PreInits); + } else if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) { + // For transformed loops, insert both pre-inits and original inits. + // Order matters: pre-inits may define variables used in the original + // inits such as upper bounds... 
+ auto TransformPreInit = TransformsPreInits[TransformIndex++]; + if (!TransformPreInit.empty()) { + llvm::append_range(PreInits, TransformPreInit); + } + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], + PreInits); + } auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", J); auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", J); auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", J); @@ -15942,7 +16095,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, NIVarDecls.push_back(NIVD); IVVarDecls.push_back(IVVD); - PreInits.push_back(UBDStmt.get()); PreInits.push_back(LBDStmt.get()); PreInits.push_back(STDStmt.get()); PreInits.push_back(NIDStmt.get()); @@ -16118,6 +16270,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, BodyStmts.push_back(IdxExpr.get()); llvm::append_range(BodyStmts, LoopHelpers[I].Updates); + // If the loop is a CXXForRangeStmt then the iterator variable is needed if (auto *SourceCXXFor = dyn_cast(LoopStmts[I])) BodyStmts.push_back(SourceCXXFor->getLoopVarStmt()); @@ -16152,21 +16305,50 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(), IncrExpr.get()->getEndLoc()); - // In the case of looprange, the result of fuse won't simply - // be a single loop (ForStmt), but rather a loop sequence - // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop - // and the post-fusion loops, preserving its original order. + // In the case of looprange, the result of fuse won't simply + // be a single loop (ForStmt), but rather a loop sequence + // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop + // and the post-fusion loops, preserving its original order. 
+ // + // Note: If looprange clause produces a single fused loop nest then + // this compound statement wrapper is unnecessary (Therefore this + // treatment is skipped) + Stmt *FusionStmt = FusedForStmt; - if (LRC) { + if (LRC && CountVal != LoopSeqSize) { SmallVector FinalLoops; - // Gather all the pre-fusion loops - for (unsigned I = 0; I < FirstVal - 1; ++I) - FinalLoops.push_back(LoopStmts[I]); - // Gather the fused loop - FinalLoops.push_back(FusedForStmt); - // Gather all the post-fusion loops - for (unsigned I = FirstVal + CountVal - 1; I < LoopSeqSize; ++I) + // Reset the transform index + TransformIndex = 0; + + // Collect all non-fused loops before and after the fused region. + // Pre-fusion and post-fusion loops are inserted in order exploiting their + // symmetry, along with their corresponding transformation pre-inits if + // needed. The fused loop is added between the two regions. + for (unsigned I = 0; I < LoopSeqSize; ++I) { + if (I >= FirstVal - 1 && I < FirstVal + CountVal - 1) { + // Update the Transformation counter to skip already treated + // loop transformations + if (LoopCategories[I] != OMPLoopCategory::TransformSingleLoop) + ++TransformIndex; + continue; + } + + // No need to handle: + // Regular loops: they are kept intact as-is. + // Loop-sequence-generating transformations: already handled earlier. 
+ // Only TransformSingleLoop requires inserting pre-inits here + + if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) { + auto TransformPreInit = TransformsPreInits[TransformIndex++]; + if (!TransformPreInit.empty()) { + llvm::append_range(PreInits, TransformPreInit); + } + } + FinalLoops.push_back(LoopStmts[I]); + } + + FinalLoops.insert(FinalLoops.begin() + (FirstVal - 1), FusedForStmt); FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(), SourceLocation(), SourceLocation()); } diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp index ac4f0d38a9c68..9d85bd1172948 100644 --- a/clang/test/OpenMP/fuse_ast_print.cpp +++ b/clang/test/OpenMP/fuse_ast_print.cpp @@ -338,6 +338,61 @@ void tfoo9() { foo9<1, 2>(); } +// PRINT-LABEL: void foo10( +// DUMP-LABEL: FunctionDecl {{.*}} foo10 +void foo10() { + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int ii = 0; ii < 10; ii += 2) + // DUMP: ForStmt + for (int ii = 0; ii < 10; ii += 2) + // PRINT: body(ii) + // DUMP: CallExpr + body(ii); + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + { + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int jj = 10; jj > 0; --jj) + // DUMP: ForStmt + for (int jj = 10; jj > 0; --jj) + // PRINT: body(jj) + // DUMP: CallExpr + body(jj); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + // PRINT: for (int kk = 0; kk <= 10; ++kk) + // DUMP: ForStmt 
+ for (int kk = 0; kk <= 10; ++kk) + // PRINT: body(kk) + // DUMP: CallExpr + body(kk); + } + } + +} diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp index d9500bed3ce31..742c280ed0172 100644 --- a/clang/test/OpenMP/fuse_codegen.cpp +++ b/clang/test/OpenMP/fuse_codegen.cpp @@ -65,6 +65,23 @@ extern "C" void foo4() { } } +// This exemplifies the usage of loop transformations that generate +// more than top level canonical loop nests (e.g split, loopranged fuse...) +extern "C" void foo5() { + double arr[256]; + #pragma omp fuse looprange(2,2) + { + #pragma omp fuse looprange(2,2) + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + for(int k = 0; k < 512; ++k) body(k); + } + for(int c = 42; auto &&v: arr) body(c,v); + for(int cc = 37; auto &&vv: arr) body(cc, vv); + } +} + #endif // CHECK1-LABEL: define dso_local void @body( @@ -88,7 +105,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 @@ -97,7 +113,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -129,107 +144,103 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: 
store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] // CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 // CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: 
[[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK1: [[COND_TRUE]]: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 // CHECK1-NEXT: br label %[[COND_END:.*]] // CHECK1: [[COND_FALSE]]: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 // CHECK1-NEXT: br label %[[COND_END]] // CHECK1: [[COND_END]]: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK1-NEXT: br label %[[FOR_COND:.*]] // CHECK1: [[FOR_COND]]: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 -// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK1: [[FOR_BODY]]: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: br i1 [[CMP17]], label 
%[[IF_THEN:.*]], label %[[IF_END:.*]] // CHECK1: [[IF_THEN]]: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]] // CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP35]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP33]]) // CHECK1-NEXT: br label %[[IF_END]] // CHECK1: [[IF_END]]: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] // CHECK1: [[IF_THEN22]]: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]] -// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]] // CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 
[[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]] // CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP44]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP42]]) // CHECK1-NEXT: br label %[[IF_END27]] // CHECK1: [[IF_END27]]: // CHECK1-NEXT: br label %[[FOR_INC:.*]] // CHECK1: [[FOR_INC]]: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK1: [[FOR_END]]: @@ -256,7 +267,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 @@ -265,7 +275,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -274,7 +283,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTNEW_STEP21:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 @@ -304,172 +312,166 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: 
[[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] // CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 // CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// 
CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 -// CHECK1-NEXT: 
[[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]] // CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 // CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK1-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: br i1 [[CMP]], label 
%[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK1: [[COND_TRUE]]: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 // CHECK1-NEXT: br label %[[COND_END:.*]] // CHECK1: [[COND_FALSE]]: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 // CHECK1-NEXT: br label %[[COND_END]] // CHECK1: [[COND_END]]: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 -// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]] // CHECK1: [[COND_TRUE30]]: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 // CHECK1-NEXT: br label %[[COND_END32:.*]] // CHECK1: [[COND_FALSE31]]: -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 // CHECK1-NEXT: br label %[[COND_END32]] // CHECK1: [[COND_END32]]: -// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ] +// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ] // CHECK1-NEXT: store i32 [[COND33]], ptr 
[[DOTOMP_FUSE_MAX]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK1-NEXT: br label %[[FOR_COND:.*]] // CHECK1: [[FOR_COND]]: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 -// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]] // CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK1: [[FOR_BODY]]: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] // CHECK1: [[IF_THEN]]: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]] -// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 -// 
CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]] -// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]] // CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP49]]) // CHECK1-NEXT: br label %[[IF_END]] // CHECK1: [[IF_END]]: -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]] // CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] // CHECK1: [[IF_THEN40]]: -// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]] -// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]] +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 
4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]] // CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]] -// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]] +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]] // CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP61]]) +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP58]]) // CHECK1-NEXT: br label %[[IF_END45]] // CHECK1: [[IF_END45]]: -// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 -// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]] // CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] // CHECK1: [[IF_THEN47]]: -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]] -// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]] +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]] // CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 -// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 -// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]] -// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]] +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 
[[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]] // CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP70]]) +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP67]]) // CHECK1-NEXT: br label %[[IF_END52]] // CHECK1: [[IF_END52]]: // CHECK1-NEXT: br label %[[FOR_INC:.*]] // CHECK1: [[FOR_INC]]: -// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: [[FOR_END]]: @@ -481,13 +483,11 @@ extern "C" void foo4() { // CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -497,48 +497,43 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4 -// 
CHECK1-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[__END224:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END222:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN225:%.*]] = alloca ptr, 
align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 // CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 // CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 @@ -565,225 +560,219 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 -// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 // CHECK1-NEXT: store i32 42, ptr [[C]], align 4 // CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 // CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0 -// 
CHECK1-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 // CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] // CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1 -// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1 -// CHECK1-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1 -// CHECK1-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8 -// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8 -// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK1-NEXT: [[ADD21:%.*]] 
= add nsw i64 [[TMP16]], 1 -// CHECK1-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 // CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 -// CHECK1-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256 -// CHECK1-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 
-// CHECK1-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64 -// CHECK1-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64 -// CHECK1-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]] -// CHECK1-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8 -// CHECK1-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1 -// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1 -// CHECK1-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1 -// CHECK1-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1 -// CHECK1-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]] +// CHECK1-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8 +// CHECK1-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1 +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1 +// CHECK1-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1 +// CHECK1-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1 +// CHECK1-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK1-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1 -// CHECK1-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 -// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK1-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] -// CHECK1-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]] -// CHECK1: [[COND_TRUE44]]: -// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK1-NEXT: br label %[[COND_END46:.*]] -// CHECK1: [[COND_FALSE45]]: -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK1-NEXT: br label %[[COND_END46]] -// CHECK1: [[COND_END46]]: -// CHECK1-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ] -// CHECK1-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK1-NEXT: 
[[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK1-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]] -// CHECK1: [[COND_TRUE50]]: -// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK1-NEXT: br label %[[COND_END52:.*]] -// CHECK1: [[COND_FALSE51]]: -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK1-NEXT: br label %[[COND_END52]] -// CHECK1: [[COND_END52]]: -// CHECK1-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ] -// CHECK1-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8 -// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1 +// CHECK1-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]] +// CHECK1: [[COND_TRUE42]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: br label %[[COND_END44:.*]] +// CHECK1: [[COND_FALSE43]]: +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: br label %[[COND_END44]] +// CHECK1: [[COND_END44]]: +// CHECK1-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ] +// CHECK1-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 
8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]] +// CHECK1: [[COND_TRUE48]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: br label %[[COND_END50:.*]] +// CHECK1: [[COND_FALSE49]]: +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: br label %[[COND_END50]] +// CHECK1: [[COND_END50]]: +// CHECK1-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ] +// CHECK1-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8 // CHECK1-NEXT: br label %[[FOR_COND:.*]] // CHECK1: [[FOR_COND]]: -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8 -// CHECK1-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]] -// CHECK1-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK1-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK1: [[FOR_BODY]]: -// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 -// CHECK1-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 
[[TMP33]], [[TMP34]] +// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]] // CHECK1: [[IF_THEN]]: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4 -// CHECK1-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4 -// CHECK1-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64 -// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]] -// CHECK1-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]] -// CHECK1-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32 -// CHECK1-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4 -// CHECK1-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1 -// CHECK1-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]] -// CHECK1-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]] -// CHECK1: [[IF_THEN64]]: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]] -// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]] -// CHECK1-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1 -// CHECK1-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]] -// CHECK1-NEXT: store i32 [[ADD68]], ptr [[I]], align 4 -// CHECK1-NEXT: 
[[TMP48:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP48]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]] +// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]] +// CHECK1-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32 +// CHECK1-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1 +// CHECK1-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]] +// CHECK1-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN62]]: +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]] +// CHECK1-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1 +// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]] +// CHECK1-NEXT: store i32 [[ADD66]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, 
ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP45]]) // CHECK1-NEXT: br label %[[IF_END]] // CHECK1: [[IF_END]]: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]] -// CHECK1-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]] -// CHECK1: [[IF_THEN70]]: -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]] -// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]] -// CHECK1-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2 -// CHECK1-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]] -// CHECK1-NEXT: store i32 [[ADD74]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP55]]) -// CHECK1-NEXT: br label %[[IF_END75]] -// CHECK1: [[IF_END75]]: -// CHECK1-NEXT: br label %[[IF_END76]] -// CHECK1: [[IF_END76]]: -// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK1-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]] -// CHECK1-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]] -// CHECK1: [[IF_THEN78]]: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]] -// CHECK1-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]] -// CHECK1-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8 -// CHECK1-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1 -// CHECK1-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]] -// CHECK1-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 -// CHECK1-NEXT: store ptr [[TMP63]], ptr [[V]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8 -// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP64]], double noundef [[TMP66]]) -// CHECK1-NEXT: br label %[[IF_END83]] -// CHECK1: [[IF_END83]]: -// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK1-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]] -// CHECK1-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]] -// CHECK1: [[IF_THEN85]]: -// CHECK1-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 -// CHECK1-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]] -// CHECK1-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]] -// CHECK1-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8 -// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 -// CHECK1-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1 -// CHECK1-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]] -// CHECK1-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8 -// CHECK1-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4 -// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8 -// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP75]], double noundef [[TMP77]]) -// CHECK1-NEXT: br label %[[IF_END90]] -// CHECK1: [[IF_END90]]: +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]] +// CHECK1: [[IF_THEN68]]: +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]] +// CHECK1-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2 +// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]] +// CHECK1-NEXT: store i32 [[ADD72]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP52]]) +// CHECK1-NEXT: br label %[[IF_END73]] +// CHECK1: [[IF_END73]]: +// CHECK1-NEXT: br label %[[IF_END74]] +// CHECK1: [[IF_END74]]: +// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]] +// CHECK1: [[IF_THEN76]]: +// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]] +// CHECK1-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1 +// CHECK1-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]] +// CHECK1-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP60]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP61]], double noundef [[TMP63]]) +// CHECK1-NEXT: br label %[[IF_END81]] +// CHECK1: [[IF_END81]]: +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]] +// CHECK1-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]] +// CHECK1: [[IF_THEN83]]: +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]] +// CHECK1-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]] +// CHECK1-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1 +// CHECK1-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]] +// CHECK1-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP72]], double noundef [[TMP74]]) +// CHECK1-NEXT: br label %[[IF_END88]] +// CHECK1: [[IF_END88]]: // CHECK1-NEXT: br label %[[FOR_INC:.*]] // CHECK1: [[FOR_INC]]: -// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1 -// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8 // CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: [[FOR_END]]: // CHECK1-NEXT: ret void @@ -794,13 +783,11 @@ extern "C" void foo4() { // CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -815,12 +802,10 @@ extern "C" void foo4() { // CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 // CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[K]], align 4 -// CHECK1-NEXT: store i32 63, ptr [[DOTOMP_UB1]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 // CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 @@ -940,6 +925,277 @@ extern "C" void foo4() { // CHECK1-NEXT: ret void // // +// CHECK1-LABEL: define dso_local void @foo5( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = 
alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END267:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], 
%[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr 
[[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]] +// CHECK1: [[COND_TRUE24]]: +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: br label %[[COND_END26:.*]] +// CHECK1: [[COND_FALSE25]]: +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: br label %[[COND_END26]] +// CHECK1: [[COND_END26]]: +// CHECK1-NEXT: [[COND27:%.*]] = 
phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ] +// CHECK1-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128 +// CHECK1-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP21]]) +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND30:.*]] +// CHECK1: [[FOR_COND30]]: +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK1-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]] +// CHECK1: [[FOR_BODY32]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[CONV35:%.*]] = sext 
i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]] +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]] +// CHECK1-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 +// CHECK1-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN41]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]] +// CHECK1-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2 +// CHECK1-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK1-NEXT: store i32 [[ADD45]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP37]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK1: [[IF_THEN47]]: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]] +// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1 +// CHECK1-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]] +// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK1-NEXT: br label %[[IF_END52]] +// CHECK1: [[IF_END52]]: +// CHECK1-NEXT: br label %[[IF_END53]] +// CHECK1: [[IF_END53]]: +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]] +// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]] +// CHECK1: [[IF_THEN55]]: +// CHECK1-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]] +// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]] +// CHECK1-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1 +// CHECK1-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]] +// CHECK1-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP52]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP53]], double noundef [[TMP55]]) +// CHECK1-NEXT: br label %[[IF_END60]] +// CHECK1: [[IF_END60]]: +// CHECK1-NEXT: br label %[[FOR_INC61:.*]] +// CHECK1: [[FOR_INC61]]: +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1 +// CHECK1-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1: [[FOR_END63]]: +// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND70:.*]] +// CHECK1: [[FOR_COND70]]: +// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8 +// CHECK1-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]] +// CHECK1: [[FOR_BODY72]]: +// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP62]], double noundef [[TMP64]]) +// CHECK1-NEXT: br label %[[FOR_INC73:.*]] +// CHECK1: [[FOR_INC73]]: +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1 +// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND70]] +// CHECK1: [[FOR_END74]]: +// CHECK1-NEXT: ret void +// +// // CHECK2-LABEL: define dso_local void @body( // CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: [[ENTRY:.*:]] @@ -961,7 +1217,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 @@ -970,7 +1225,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -1002,107 +1256,103 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 -// 
CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 // CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] // CHECK2-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] // CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 // CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 // CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]] // CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK2: [[COND_TRUE]]: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 // CHECK2-NEXT: 
br label %[[COND_END:.*]] // CHECK2: [[COND_FALSE]]: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 // CHECK2-NEXT: br label %[[COND_END]] // CHECK2: [[COND_END]]: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK2-NEXT: br label %[[FOR_COND:.*]] // CHECK2: [[FOR_COND]]: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 -// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK2: [[FOR_BODY]]: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] // CHECK2: [[IF_THEN]]: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul 
i32 [[TMP30]], [[TMP31]] -// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]] +// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]] // CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]] -// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]] // CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP35]]) +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP33]]) // CHECK2-NEXT: br label %[[IF_END]] // CHECK2: [[IF_END]]: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]] // CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] // CHECK2: [[IF_THEN22]]: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]] -// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]] // CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]] -// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 
[[TMP40]], [[TMP41]] +// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]] // CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP44]]) +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP42]]) // CHECK2-NEXT: br label %[[IF_END27]] // CHECK2: [[IF_END27]]: // CHECK2-NEXT: br label %[[FOR_INC:.*]] // CHECK2: [[FOR_INC]]: -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK2: [[FOR_END]]: @@ -1114,13 +1364,11 @@ extern "C" void foo4() { // CHECK2-NEXT: [[ENTRY:.*:]] // CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -1130,48 +1378,43 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4 -// 
CHECK2-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[__END224:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END222:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN225:%.*]] = alloca ptr, 
align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 // CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 // CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 @@ -1198,225 +1441,219 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr 
[[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4 -// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4 -// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 -// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 // CHECK2-NEXT: store i32 42, ptr [[C]], align 4 // CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 // CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 // CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 // CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0 -// 
CHECK2-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8 -// CHECK2-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 // CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] // CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 -// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1 -// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1 -// CHECK2-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1 -// CHECK2-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8 -// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8 -// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK2-NEXT: [[ADD21:%.*]] 
= add nsw i64 [[TMP16]], 1 -// CHECK2-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 // CHECK2-NEXT: store i32 37, ptr [[CC]], align 4 -// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 -// CHECK2-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256 -// CHECK2-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8 -// CHECK2-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 
-// CHECK2-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64 -// CHECK2-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64 -// CHECK2-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]] -// CHECK2-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8 -// CHECK2-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1 -// CHECK2-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1 -// CHECK2-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1 -// CHECK2-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1 -// CHECK2-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK2-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8 +// CHECK2-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]] +// CHECK2-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8 +// CHECK2-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1 +// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1 +// CHECK2-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1 +// CHECK2-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1 +// CHECK2-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8 // CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 // CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK2-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1 -// CHECK2-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 -// CHECK2-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK2-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] -// CHECK2-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]] -// CHECK2: [[COND_TRUE44]]: -// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK2-NEXT: br label %[[COND_END46:.*]] -// CHECK2: [[COND_FALSE45]]: -// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK2-NEXT: br label %[[COND_END46]] -// CHECK2: [[COND_END46]]: -// CHECK2-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ] -// CHECK2-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK2-NEXT: 
[[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK2-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]] -// CHECK2-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]] -// CHECK2: [[COND_TRUE50]]: -// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK2-NEXT: br label %[[COND_END52:.*]] -// CHECK2: [[COND_FALSE51]]: -// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK2-NEXT: br label %[[COND_END52]] -// CHECK2: [[COND_END52]]: -// CHECK2-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ] -// CHECK2-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8 -// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1 +// CHECK2-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]] +// CHECK2: [[COND_TRUE42]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: br label %[[COND_END44:.*]] +// CHECK2: [[COND_FALSE43]]: +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: br label %[[COND_END44]] +// CHECK2: [[COND_END44]]: +// CHECK2-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ] +// CHECK2-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 
8 +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]] +// CHECK2: [[COND_TRUE48]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: br label %[[COND_END50:.*]] +// CHECK2: [[COND_FALSE49]]: +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: br label %[[COND_END50]] +// CHECK2: [[COND_END50]]: +// CHECK2-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ] +// CHECK2-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8 // CHECK2-NEXT: br label %[[FOR_COND:.*]] // CHECK2: [[FOR_COND]]: -// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8 -// CHECK2-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]] -// CHECK2-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK2-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK2: [[FOR_BODY]]: -// CHECK2-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 -// CHECK2-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 
[[TMP33]], [[TMP34]] +// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]] // CHECK2: [[IF_THEN]]: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4 -// CHECK2-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4 -// CHECK2-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64 -// CHECK2-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]] -// CHECK2-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]] -// CHECK2-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32 -// CHECK2-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4 -// CHECK2-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1 -// CHECK2-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]] -// CHECK2-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]] -// CHECK2-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]] -// CHECK2: [[IF_THEN64]]: -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]] -// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]] -// CHECK2-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1 -// CHECK2-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]] -// CHECK2-NEXT: store i32 [[ADD68]], ptr [[I]], align 4 -// CHECK2-NEXT: 
[[TMP48:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP48]]) +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]] +// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]] +// CHECK2-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32 +// CHECK2-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1 +// CHECK2-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]] +// CHECK2-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN62]]: +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]] +// CHECK2-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1 +// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]] +// CHECK2-NEXT: store i32 [[ADD66]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, 
ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP45]]) // CHECK2-NEXT: br label %[[IF_END]] // CHECK2: [[IF_END]]: -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]] -// CHECK2-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]] -// CHECK2: [[IF_THEN70]]: -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]] -// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]] -// CHECK2-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2 -// CHECK2-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]] -// CHECK2-NEXT: store i32 [[ADD74]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4 -// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP55]]) -// CHECK2-NEXT: br label %[[IF_END75]] -// CHECK2: [[IF_END75]]: -// CHECK2-NEXT: br label %[[IF_END76]] -// CHECK2: [[IF_END76]]: -// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK2-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]] -// CHECK2-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]] -// CHECK2: [[IF_THEN78]]: -// CHECK2-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8 -// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8 -// CHECK2-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]] -// CHECK2-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]] -// CHECK2-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8 -// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK2-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8 -// CHECK2-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1 -// CHECK2-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]] -// CHECK2-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8 -// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 -// CHECK2-NEXT: store ptr [[TMP63]], ptr [[V]], align 8 -// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4 -// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8 -// CHECK2-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8 -// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP64]], double noundef [[TMP66]]) -// CHECK2-NEXT: br label %[[IF_END83]] -// CHECK2: [[IF_END83]]: -// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK2-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]] -// CHECK2-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]] -// CHECK2: [[IF_THEN85]]: -// CHECK2-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 -// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 -// CHECK2-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]] -// CHECK2-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]] -// CHECK2-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8 -// CHECK2-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 -// CHECK2-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 -// CHECK2-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1 -// CHECK2-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]] -// CHECK2-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8 -// CHECK2-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8 -// CHECK2-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8 -// CHECK2-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4 -// CHECK2-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8 -// CHECK2-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8 -// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP75]], double noundef [[TMP77]]) -// CHECK2-NEXT: br label %[[IF_END90]] -// CHECK2: [[IF_END90]]: +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]] +// CHECK2-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]] +// CHECK2: [[IF_THEN68]]: +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]] +// CHECK2-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]] +// CHECK2-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2 +// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]] +// CHECK2-NEXT: store i32 [[ADD72]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP52]]) +// CHECK2-NEXT: br label %[[IF_END73]] +// CHECK2: [[IF_END73]]: +// CHECK2-NEXT: br label %[[IF_END74]] +// CHECK2: [[IF_END74]]: +// CHECK2-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]] +// CHECK2: [[IF_THEN76]]: +// CHECK2-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]] +// CHECK2-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1 +// CHECK2-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]] +// CHECK2-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP60]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP61]], double noundef [[TMP63]]) +// CHECK2-NEXT: br label %[[IF_END81]] +// CHECK2: [[IF_END81]]: +// CHECK2-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]] +// CHECK2-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]] +// CHECK2: [[IF_THEN83]]: +// CHECK2-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]] +// CHECK2-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]] +// CHECK2-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1 +// CHECK2-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]] +// CHECK2-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK2-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP72]], double noundef [[TMP74]]) +// CHECK2-NEXT: br label %[[IF_END88]] +// CHECK2: [[IF_END88]]: // CHECK2-NEXT: br label %[[FOR_INC:.*]] // CHECK2: [[FOR_INC]]: -// CHECK2-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1 -// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8 // CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK2: [[FOR_END]]: // CHECK2-NEXT: ret void @@ -1427,13 +1664,11 @@ extern "C" void foo4() { // CHECK2-NEXT: [[ENTRY:.*:]] // CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -1448,12 +1683,10 @@ extern "C" void foo4() { // CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 // CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[K]], align 4 -// CHECK2-NEXT: store i32 63, ptr 
[[DOTOMP_UB1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 // CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 @@ -1573,6 +1806,277 @@ extern "C" void foo4() { // CHECK2-NEXT: ret void // // +// CHECK2-LABEL: define dso_local void @foo5( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: 
[[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END267:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: 
[[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// 
CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]] +// CHECK2: [[COND_TRUE24]]: +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: br label %[[COND_END26:.*]] +// CHECK2: [[COND_FALSE25]]: +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: br label %[[COND_END26]] +// CHECK2: 
[[COND_END26]]: +// CHECK2-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ] +// CHECK2-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128 +// CHECK2-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP21]]) +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND30:.*]] +// CHECK2: [[FOR_COND30]]: +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK2-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]] +// CHECK2: [[FOR_BODY32]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr 
[[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]] +// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]] +// CHECK2-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 +// CHECK2-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1 +// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK2-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN41]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]] +// CHECK2-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2 +// CHECK2-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK2-NEXT: store i32 [[ADD45]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP37]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK2: [[IF_THEN47]]: +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]] +// CHECK2-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]] +// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1 +// CHECK2-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]] +// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK2-NEXT: br label %[[IF_END52]] +// CHECK2: [[IF_END52]]: +// CHECK2-NEXT: br label %[[IF_END53]] +// CHECK2: [[IF_END53]]: +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]] +// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]] +// CHECK2: [[IF_THEN55]]: +// CHECK2-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]] +// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]] +// CHECK2-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1 +// CHECK2-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]] +// CHECK2-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP52]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP53]], double noundef [[TMP55]]) +// CHECK2-NEXT: br label %[[IF_END60]] +// CHECK2: [[IF_END60]]: +// CHECK2-NEXT: br label %[[FOR_INC61:.*]] +// CHECK2: [[FOR_INC61]]: +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1 +// CHECK2-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2: [[FOR_END63]]: +// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND70:.*]] +// CHECK2: [[FOR_COND70]]: +// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8 +// CHECK2-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]] +// CHECK2-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]] +// CHECK2: [[FOR_BODY72]]: +// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP62]], double noundef [[TMP64]]) +// CHECK2-NEXT: br label %[[FOR_INC73:.*]] +// CHECK2: [[FOR_INC73]]: +// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1 +// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND70]] +// CHECK2: [[FOR_END74]]: +// CHECK2-NEXT: ret void +// +// // CHECK2-LABEL: define dso_local void @tfoo2( // CHECK2-SAME: ) #[[ATTR0]] { // CHECK2-NEXT: [[ENTRY:.*:]] @@ -1593,7 +2097,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 @@ -1602,7 +2105,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -1611,7 +2113,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 @@ -1641,174 +2142,168 @@ extern 
"C" void foo4() { // CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 // CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr 
[[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] // CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 // CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 // CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], 
align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]] -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[ADD25:%.*]] = add i32 
[[SUB24]], [[TMP27]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]] // CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 // CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK2-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1 // CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]] // CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK2: [[COND_TRUE]]: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 // CHECK2-NEXT: br label %[[COND_END:.*]] // CHECK2: [[COND_FALSE]]: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] 
= load i32, ptr [[DOTOMP_NI1]], align 4 // CHECK2-NEXT: br label %[[COND_END]] // CHECK2: [[COND_END]]: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 -// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]] // CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]] // CHECK2: [[COND_TRUE30]]: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 // CHECK2-NEXT: br label %[[COND_END32:.*]] // CHECK2: [[COND_FALSE31]]: -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 // CHECK2-NEXT: br label %[[COND_END32]] // CHECK2: [[COND_END32]]: -// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ] +// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ] // CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK2-NEXT: br label %[[FOR_COND:.*]] // CHECK2: [[FOR_COND]]: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 -// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]] +// 
CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]] // CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK2: [[FOR_BODY]]: -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]] // CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] // CHECK2: [[IF_THEN]]: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]] -// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]] +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]] // CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]] -// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]] +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]] // CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP49]]) // CHECK2-NEXT: br label %[[IF_END]] // CHECK2: [[IF_END]]: -// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]] // CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] // CHECK2: [[IF_THEN40]]: -// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]] -// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]] +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]] // CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]] -// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]] +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]] // CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP61]]) +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP58]]) // CHECK2-NEXT: br label %[[IF_END45]] // CHECK2: [[IF_END45]]: -// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 -// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]] +// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]] // CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] // CHECK2: [[IF_THEN47]]: -// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 -// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 -// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]] -// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]] +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK2-NEXT: [[TMP62:%.*]] = 
load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]] +// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]] // CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 -// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 -// CHECK2-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]] -// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]] +// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]] // CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 -// CHECK2-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP70]]) +// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP67]]) // CHECK2-NEXT: br label %[[IF_END52]] // CHECK2: [[IF_END52]]: // CHECK2-NEXT: br label %[[FOR_INC:.*]] // CHECK2: [[FOR_INC]]: -// CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 +// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: [[FOR_END]]: // CHECK2-NEXT: ret void // @@ -1819,6 +2314,8 @@ extern "C" void foo4() { // CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} // CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} // CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} //. // CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} // CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"} @@ -1826,4 +2323,6 @@ extern "C" void foo4() { // CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} // CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} // CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} //. 
From 860fcd94d930c9644b4d0427471f2873e7afcf8b Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:44:48 +0000 Subject: [PATCH 05/11] Fixed missing diagnostic groups in warnings --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 002aa7a774fbe..e85cd32d78b5c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11613,7 +11613,8 @@ def note_omp_implicit_dsa : Note< def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; def warn_omp_different_loop_ind_var_types : Warning < - "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">; + "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">, + InGroup; def err_omp_not_canonical_loop : Error < "loop after '#pragma omp %0' is not in canonical form">; def err_omp_not_a_loop_sequence : Error < @@ -11624,7 +11625,8 @@ def err_omp_invalid_looprange : Error < "loop range in '#pragma omp %0' exceeds the number of available loops: " "range end '%1' is greater than the total number of loops '%2'">; def warn_omp_redundant_fusion : Warning < - "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">; + "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">, + InGroup; def err_omp_not_for : Error< "%select{statement after '#pragma omp %1' must be a for loop|" "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">; From 65cbfeb945e6b8016696906db43dd590adb285b2 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:49:50 +0000 Subject: [PATCH 06/11] Fixed formatting and comments --- 
clang/lib/Sema/SemaOpenMP.cpp | 112 ++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 54 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 3ce256f3ec23b..2985b256cf153 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14197,42 +14197,43 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( } // Overloaded base case function -template -static bool tryHandleAs(T *t, F &&) { - return false; +template static bool tryHandleAs(T *t, F &&) { + return false; } /** - * Tries to recursively cast `t` to one of the given types and invokes `f` if successful. + * Tries to recursively cast `t` to one of the given types and invokes `f` if + * successful. * * @tparam Class The first type to check. * @tparam Rest The remaining types to check. * @tparam T The base type of `t`. - * @tparam F The callable type for the function to invoke upon a successful cast. + * @tparam F The callable type for the function to invoke upon a successful + * cast. * @param t The object to be checked. * @param f The function to invoke if `t` matches `Class`. * @return `true` if `t` matched any type and `f` was called, otherwise `false`. */ template static bool tryHandleAs(T *t, F &&f) { - if (Class *c = dyn_cast(t)) { - f(c); - return true; - } else { - return tryHandleAs(t, std::forward(f)); - } + if (Class *c = dyn_cast(t)) { + f(c); + return true; + } else { + return tryHandleAs(t, std::forward(f)); + } } // Updates OriginalInits by checking Transform against loop transformation // directives and appending their pre-inits if a match is found. 
static void updatePreInits(OMPLoopBasedDirective *Transform, SmallVectorImpl> &PreInits) { - if (!tryHandleAs( - Transform, [&PreInits](auto *Dir) { - appendFlattenedStmtList(PreInits.back(), Dir->getPreInits()); - })) - llvm_unreachable("Unhandled loop transformation"); + if (!tryHandleAs( + Transform, [&PreInits](auto *Dir) { + appendFlattenedStmtList(PreInits.back(), Dir->getPreInits()); + })) + llvm_unreachable("Unhandled loop transformation"); } bool SemaOpenMP::checkTransformableLoopNest( @@ -14310,43 +14311,42 @@ class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { unsigned getNestedLoopCount() const { return NestedLoopCount; } bool VisitForStmt(ForStmt *FS) override { - ++NestedLoopCount; - return true; + ++NestedLoopCount; + return true; } bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override { - ++NestedLoopCount; - return true; + ++NestedLoopCount; + return true; } bool TraverseStmt(Stmt *S) override { - if (!S) + if (!S) return true; - // Skip traversal of all expressions, including special cases like - // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions - // may contain inner statements (and even loops), but they are not part - // of the syntactic body of the surrounding loop structure. - // Therefore must not be counted - if (isa(S)) + // Skip traversal of all expressions, including special cases like + // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions + // may contain inner statements (and even loops), but they are not part + // of the syntactic body of the surrounding loop structure. 
+ // Therefore must not be counted + if (isa(S)) return true; - // Only recurse into CompoundStmt (block {}) and loop bodies - if (isa(S) || isa(S) || - isa(S)) { + // Only recurse into CompoundStmt (block {}) and loop bodies + if (isa(S) || isa(S) || isa(S)) { return DynamicRecursiveASTVisitor::TraverseStmt(S); - } + } - // Stop traversal of the rest of statements, that break perfect - // loop nesting, such as control flow (IfStmt, SwitchStmt...) - return true; + // Stop traversal of the rest of statements, that break perfect + // loop nesting, such as control flow (IfStmt, SwitchStmt...) + return true; } bool TraverseDecl(Decl *D) override { - // Stop in the case of finding a declaration, it is not important - // in order to find nested loops (Possible CXXRecordDecl, RecordDecl, - // FunctionDecl...) - return true; + // Stop in the case of finding a declaration, it is not important + // in order to find nested loops (Possible CXXRecordDecl, RecordDecl, + // FunctionDecl...) + return true; } }; @@ -14504,15 +14504,14 @@ bool SemaOpenMP::analyzeLoopSequence( return isa(Child); }; - // High level grammar validation for (auto *Child : LoopSeqStmt->children()) { - if (!Child) + if (!Child) continue; - // Skip over non-loop-sequence statements - if (!isLoopSequenceDerivation(Child)) { + // Skip over non-loop-sequence statements + if (!isLoopSequenceDerivation(Child)) { Child = Child->IgnoreContainers(); // Ignore empty compound statement @@ -14530,9 +14529,9 @@ bool SemaOpenMP::analyzeLoopSequence( // Already been treated, skip this children continue; } - } - // Regular loop sequence handling - if (isLoopSequenceDerivation(Child)) { + } + // Regular loop sequence handling + if (isLoopSequenceDerivation(Child)) { if (isLoopGeneratingStmt(Child)) { if (!analyzeLoopGeneration(Child)) { return false; @@ -14546,12 +14545,12 @@ bool SemaOpenMP::analyzeLoopSequence( // Update the Loop Sequence size by one ++LoopSeqSize; } - } else { + } else { // Report error for invalid 
statement inside canonical loop sequence Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); return false; - } + } } return true; } @@ -14568,9 +14567,9 @@ bool SemaOpenMP::checkTransformableLoopSequence( // Checks whether the given statement is a compound statement if (!isa(AStmt)) { - Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) - << getOpenMPDirectiveName(Kind); - return false; + Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; } // Number of top level canonical loop nests observed (And acts as index) LoopSeqSize = 0; @@ -14601,7 +14600,7 @@ bool SemaOpenMP::checkTransformableLoopSequence( OriginalInits, TransformsPreInits, LoopSequencePreInits, LoopCategories, Context, Kind)) { - return false; + return false; } if (LoopSeqSize <= 0) { Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) @@ -15315,7 +15314,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *LoopStmt = nullptr; collectLoopStmts(AStmt, {LoopStmt}); - // Determine the PreInit declarations.e + // Determine the PreInit declarations. SmallVector PreInits; addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits); @@ -15931,13 +15930,18 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, CountVal = CountInt.getZExtValue(); }; - // Checks if the loop range is valid + // OpenMP [6.0, Restrictions] + // first + count - 1 must not evaluate to a value greater than the + // loop sequence length of the associated canonical loop sequence. auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal, unsigned NumLoops) -> bool { return FirstVal + CountVal - 1 <= NumLoops; }; uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize; + // Validates the loop range after evaluating the semantic information + // and ensures that the range is valid for the given loop sequence size. 
+ // Expressions are evaluated at compile time to obtain constant values. if (LRC) { EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal, CountVal); From b0fb1b3e26f1d9ceaac4495dcfad84f54f96d2a2 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:58:54 +0000 Subject: [PATCH 07/11] Added minimal changes to enable flang future implementation --- flang/include/flang/Parser/dump-parse-tree.h | 1 + flang/include/flang/Parser/parse-tree.h | 9 +++++++++ flang/lib/Lower/OpenMP/Clauses.cpp | 5 +++++ flang/lib/Lower/OpenMP/Clauses.h | 1 + flang/lib/Parser/openmp-parsers.cpp | 7 +++++++ flang/lib/Parser/unparse.cpp | 7 +++++++ flang/lib/Semantics/check-omp-structure.cpp | 9 +++++++++ llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 + 8 files changed, 40 insertions(+) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index e3eed6aed8079..76aa3f7b90156 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -600,6 +600,7 @@ class ParseTreeDumper { NODE(OmpLinearClause, Modifier) NODE(parser, OmpLinearModifier) NODE_ENUM(OmpLinearModifier, Value) + NODE(parser, OmpLoopRangeClause) NODE(parser, OmpStepComplexModifier) NODE(parser, OmpStepSimpleModifier) NODE(parser, OmpLoopDirective) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 61f97b855b0e5..d32db62db2628 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4367,6 +4367,15 @@ struct OmpLinearClause { std::tuple t; }; +// Ref: [6.0:207-208] +// +// loop-range-clause -> +// LOOPRANGE(first, count) // since 6.0 +struct OmpLoopRangeClause { + TUPLE_CLASS_BOILERPLATE(OmpLoopRangeClause); + std::tuple t; +}; + // Ref: [4.5:216-219], [5.0:315-324], [5.1:347-355], [5.2:150-158] // // map-clause -> diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 
b599d69a36272..a38249bf2b588 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -997,6 +997,11 @@ Link make(const parser::OmpClause::Link &inp, return Link{/*List=*/makeObjects(inp.v, semaCtx)}; } +LoopRange make(const parser::OmpClause::Looprange &inp, + semantics::SemanticsContext &semaCtx) { + llvm_unreachable("Unimplemented: looprange"); +} + Map make(const parser::OmpClause::Map &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpMapClause diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h index d7ab21d428e32..bda8571e65f23 100644 --- a/flang/lib/Lower/OpenMP/Clauses.h +++ b/flang/lib/Lower/OpenMP/Clauses.h @@ -239,6 +239,7 @@ using Initializer = tomp::clause::InitializerT; using InReduction = tomp::clause::InReductionT; using IsDevicePtr = tomp::clause::IsDevicePtrT; using Lastprivate = tomp::clause::LastprivateT; +using LoopRange = tomp::clause::LoopRangeT; using Linear = tomp::clause::LinearT; using Link = tomp::clause::LinkT; using Map = tomp::clause::MapT; diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index c55642d969503..d53389746dbec 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -847,6 +847,11 @@ TYPE_PARSER( maybe(":"_tok >> nonemptyList(Parser{})), /*PostModified=*/pure(true))) +TYPE_PARSER( + construct(scalarIntConstantExpr, + "," >> scalarIntConstantExpr) +) + // OpenMPv5.2 12.5.2 detach-clause -> DETACH (event-handle) TYPE_PARSER(construct(Parser{})) @@ -1020,6 +1025,8 @@ TYPE_PARSER( // parenthesized(Parser{}))) || "LINK" >> construct(construct( parenthesized(Parser{}))) || + "LOOPRANGE" >> construct(construct( + parenthesized(Parser{}))) || "MAP" >> construct(construct( parenthesized(Parser{}))) || "MATCH" >> construct(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index ed0f227fd5b98..18e8a63ca68aa 100644 --- 
a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2318,6 +2318,13 @@ class UnparseVisitor { } } } + void Unparse(const OmpLoopRangeClause &x) { + Word("LOOPRANGE("); + Walk(std::get<0>(x.t)); + Put(", "); + Walk(std::get<1>(x.t)); + Put(")"); + } void Unparse(const OmpReductionClause &x) { using Modifier = OmpReductionClause::Modifier; Walk(std::get>>(x.t), ": "); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 68cea6739830d..2a03c6a1fd0e4 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -4446,6 +4446,15 @@ CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Collapse, OMPC_collapse) CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Safelen, OMPC_safelen) CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Simdlen, OMPC_simdlen) +void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) { + context_.Say(GetContext().clauseSource, + "LOOPRANGE clause is not implemented yet"_err_en_US, + ContextDirectiveAsFortran()); +} + +void OmpStructureChecker::Enter(const parser::OmpClause::FreeAgent &x) { + context_.Say(GetContext().clauseSource, + "FREE_AGENT clause is not implemented yet"_err_en_US, // Restrictions specific to each clause are implemented apart from the // generalized restrictions. 
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 989b35a7caa2a..f8acdc62aba3d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -276,6 +276,7 @@ def OMPC_Link : Clause<[Spelling<"link">]> { } def OMPC_LoopRange : Clause<[Spelling<"looprange">]> { let clangClass = "OMPLoopRangeClause"; + let flangClass = "OmpLoopRangeClause"; } def OMPC_Map : Clause<[Spelling<"map">]> { let clangClass = "OMPMapClause"; From b252aa910ef7c5c278a86bd7195bbf3bb18dd18d Mon Sep 17 00:00:00 2001 From: eZWALT Date: Wed, 21 May 2025 13:14:22 +0000 Subject: [PATCH 08/11] Address basic PR feedback --- clang/include/clang/AST/OpenMPClause.h | 93 ++++---- clang/include/clang/AST/StmtOpenMP.h | 2 +- clang/include/clang/Sema/SemaOpenMP.h | 14 +- clang/lib/AST/OpenMPClause.cpp | 17 +- clang/lib/CodeGen/CGExpr.cpp | 5 +- clang/lib/CodeGen/CodeGenFunction.h | 4 - clang/lib/Sema/SemaOpenMP.cpp | 224 +++++++++----------- flang/lib/Semantics/check-omp-structure.cpp | 3 - 8 files changed, 166 insertions(+), 196 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 8f937cdef9cd0..3df5133a17fb4 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1153,82 +1153,73 @@ class OMPFullClause final : public OMPNoChildClause { /// for(int j = 0; j < 256; j+=2) /// for(int k = 127; k >= 0; --k) /// \endcode -class OMPLoopRangeClause final : public OMPClause { +class OMPLoopRangeClause final + : public OMPClause, + private llvm::TrailingObjects { friend class OMPClauseReader; - - explicit OMPLoopRangeClause() - : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {} + friend class llvm::TrailingObjects; /// Location of '(' SourceLocation LParenLoc; - /// Location of 'first' - SourceLocation FirstLoc; - - /// Location of 'count' - SourceLocation CountLoc; - - /// Expr associated with 'first' argument - Expr 
*First = nullptr; - - /// Expr associated with 'count' argument - Expr *Count = nullptr; - - /// Set 'first' - void setFirst(Expr *First) { this->First = First; } + /// Location of first and count expressions + SourceLocation FirstLoc, CountLoc; - /// Set 'count' - void setCount(Expr *Count) { this->Count = Count; } + /// Number of looprange arguments (always 2: first, count) + unsigned NumArgs = 2; - /// Set location of '('. - void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } - - /// Set location of 'first' argument - void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } + /// Set the argument expressions. + void setArgs(ArrayRef Args) { + assert(Args.size() == NumArgs && "Expected exactly 2 looprange arguments"); + std::copy(Args.begin(), Args.end(), getTrailingObjects()); + } - /// Set location of 'count' argument - void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } + /// Build an empty clause for deserialization. + explicit OMPLoopRangeClause() + : OMPClause(llvm::omp::OMPC_looprange, {}, {}), NumArgs(2) {} public: - /// Build an AST node for a 'looprange' clause - /// - /// \param StartLoc Starting location of the clause. - /// \param LParenLoc Location of '('. - /// \param ModifierLoc Modifier location. - /// \param + /// Build a 'looprange' clause AST node. static OMPLoopRangeClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation FirstLoc, SourceLocation CountLoc, - SourceLocation EndLoc, Expr *First, Expr *Count); + SourceLocation EndLoc, ArrayRef Args); - /// Build an empty 'looprange' node for deserialization - /// - /// \param C Context of the AST. + /// Build an empty 'looprange' clause node. 
static OMPLoopRangeClause *CreateEmpty(const ASTContext &C); - /// Returns the location of '(' + // Location getters/setters SourceLocation getLParenLoc() const { return LParenLoc; } - - /// Returns the location of 'first' SourceLocation getFirstLoc() const { return FirstLoc; } - - /// Returns the location of 'count' SourceLocation getCountLoc() const { return CountLoc; } - /// Returns the argument 'first' or nullptr if not set - Expr *getFirst() const { return cast_or_null(First); } + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } + void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } - /// Returns the argument 'count' or nullptr if not set - Expr *getCount() const { return cast_or_null(Count); } + /// Get looprange arguments: first and count + Expr *getFirst() const { return getArgs()[0]; } + Expr *getCount() const { return getArgs()[1]; } - child_range children() { - return child_range(reinterpret_cast(&First), - reinterpret_cast(&Count) + 1); + /// Set looprange arguments: first and count + void setFirst(Expr *E) { getArgs()[0] = E; } + void setCount(Expr *E) { getArgs()[1] = E; } + + MutableArrayRef getArgs() { + return MutableArrayRef(getTrailingObjects(), NumArgs); + } + ArrayRef getArgs() const { + return ArrayRef(getTrailingObjects(), NumArgs); } + child_range children() { + return child_range(reinterpret_cast(getArgs().begin()), + reinterpret_cast(getArgs().end())); + } const_child_range children() const { - auto Children = const_cast(this)->children(); - return const_child_range(Children.begin(), Children.end()); + auto AR = getArgs(); + return const_child_range(reinterpret_cast(AR.begin()), + reinterpret_cast(AR.end())); } child_range used_children() { diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index 6425f6616a558..0421c06245cac 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -5883,7 
+5883,7 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { EndLoc, NumLoops) { // Interchange produces a single top-level canonical loop // nest, with the exact same amount of total loops - setNumGeneratedLoops(NumLoops); + setNumGeneratedLoops(3 * NumLoops); setNumGeneratedLoopNests(1); } diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 547ea95c6cd5d..f848c4a7d715e 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -1492,7 +1492,7 @@ class SemaOpenMP : public SemaBase { bool checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, - Stmt *&Body, SmallVectorImpl> &OriginalInits); + Stmt *&Body, SmallVectorImpl> &OriginalInits); /// @brief Categories of loops encountered during semantic OpenMP loop /// analysis @@ -1555,9 +1555,9 @@ class SemaOpenMP : public SemaBase { Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, - SmallVectorImpl> &OriginalInits, - SmallVectorImpl> &TransformsPreInits, - SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, SmallVectorImpl &LoopCategories, ASTContext &Context, OpenMPDirectiveKind Kind); @@ -1591,9 +1591,9 @@ class SemaOpenMP : public SemaBase { unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, - SmallVectorImpl> &OriginalInits, - SmallVectorImpl> &TransformsPreInits, - SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, SmallVectorImpl &LoopCategories, ASTContext &Context); /// Helper to keep information about the current `omp begin/end declare diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 
0b5808eb100e4..e0570262b2a05 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1026,22 +1026,25 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) { OMPLoopRangeClause * OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc, - SourceLocation LParenLoc, SourceLocation EndLoc, - SourceLocation FirstLoc, SourceLocation CountLoc, - Expr *First, Expr *Count) { + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc, + ArrayRef Args) { + + assert(Args.size() == 2 && + "looprange clause must have exactly two arguments"); OMPLoopRangeClause *Clause = CreateEmpty(C); Clause->setLocStart(StartLoc); Clause->setLParenLoc(LParenLoc); - Clause->setLocEnd(EndLoc); Clause->setFirstLoc(FirstLoc); Clause->setCountLoc(CountLoc); - Clause->setFirst(First); - Clause->setCount(Count); + Clause->setLocEnd(EndLoc); + Clause->setArgs(Args); return Clause; } OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) { - return new (C) OMPLoopRangeClause(); + void *Mem = C.Allocate(totalSizeToAlloc(2)); + return new (Mem) OMPLoopRangeClause(); } OMPAllocateClause *OMPAllocateClause::Create( diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 08049d4d4e37d..f983b88eb61ec 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3256,11 +3256,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { var, ConvertTypeForMem(VD->getType()), getContext().getDeclAlign(VD)); // No other cases for now. - } else { - llvm::dbgs() << "THE DAMN DECLREFEXPR HASN'T BEEN ENTERED IN LOCALDECLMAP\n"; - VD->dumpColor(); + } else llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?"); - } // Handle threadlocal function locals. 
if (VD->getTLSKind() != VarDecl::TLS_None) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index bfe24213ed377..fe753e5b688b1 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -5363,10 +5363,6 @@ class CodeGenFunction : public CodeGenTypeCache { /// Set the address of a local variable. void setAddrOfLocalVar(const VarDecl *VD, Address Addr) { - if (LocalDeclMap.count(VD)) { - llvm::errs() << "Warning: VarDecl already exists in map: "; - VD->dumpColor(); - } assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!"); LocalDeclMap.insert({VD, Addr}); } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 2985b256cf153..9819dcfe60360 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14196,38 +14196,37 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -// Overloaded base case function +/// Overloaded base case function template static bool tryHandleAs(T *t, F &&) { return false; } -/** - * Tries to recursively cast `t` to one of the given types and invokes `f` if - * successful. - * - * @tparam Class The first type to check. - * @tparam Rest The remaining types to check. - * @tparam T The base type of `t`. - * @tparam F The callable type for the function to invoke upon a successful - * cast. - * @param t The object to be checked. - * @param f The function to invoke if `t` matches `Class`. - * @return `true` if `t` matched any type and `f` was called, otherwise `false`. - */ +/// +/// Tries to recursively cast `t` to one of the given types and invokes `f` if +/// successful. +/// +/// @tparam Class The first type to check. +/// @tparam Rest The remaining types to check. +/// @tparam T The base type of `t`. +/// @tparam F The callable type for the function to invoke upon a successful +/// cast. 
+/// @param t The object to be checked. +/// @param f The function to invoke if `t` matches `Class`. +/// @return `true` if `t` matched any type and `f` was called, otherwise +/// `false`. template static bool tryHandleAs(T *t, F &&f) { if (Class *c = dyn_cast(t)) { f(c); return true; - } else { - return tryHandleAs(t, std::forward(f)); } + return tryHandleAs(t, std::forward(f)); } -// Updates OriginalInits by checking Transform against loop transformation -// directives and appending their pre-inits if a match is found. +/// Updates OriginalInits by checking Transform against loop transformation +/// directives and appending their pre-inits if a match is found. static void updatePreInits(OMPLoopBasedDirective *Transform, - SmallVectorImpl> &PreInits) { + SmallVectorImpl> &PreInits) { if (!tryHandleAs( Transform, [&PreInits](auto *Dir) { @@ -14239,7 +14238,7 @@ static void updatePreInits(OMPLoopBasedDirective *Transform, bool SemaOpenMP::checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, - Stmt *&Body, SmallVectorImpl> &OriginalInits) { + Stmt *&Body, SmallVectorImpl> &OriginalInits) { OriginalInits.emplace_back(); bool Result = OMPLoopBasedDirective::doForAllLoops( AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, NumLoops, @@ -14273,40 +14272,40 @@ bool SemaOpenMP::checkTransformableLoopNest( return Result; } -// Counts the total number of nested loops, including the outermost loop (the -// original loop). PRECONDITION of this visitor is that it must be invoked from -// the original loop to be analyzed. The traversal is stop for Decl's and -// Expr's given that they may contain inner loops that must not be counted. 
-// -// Example AST structure for the code: -// -// int main() { -// #pragma omp fuse -// { -// for (int i = 0; i < 100; i++) { <-- Outer loop -// []() { -// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP -// }; -// for(int j = 0; j < 5; ++j) {} <-- Inner loop -// } -// for (int r = 0; i < 100; i++) { <-- Outer loop -// struct LocalClass { -// void bar() { -// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP -// } -// }; -// for(int k = 0; k < 10; ++k) {} <-- Inner loop -// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP -// } -// } -// } -// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops +/// Counts the total number of nested loops, including the outermost loop (the +/// original loop). PRECONDITION of this visitor is that it must be invoked from +/// the original loop to be analyzed. The traversal is stop for Decl's and +/// Expr's given that they may contain inner loops that must not be counted. +/// +/// Example AST structure for the code: +/// +/// int main() { +/// #pragma omp fuse +/// { +/// for (int i = 0; i < 100; i++) { <-- Outer loop +/// []() { +/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +/// }; +/// for(int j = 0; j < 5; ++j) {} <-- Inner loop +/// } +/// for (int r = 0; i < 100; i++) { <-- Outer loop +/// struct LocalClass { +/// void bar() { +/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +/// } +/// }; +/// for(int k = 0; k < 10; ++k) {} <-- Inner loop +/// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP +/// } +/// } +/// } +/// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { private: unsigned NestedLoopCount = 0; public: - explicit NestedLoopCounterVisitor() {} + explicit NestedLoopCounterVisitor() = default; unsigned getNestedLoopCount() const { return NestedLoopCount; } @@ -14333,7 +14332,7 @@ class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { return true; // Only recurse into CompoundStmt 
(block {}) and loop bodies - if (isa(S) || isa(S) || isa(S)) { + if (isa(S)) { return DynamicRecursiveASTVisitor::TraverseStmt(S); } @@ -14354,19 +14353,18 @@ bool SemaOpenMP::analyzeLoopSequence( Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, - SmallVectorImpl> &OriginalInits, - SmallVectorImpl> &TransformsPreInits, - SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, SmallVectorImpl &LoopCategories, ASTContext &Context, OpenMPDirectiveKind Kind) { VarsWithInheritedDSAType TmpDSA; QualType BaseInductionVarType; - // Helper Lambda to handle storing initialization and body statements for both - // ForStmt and CXXForRangeStmt and checks for any possible mismatch between - // induction variables types - auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType, - this, &Context](Stmt *LoopStmt) { + /// Helper Lambda to handle storing initialization and body statements for + /// both ForStmt and CXXForRangeStmt and checks for any possible mismatch + /// between induction variables types + auto StoreLoopStatements = [&](Stmt *LoopStmt) { if (auto *For = dyn_cast(LoopStmt)) { OriginalInits.back().push_back(For->getInit()); ForStmts.push_back(For); @@ -14394,16 +14392,11 @@ bool SemaOpenMP::analyzeLoopSequence( } }; - // Helper lambda functions to encapsulate the processing of different - // derivations of the canonical loop sequence grammar - // - // Modularized code for handling loop generation and transformations - auto analyzeLoopGeneration = [&storeLoopStatements, &LoopHelpers, - &OriginalInits, &TransformsPreInits, - &LoopCategories, &LoopSeqSize, &NumLoops, Kind, - &TmpDSA, &ForStmts, &Context, - &LoopSequencePreInits, this](Stmt *Child) { - auto LoopTransform = dyn_cast(Child); + /// Helper lambda functions to encapsulate the processing of different + /// derivations of 
the canonical loop sequence grammar + /// Modularized code for handling loop generation and transformations + auto AnalyzeLoopGeneration = [&](Stmt *Child) { + auto *LoopTransform = dyn_cast(Child); Stmt *TransformedStmt = LoopTransform->getTransformedStmt(); unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests(); unsigned NumGeneratedLoops = LoopTransform->getNumGeneratedLoops(); @@ -14414,9 +14407,8 @@ bool SemaOpenMP::analyzeLoopSequence( LoopSeqSize += NumGeneratedLoopNests; NumLoops += NumGeneratedLoops; return true; - } - // Unroll full (0 loops produced) - else { + } else { + // Unroll full (0 loops produced) Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); return false; @@ -14443,9 +14435,8 @@ bool SemaOpenMP::analyzeLoopSequence( LoopHelpers, ForStmts, OriginalInits, TransformsPreInits, LoopSequencePreInits, LoopCategories, Context, Kind); - } - // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) - else { + } else { + // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) // Process the transformed loop statement OriginalInits.emplace_back(); TransformsPreInits.emplace_back(); @@ -14461,7 +14452,7 @@ bool SemaOpenMP::analyzeLoopSequence( << getOpenMPDirectiveName(Kind); return false; } - storeLoopStatements(TransformedStmt); + StoreLoopStatements(TransformedStmt); updatePreInits(LoopTransform, TransformsPreInits); NumLoops += NumGeneratedLoops; @@ -14470,10 +14461,8 @@ bool SemaOpenMP::analyzeLoopSequence( } }; - // Modularized code for handling regular canonical loops - auto analyzeRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, - &LoopSeqSize, &NumLoops, Kind, &TmpDSA, - &LoopCategories, this](Stmt *Child) { + /// Modularized code for handling regular canonical loops + auto AnalyzeRegularLoop = [&](Stmt *Child) { OriginalInits.emplace_back(); LoopHelpers.emplace_back(); LoopCategories.push_back(OMPLoopCategory::RegularLoop); @@ -14488,19 
+14477,19 @@ bool SemaOpenMP::analyzeLoopSequence( return false; } - storeLoopStatements(Child); + StoreLoopStatements(Child); auto NLCV = NestedLoopCounterVisitor(); NLCV.TraverseStmt(Child); NumLoops += NLCV.getNestedLoopCount(); return true; }; - // Helper functions to validate canonical loop sequence grammar is valid - auto isLoopSequenceDerivation = [](auto *Child) { - return isa(Child) || isa(Child) || - isa(Child); + /// Helper functions to validate loop sequence grammar derivations + auto IsLoopSequenceDerivation = [](auto *Child) { + return isa(Child); }; - auto isLoopGeneratingStmt = [](auto *Child) { + /// Helper functions to validate loop generating grammar derivations + auto IsLoopGeneratingStmt = [](auto *Child) { return isa(Child); }; @@ -14511,7 +14500,7 @@ bool SemaOpenMP::analyzeLoopSequence( continue; // Skip over non-loop-sequence statements - if (!isLoopSequenceDerivation(Child)) { + if (!IsLoopSequenceDerivation(Child)) { Child = Child->IgnoreContainers(); // Ignore empty compound statement @@ -14531,17 +14520,17 @@ bool SemaOpenMP::analyzeLoopSequence( } } // Regular loop sequence handling - if (isLoopSequenceDerivation(Child)) { - if (isLoopGeneratingStmt(Child)) { - if (!analyzeLoopGeneration(Child)) { + if (IsLoopSequenceDerivation(Child)) { + if (IsLoopGeneratingStmt(Child)) { + if (!AnalyzeLoopGeneration(Child)) return false; - } - // analyzeLoopGeneration updates Loop Sequence size accordingly + + // AnalyzeLoopGeneration updates Loop Sequence size accordingly } else { - if (!analyzeRegularLoop(Child)) { + if (!AnalyzeRegularLoop(Child)) return false; - } + // Update the Loop Sequence size by one ++LoopSeqSize; } @@ -14560,9 +14549,9 @@ bool SemaOpenMP::checkTransformableLoopSequence( unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, - SmallVectorImpl> &OriginalInits, - SmallVectorImpl> &TransformsPreInits, - SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> 
&TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, SmallVectorImpl &LoopCategories, ASTContext &Context) { // Checks whether the given statement is a compound statement @@ -14598,10 +14587,9 @@ bool SemaOpenMP::checkTransformableLoopSequence( // Recursive entry point to process the main loop sequence if (!analyzeLoopSequence(AStmt, LoopSeqSize, NumLoops, LoopHelpers, ForStmts, OriginalInits, TransformsPreInits, - LoopSequencePreInits, LoopCategories, Context, - Kind)) { + LoopSequencePreInits, LoopCategories, Context, Kind)) return false; - } + if (LoopSeqSize <= 0) { Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) << getOpenMPDirectiveName(Kind); @@ -14693,7 +14681,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, // Verify and diagnose loop nest. SmallVector LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector, 4> OriginalInits; + SmallVector, 4> OriginalInits; if (!checkTransformableLoopNest(OMPD_tile, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -14970,7 +14958,7 @@ StmtResult SemaOpenMP::ActOnOpenMPStripeDirective(ArrayRef Clauses, // Verify and diagnose loop nest. 
SmallVector LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector, 4> OriginalInits; + SmallVector, 4> OriginalInits; if (!checkTransformableLoopNest(OMPD_stripe, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15231,7 +15219,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *Body = nullptr; SmallVector LoopHelpers( NumLoops); - SmallVector, NumLoops + 1> OriginalInits; + SmallVector, NumLoops + 1> OriginalInits; if (!checkTransformableLoopNest(OMPD_unroll, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15499,7 +15487,7 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, Stmt *Body = nullptr; SmallVector LoopHelpers( NumLoops); - SmallVector, NumLoops + 1> OriginalInits; + SmallVector, NumLoops + 1> OriginalInits; if (!checkTransformableLoopNest(OMPD_reverse, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15691,7 +15679,7 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( // Verify and diagnose loop nest. 
SmallVector LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector, 2> OriginalInits; + SmallVector, 2> OriginalInits; if (!checkTransformableLoopNest(OMPD_interchange, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15878,9 +15866,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, CaptureVars CopyTransformer(SemaRef); // Ensure the structured block is not empty - if (!AStmt) { + if (!AStmt) return StmtError(); - } unsigned NumLoops = 1; unsigned LoopSeqSize = 1; @@ -15899,16 +15886,15 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops SmallVector LoopHelpers; SmallVector LoopStmts; - SmallVector> OriginalInits; - SmallVector> TransformsPreInits; - SmallVector> LoopSequencePreInits; + SmallVector> OriginalInits; + SmallVector> TransformsPreInits; + SmallVector> LoopSequencePreInits; SmallVector LoopCategories; if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops, LoopHelpers, LoopStmts, OriginalInits, TransformsPreInits, LoopSequencePreInits, - LoopCategories, Context)) { + LoopCategories, Context)) return StmtError(); - } // Handle clauses, which can be any of the following: [looprange, apply] const OMPLoopRangeClause *LRC = @@ -15998,9 +15984,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // expressions. Generates both the variable declaration and the corresponding // initialization statement. 
auto CreateHelperVarAndStmt = - [&SemaRef = this->SemaRef, &Context, &CopyTransformer, - &IVType](Expr *ExprToCopy, const std::string &BaseName, unsigned I, - bool NeedsNewVD = false) { + [&, &SemaRef = SemaRef](Expr *ExprToCopy, const std::string &BaseName, + unsigned I, bool NeedsNewVD = false) { Expr *TransformedExpr = AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy)); if (!TransformedExpr) @@ -16044,9 +16029,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Transformations that apply this concept: Loopranged Fuse, Split if (!LoopSequencePreInits.empty()) { for (const auto <PreInits : LoopSequencePreInits) { - if (!LTPreInits.empty()) { + if (!LTPreInits.empty()) llvm::append_range(PreInits, LTPreInits); - } } } @@ -16075,9 +16059,9 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Order matters: pre-inits may define variables used in the original // inits such as upper bounds... auto TransformPreInit = TransformsPreInits[TransformIndex++]; - if (!TransformPreInit.empty()) { + if (!TransformPreInit.empty()) llvm::append_range(PreInits, TransformPreInit); - } + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], PreInits); } @@ -17496,13 +17480,15 @@ OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause( if (CountVal.isInvalid()) Count = nullptr; + SmallVector ArgsVec = {First, Count}; + // OpenMP [6.0, Restrictions] // first + count - 1 must not evaluate to a value greater than the // loop sequence length of the associated canonical loop sequence. 
// This check must be performed afterwards due to the delayed // parsing and computation of the associated loop sequence return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc, - FirstLoc, CountLoc, EndLoc, First, Count); + FirstLoc, CountLoc, EndLoc, ArgsVec); } OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc, diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 2a03c6a1fd0e4..ac4883c4f2a18 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -4452,9 +4452,6 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) { ContextDirectiveAsFortran()); } -void OmpStructureChecker::Enter(const parser::OmpClause::FreeAgent &x) { - context_.Say(GetContext().clauseSource, - "FREE_AGENT clause is not implemented yet"_err_en_US, // Restrictions specific to each clause are implemented apart from the // generalized restrictions. 
From e294777879dd46c1859a03c307e70dd03abe11b7 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Thu, 22 May 2025 10:39:39 +0000 Subject: [PATCH 09/11] Removed unncessary warning and updated tests accordingly --- .../clang/Basic/DiagnosticSemaKinds.td | 3 -- clang/lib/Sema/SemaOpenMP.cpp | 21 +-------- clang/test/OpenMP/fuse_messages.cpp | 43 +++++++++++++++---- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e85cd32d78b5c..2bd0f895204c9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11612,9 +11612,6 @@ def note_omp_implicit_dsa : Note< "implicitly determined as %0">; def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; -def warn_omp_different_loop_ind_var_types : Warning < - "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">, - InGroup; def err_omp_not_canonical_loop : Error < "loop after '#pragma omp %0' is not in canonical form">; def err_omp_not_a_loop_sequence : Error < diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 9819dcfe60360..5f36d968c68fa 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14360,31 +14360,12 @@ bool SemaOpenMP::analyzeLoopSequence( OpenMPDirectiveKind Kind) { VarsWithInheritedDSAType TmpDSA; - QualType BaseInductionVarType; /// Helper Lambda to handle storing initialization and body statements for - /// both ForStmt and CXXForRangeStmt and checks for any possible mismatch - /// between induction variables types + /// both ForStmt and CXXForRangeStmt auto StoreLoopStatements = [&](Stmt *LoopStmt) { if (auto *For = dyn_cast(LoopStmt)) { OriginalInits.back().push_back(For->getInit()); ForStmts.push_back(For); - // Extract induction variable - if (auto 
*InitStmt = dyn_cast_or_null(For->getInit())) { - if (auto *InitDecl = dyn_cast(InitStmt->getSingleDecl())) { - QualType InductionVarType = InitDecl->getType().getCanonicalType(); - - // Compare with first loop type - if (BaseInductionVarType.isNull()) { - BaseInductionVarType = InductionVarType; - } else if (!Context.hasSameType(BaseInductionVarType, - InductionVarType)) { - Diag(InitDecl->getBeginLoc(), - diag::warn_omp_different_loop_ind_var_types) - << getOpenMPDirectiveName(OMPD_fuse) << BaseInductionVarType - << InductionVarType; - } - } - } } else { auto *CXXFor = cast(LoopStmt); OriginalInits.back().push_back(CXXFor->getBeginStmt()); diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp index 2a2491d008a0b..4902d424373e5 100644 --- a/clang/test/OpenMP/fuse_messages.cpp +++ b/clang/test/OpenMP/fuse_messages.cpp @@ -70,15 +70,6 @@ void func() { for(int j = 0; j < 10; ++j); } - //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'unsigned int'}} - //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'long long'}} - #pragma omp fuse - { - for(int i = 0; i < 10; ++i); - for(unsigned int j = 0; j < 10; ++j); - for(long long k = 0; k < 100; ++k); - } - //expected-warning@+2 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} #pragma omp fuse { @@ -123,6 +114,40 @@ void func() { for(int j = 0; j < 100; ++j); for(int k = 0; k < 50; ++k); } + + //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '6' is greater than the total number of loops '5'}} + #pragma omp fuse looprange(1,6) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + // This fusion results in 2 loops + #pragma omp fuse looprange(1,2) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; 
j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + } + + //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(2,3) + { + #pragma omp unroll partial(2) + for(int i = 0; i < 10; ++i); + + #pragma omp reverse + for(int j = 0; j < 10; ++j); + + #pragma omp fuse + { + { + #pragma omp reverse + for(int j = 0; j < 10; ++j); + } + for(int k = 0; k < 50; ++k); + } + } } // In a template context, but expression itself not instantiation-dependent From 1c8f0fe23fdd78de1512505f128fa9e8bff655f9 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 20 Jun 2025 14:17:29 +0000 Subject: [PATCH 10/11] Address formatting issues --- clang/include/clang/Parse/Parser.h | 2 +- clang/lib/AST/StmtOpenMP.cpp | 10 +++++----- clang/lib/Parse/ParseOpenMP.cpp | 2 +- flang/lib/Lower/OpenMP/Clauses.cpp | 2 +- flang/lib/Parser/openmp-parsers.cpp | 8 +++----- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 08bee0078b5ff..9364007f3cf41 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -6737,7 +6737,7 @@ class Parser : public CodeCompletionHandler { /// Parses the 'looprange' clause of a '#pragma omp fuse' directive. OMPClause *ParseOpenMPLoopRangeClause(); - + /// Parses the 'sizes' clause of a '#pragma omp tile' directive. 
OMPClause *ParseOpenMPSizesClause(); diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index f527e6361b5e5..1f49e9f2a0640 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -522,15 +522,15 @@ OMPFuseDirective *OMPFuseDirective::Create( NumLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); - // The number of top level canonical nests could + // The number of top level canonical nests could // not match the total number of generated loops // Example: // Before fusion: - // for (int i = 0; i < N; ++i) - // for (int j = 0; j < M; ++j) + // for (int i = 0; i < N; ++i) + // for (int j = 0; j < M; ++j) // A[i][j] = i + j; - // - // for (int k = 0; k < P; ++k) + // + // for (int k = 0; k < P; ++k) // B[k] = k * 2; // Here, NumLoopNests = 2, but NumLoops = 3. Dir->setNumGeneratedLoopNests(NumLoopNests); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 2d6d624c1ecc8..48d9c184131cd 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3520,7 +3520,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, break; case OMPC_looprange: Clause = ParseOpenMPLoopRangeClause(); - break; + break; default: break; } diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index a38249bf2b588..c94d56cb57756 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -998,7 +998,7 @@ Link make(const parser::OmpClause::Link &inp, } LoopRange make(const parser::OmpClause::Looprange &inp, - semantics::SemanticsContext &semaCtx) { + semantics::SemanticsContext &semaCtx) { llvm_unreachable("Unimplemented: looprange"); } diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index d53389746dbec..39978e402e63b 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -847,10 +847,8 @@ TYPE_PARSER( maybe(":"_tok >> 
nonemptyList(Parser{})), /*PostModified=*/pure(true))) -TYPE_PARSER( - construct(scalarIntConstantExpr, - "," >> scalarIntConstantExpr) -) +TYPE_PARSER(construct( + scalarIntConstantExpr, "," >> scalarIntConstantExpr)) // OpenMPv5.2 12.5.2 detach-clause -> DETACH (event-handle) TYPE_PARSER(construct(Parser{})) @@ -1026,7 +1024,7 @@ TYPE_PARSER( // "LINK" >> construct(construct( parenthesized(Parser{}))) || "LOOPRANGE" >> construct(construct( - parenthesized(Parser{}))) || + parenthesized(Parser{}))) || "MAP" >> construct(construct( parenthesized(Parser{}))) || "MATCH" >> construct(construct( From 009d8630c7ff97dedc543df04d0b18ab4579a503 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 20 Jun 2025 14:44:31 +0000 Subject: [PATCH 11/11] Address minor feedback part 2 --- clang/include/clang/AST/OpenMPClause.h | 8 ++++++-- clang/include/clang/AST/StmtOpenMP.h | 1 + clang/include/clang/Basic/DiagnosticSemaKinds.td | 10 +++++----- clang/lib/Sema/SemaOpenMP.cpp | 15 +++++---------- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 3df5133a17fb4..478c41322f34a 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1197,12 +1197,16 @@ class OMPLoopRangeClause final void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } - /// Get looprange arguments: first and count + /// Get looprange 'first' expression Expr *getFirst() const { return getArgs()[0]; } + + /// Get looprange 'count' expression Expr *getCount() const { return getArgs()[1]; } - /// Set looprange arguments: first and count + /// Set looprange 'first' expression void setFirst(Expr *E) { getArgs()[0] = E; } + + /// Set looprange 'count' expression void setCount(Expr *E) { getArgs()[1] = E; } MutableArrayRef getArgs() { diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index 
0421c06245cac..5ec3677fc7507 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -976,6 +976,7 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Set the number of loops generated by this loop transformation. void setNumGeneratedLoops(unsigned Num) { NumGeneratedLoops = Num; } + /// Set the number of top level canonical loop nests generated by this loop /// transformation void setNumGeneratedLoopNests(unsigned Num) { NumGeneratedLoopNests = Num; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 2bd0f895204c9..d807b6b076724 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11612,16 +11612,16 @@ def note_omp_implicit_dsa : Note< "implicitly determined as %0">; def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; -def err_omp_not_canonical_loop : Error < +def err_omp_not_canonical_loop : Error< "loop after '#pragma omp %0' is not in canonical form">; -def err_omp_not_a_loop_sequence : Error < +def err_omp_not_a_loop_sequence : Error< "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">; -def err_omp_empty_loop_sequence : Error < +def err_omp_empty_loop_sequence : Error< "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">; -def err_omp_invalid_looprange : Error < +def err_omp_invalid_looprange : Error< "loop range in '#pragma omp %0' exceeds the number of available loops: " "range end '%1' is greater than the total number of loops '%2'">; -def warn_omp_redundant_fusion : Warning < +def warn_omp_redundant_fusion : Warning< "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">, InGroup; def err_omp_not_for : Error< diff 
--git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 5f36d968c68fa..8aa21c5c01220 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -22,7 +22,6 @@ #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DynamicRecursiveASTVisitor.h" #include "clang/AST/OpenMPClause.h" -#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtVisitor.h" @@ -48,7 +47,6 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/IR/Assumptions.h" #include -#include using namespace clang; using namespace llvm::omp; @@ -14201,7 +14199,6 @@ template static bool tryHandleAs(T *t, F &&) { return false; } -/// /// Tries to recursively cast `t` to one of the given types and invokes `f` if /// successful. /// @@ -14274,7 +14271,7 @@ bool SemaOpenMP::checkTransformableLoopNest( /// Counts the total number of nested loops, including the outermost loop (the /// original loop). PRECONDITION of this visitor is that it must be invoked from -/// the original loop to be analyzed. The traversal is stop for Decl's and +/// the original loop to be analyzed. The traversal stops for Decl's and /// Expr's given that they may contain inner loops that must not be counted. 
/// /// Example AST structure for the code: @@ -15945,7 +15942,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Select the type with the largest bit width among all induction variables QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType(); - for (unsigned int I = FirstVal; I < LastVal; ++I) { + for (unsigned I = FirstVal; I < LastVal; ++I) { QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType(); if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) { IVType = CurrentIVType; @@ -16054,9 +16051,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, auto [IVVD, IVDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", J); - if (!LBVD || !STVD || !NIVD || !IVVD) - assert(LBVD && STVD && NIVD && IVVD && - "OpenMP Fuse Helper variables creation failed"); + assert(LBVD && STVD && NIVD && IVVD && + "OpenMP Fuse Helper variables creation failed"); UBVarDecls.push_back(UBVD); LBVarDecls.push_back(LBVD); @@ -16097,11 +16093,10 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // original.indexk = ivk // body(k); Expr *InitVal = IntegerLiteral::Create(Context, // llvm::APInt(IVWidth, 0), - // } // 1. Create the initialized fuse index - const std::string IndexName = Twine(".omp.fuse.index").str(); + StringRef IndexName = ".omp.fuse.index"; Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0), IVType, SourceLocation()); VarDecl *IndexDecl =