From e3c0558e353d14203c279a0b335219606ca70406 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Sat, 15 Nov 2014 21:32:53 +0000 Subject: [PATCH] Add OpenMP code generation to isl backend This backend supports besides the classical code generation the upcoming SCEV based code generation (which the existing CLooG backend does not support robustly). OpenMP code generation in the isl backend benefits from our run-time alias checks such that the set of loops that can possibly be parallelized is a lot larger. The code was tested on LNT. We do not regress on builds without -polly-parallel. When using -polly-parallel most tests work flawlessly, but a few issues still remain and will be addressed in follow up commits. SCEV/non-SCEV codegen: - Compile time failure in ldecod and TimberWolfMC due a problem in our run-time alias check generation triggered by pointers that escape through the OpenMP subfunction (OpenMP specific). - Several execution time failures. Due to the larger set of loops that we now parallelize (compared to the classical code generation), we currently run into some timeouts in tests with a lot loops that have a low trip count and are slowed down by parallelizing them. SCEV only: - One existing failure in lencod due to llvm.org/PR21204 (not OpenMP specific) OpenMP code generation is the last feature that was only available in the CLooG backend. With the isl backend being the only one supporting features such as run-time alias checks and delinearization, we will soon switch to use the isl ast generator by the default and subsequently remove our dependency on CLooG. http://reviews.llvm.org/D5517 llvm-svn: 222088 --- polly/include/polly/CodeGen/IslExprBuilder.h | 8 +- polly/include/polly/Support/SCEVValidator.h | 16 + polly/lib/CodeGen/IslCodeGeneration.cpp | 336 +++++++++++++++++- polly/lib/Support/SCEVValidator.cpp | 42 +++ ...escaping-pointers-invalidate-alias-info.ll | 55 +++ .../OpenMP/loop-body-references-outer-iv.ll | 44 +++ .../loop-body-references-outer-values-2.ll | 36 ++ .../loop-body-references-outer-values-3.ll | 65 ++++ .../loop-body-references-outer-values.ll | 48 +++ .../OpenMP/loop-bounds-reference-outer-ids.ll | 102 ++++++ .../Isl/CodeGen/OpenMP/reference-other-bb.ll | 27 ++ .../OpenMP/reference-preceeding-loop.ll | 47 +++ polly/test/Isl/CodeGen/OpenMP/single_loop.ll | 118 ++++++ ...single_loop_with_loop_invariant_baseptr.ll | 50 +++ ...ingle_parallel_loop___%for.i---%exit.jscop | 17 + ...o-parallel-loops-reference-outer-indvar.ll | 47 +++ 16 files changed, 1047 insertions(+), 11 deletions(-) create mode 100644 polly/test/Isl/CodeGen/OpenMP/escaping-pointers-invalidate-alias-info.ll create mode 100644 polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-iv.ll create mode 100644 polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-2.ll create mode 100644 polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll create mode 100644 polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values.ll create mode 100644 polly/test/Isl/CodeGen/OpenMP/loop-bounds-reference-outer-ids.ll create mode 100644 polly/test/Isl/CodeGen/OpenMP/reference-other-bb.ll create mode 100644 polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll create mode 100644 polly/test/Isl/CodeGen/OpenMP/single_loop.ll create mode 100644 polly/test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll create mode 100644 polly/test/Isl/CodeGen/OpenMP/single_parallel_loop___%for.i---%exit.jscop create mode 100644 polly/test/Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll diff --git a/polly/include/polly/CodeGen/IslExprBuilder.h b/polly/include/polly/CodeGen/IslExprBuilder.h index eec45dd2cd42f8..74b1a099fab9aa 100644 --- a/polly/include/polly/CodeGen/IslExprBuilder.h +++ b/polly/include/polly/CodeGen/IslExprBuilder.h @@ -14,9 +14,9 @@ #include "polly/CodeGen/IRBuilder.h" -#include "isl/ast.h" +#include "llvm/ADT/MapVector.h" -#include +#include "isl/ast.h" namespace llvm { class SCEVExpander; @@ -81,7 +81,7 @@ namespace polly { class IslExprBuilder { public: /// @brief A map from isl_ids to llvm::Values. - typedef std::map IDToValueTy; + typedef llvm::MapVector IDToValueTy; /// @brief Construct an IslExprBuilder. /// @@ -125,7 +125,7 @@ class IslExprBuilder { private: PollyIRBuilder &Builder; - std::map &IDToValue; + IDToValueTy &IDToValue; /// @brief A SCEVExpander to translate dimension sizes to llvm values. llvm::SCEVExpander &Expander; diff --git a/polly/include/polly/Support/SCEVValidator.h b/polly/include/polly/Support/SCEVValidator.h index e787d35d279bf1..17294cef84a84a 100644 --- a/polly/include/polly/Support/SCEVValidator.h +++ b/polly/include/polly/Support/SCEVValidator.h @@ -12,6 +12,7 @@ #ifndef POLLY_SCEV_VALIDATOR_H #define POLLY_SCEV_VALIDATOR_H +#include "llvm/ADT/SetVector.h" #include namespace llvm { @@ -19,9 +20,24 @@ class Region; class SCEV; class ScalarEvolution; class Value; +class Loop; } namespace polly { +/// @brief Find the loops referenced from a SCEV expression. +/// +/// @param Expr The SCEV expression to scan for loops. +/// @param Loops A vector into which the found loops are inserted. +void findLoops(const llvm::SCEV *Expr, + llvm::SetVector &Loops); + +/// @brief Find the values referenced by SCEVUnknowns in a given SCEV +/// expression. +/// +/// @param Expr The SCEV expression to scan for SCEVUnknowns. +/// @param Expr A vector into which the found values are inserted. +void findValues(const llvm::SCEV *Expr, llvm::SetVector &Values); + /// Returns true when the SCEV contains references to instructions within the /// region. /// diff --git a/polly/lib/CodeGen/IslCodeGeneration.cpp b/polly/lib/CodeGen/IslCodeGeneration.cpp index bb1991511b350e..5308da3c921ec9 100644 --- a/polly/lib/CodeGen/IslCodeGeneration.cpp +++ b/polly/lib/CodeGen/IslCodeGeneration.cpp @@ -30,7 +30,11 @@ #include "polly/ScopInfo.h" #include "polly/Support/GICHelper.h" #include "polly/Support/ScopHelper.h" +#include "polly/Support/SCEVValidator.h" #include "polly/TempScopInfo.h" + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" @@ -56,11 +60,12 @@ using namespace llvm; class IslNodeBuilder { public: IslNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator, Pass *P, - LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT) - : Builder(Builder), Annotator(Annotator), + const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE, + DominatorTree &DT, Scop &S) + : S(S), Builder(Builder), Annotator(Annotator), Rewriter(new SCEVExpander(SE, "polly")), - ExprBuilder(Builder, IDToValue, *Rewriter), P(P), LI(LI), SE(SE), - DT(DT) {} + ExprBuilder(Builder, IDToValue, *Rewriter), P(P), DL(DL), LI(LI), + SE(SE), DT(DT) {} ~IslNodeBuilder() { delete Rewriter; } @@ -69,6 +74,7 @@ class IslNodeBuilder { IslExprBuilder &getExprBuilder() { return ExprBuilder; } private: + Scop &S; PollyIRBuilder &Builder; ScopAnnotator &Annotator; @@ -77,10 +83,17 @@ class IslNodeBuilder { IslExprBuilder ExprBuilder; Pass *P; + const DataLayout &DL; LoopInfo &LI; ScalarEvolution &SE; DominatorTree &DT; + /// @brief The current iteration of out-of-scop loops + /// + /// This map provides for a given loop a llvm::Value that contains the current + /// loop iteration. + LoopToScevMapT OutsideLoopIterations; + // This maps an isl_id* to the Value* it has in the generated program. For now // on, the only isl_ids that are stored here are the newly calculated loop // ivs. @@ -95,6 +108,12 @@ class IslNodeBuilder { /// @param Expr The expression to code generate. Value *generateSCEV(const SCEV *Expr); + /// A set of Value -> Value remappings to apply when generating new code. + /// + /// When generating new code for a ScopStmt this map is used to map certain + /// llvm::Values to new llvm::Values. + ValueMapT ValueMap; + // Extract the upper bound of this loop // // The isl code generation can generate arbitrary expressions to check if the @@ -119,10 +138,49 @@ class IslNodeBuilder { unsigned getNumberOfIterations(__isl_keep isl_ast_node *For); + /// Compute the values and loops referenced in this subtree. + /// + /// This function looks at all ScopStmts scheduled below the provided For node + /// and finds the llvm::Value[s] and llvm::Loops[s] which are referenced but + /// not locally defined. + /// + /// Values that can be synthesized or that are available as globals are + /// considered locally defined. + /// + /// Loops that contain the scop or that are part of the scop are considered + /// locally defined. Loops that are before the scop, but do not contain the + /// scop itself are considered not locally defined. + /// + /// @param For The node defining the subtree. + /// @param Values A vector that will be filled with the Values referenced in + /// this subtree. + /// @param Loops A vector that will be filled with the Loops referenced in + /// this subtree. + void getReferencesInSubtree(__isl_keep isl_ast_node *For, + SetVector &Values, + SetVector &Loops); + + /// Change the llvm::Value(s) used for code generation. + /// + /// When generating code certain values (e.g., references to induction + /// variables or array base pointers) in the original code may be replaced by + /// new values. This function allows to (partially) update the set of values + /// used. A typical use case for this function is the case when we continue + /// code generation in a subfunction/kernel function and need to explicitly + /// pass down certain values. + /// + /// @param NewValues A map that maps certain llvm::Values to new llvm::Values. + void updateValues(ParallelLoopGenerator::ValueToValueMapTy &NewValues); + void createFor(__isl_take isl_ast_node *For); void createForVector(__isl_take isl_ast_node *For, int VectorWidth); void createForSequential(__isl_take isl_ast_node *For); + /// Create LLVM-IR that executes a for node thread parallel. + /// + /// @param For The FOR isl_ast_node for which code is generated. + void createForParallel(__isl_take isl_ast_node *For); + /// Generate LLVM-IR that computes the values of the original induction /// variables in function of the newly generated loop induction variables. /// @@ -238,6 +296,98 @@ unsigned IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) { return NumberOfIterations + 1; } +struct FindValuesUser { + LoopInfo &LI; + ScalarEvolution &SE; + Region &R; + SetVector &Values; + SetVector &SCEVs; +}; + +/// Extract the values and SCEVs needed to generate code for a ScopStmt. +/// +/// This function extracts a ScopStmt from a given isl_set and computes the +/// Values this statement depends on as well as a set of SCEV expressions that +/// need to be synthesized when generating code for this statment. +static int findValuesInStmt(isl_set *Set, void *UserPtr) { + isl_id *Id = isl_set_get_tuple_id(Set); + struct FindValuesUser &User = *static_cast(UserPtr); + const ScopStmt *Stmt = static_cast(isl_id_get_user(Id)); + const BasicBlock *BB = Stmt->getBasicBlock(); + + // Check all the operands of instructions in the basic block. + for (const Instruction &Inst : *BB) { + for (Value *SrcVal : Inst.operands()) { + if (Instruction *OpInst = dyn_cast(SrcVal)) + if (canSynthesize(OpInst, &User.LI, &User.SE, &User.R)) { + User.SCEVs.insert( + User.SE.getSCEVAtScope(OpInst, User.LI.getLoopFor(BB))); + continue; + } + if (Instruction *OpInst = dyn_cast(SrcVal)) + if (Stmt->getParent()->getRegion().contains(OpInst)) + continue; + + if (isa(SrcVal) || isa(SrcVal)) + User.Values.insert(SrcVal); + } + } + isl_id_free(Id); + isl_set_free(Set); + return 0; +} + +void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For, + SetVector &Values, + SetVector &Loops) { + + SetVector SCEVs; + struct FindValuesUser FindValues = {LI, SE, S.getRegion(), Values, SCEVs}; + + for (const auto &I : IDToValue) + Values.insert(I.second); + + for (const auto &I : OutsideLoopIterations) + Values.insert(cast(I.second)->getValue()); + + isl_union_set *Schedule = isl_union_map_domain(IslAstInfo::getSchedule(For)); + + isl_union_set_foreach_set(Schedule, findValuesInStmt, &FindValues); + isl_union_set_free(Schedule); + + for (const SCEV *Expr : SCEVs) { + findValues(Expr, Values); + findLoops(Expr, Loops); + } + + Values.remove_if([](const Value *V) { return isa(V); }); + + /// Remove loops that contain the scop or that are part of the scop, as they + /// are considered local. This leaves only loops that are before the scop, but + /// do not contain the scop itself. + Loops.remove_if([this](const Loop *L) { + return this->S.getRegion().contains(L) || + L->contains(S.getRegion().getEntry()); + }); +} + +void IslNodeBuilder::updateValues( + ParallelLoopGenerator::ValueToValueMapTy &NewValues) { + SmallPtrSet Inserted; + + for (const auto &I : IDToValue) { + IDToValue[I.first] = NewValues[I.second]; + Inserted.insert(I.second); + } + + for (const auto &I : NewValues) { + if (Inserted.count(I.first)) + continue; + + ValueMap[I.first] = I.second; + } +} + void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User, std::vector &IVS, __isl_take isl_id *IteratorID, @@ -315,7 +465,7 @@ void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For, llvm_unreachable("Unhandled isl_ast_node in vectorizer"); } - IDToValue.erase(IteratorID); + IDToValue.erase(IDToValue.find(IteratorID)); isl_id_free(IteratorID); isl_union_map_free(Schedule); @@ -379,7 +529,7 @@ void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) { Annotator.popLoop(Parallel); - IDToValue.erase(IteratorID); + IDToValue.erase(IDToValue.find(IteratorID)); Builder.SetInsertPoint(ExitBlock->begin()); @@ -388,6 +538,139 @@ void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) { isl_id_free(IteratorID); } +/// @brief Remove the BBs contained in a (sub)function from the dominator tree. +/// +/// This function removes the basic blocks that are part of a subfunction from +/// the dominator tree. Specifically, when generating code it may happen that at +/// some point the code generation continues in a new sub-function (e.g., when +/// generating OpenMP code). The basic blocks that are created in this +/// sub-function are then still part of the dominator tree of the original +/// function, such that the dominator tree reaches over function boundaries. +/// This is not only incorrect, but also causes crashes. This function now +/// removes from the dominator tree all basic blocks that are dominated (and +/// consequently reachable) from the entry block of this (sub)function. +/// +/// FIXME: A LLVM (function or region) pass should not touch anything outside of +/// the function/region it runs on. Hence, the pure need for this function shows +/// that we do not comply to this rule. At the moment, this does not cause any +/// issues, but we should be aware that such issues may appear. Unfortunately +/// the current LLVM pass infrastructure does not allow to make Polly a module +/// or call-graph pass to solve this issue, as such a pass would not have access +/// to the per-function analyses passes needed by Polly. A future pass manager +/// infrastructure is supposed to enable such kind of access possibly allowing +/// us to create a cleaner solution here. +/// +/// FIXME: Instead of adding the dominance information and then dropping it +/// later on, we should try to just not add it in the first place. This requires +/// some careful testing to make sure this does not break in interaction with +/// the SCEVBuilder and SplitBlock which may rely on the dominator tree or +/// which may try to update it. +/// +/// @param F The function which contains the BBs to removed. +/// @param DT The dominator tree from which to remove the BBs. +static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) { + DomTreeNode *N = DT.getNode(&F->getEntryBlock()); + std::vector Nodes; + + // We can only remove an element from the dominator tree, if all its children + // have been removed. To ensure this we obtain the list of nodes to remove + // using a post-order tree traversal. + for (po_iterator I = po_begin(N), E = po_end(N); I != E; ++I) + Nodes.push_back(I->getBlock()); + + for (BasicBlock *BB : Nodes) + DT.eraseNode(BB); +} + +void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) { + isl_ast_node *Body; + isl_ast_expr *Init, *Inc, *Iterator, *UB; + isl_id *IteratorID; + Value *ValueLB, *ValueUB, *ValueInc; + Type *MaxType; + Value *IV; + CmpInst::Predicate Predicate; + + Body = isl_ast_node_for_get_body(For); + Init = isl_ast_node_for_get_init(For); + Inc = isl_ast_node_for_get_inc(For); + Iterator = isl_ast_node_for_get_iterator(For); + IteratorID = isl_ast_expr_get_id(Iterator); + UB = getUpperBound(For, Predicate); + + ValueLB = ExprBuilder.create(Init); + ValueUB = ExprBuilder.create(UB); + ValueInc = ExprBuilder.create(Inc); + + // OpenMP always uses SLE. In case the isl generated AST uses a SLT + // expression, we need to adjust the loop blound by one. + if (Predicate == CmpInst::ICMP_SLT) + ValueUB = Builder.CreateAdd( + ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType())); + + MaxType = ExprBuilder.getType(Iterator); + MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); + MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType()); + MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); + + if (MaxType != ValueLB->getType()) + ValueLB = Builder.CreateSExt(ValueLB, MaxType); + if (MaxType != ValueUB->getType()) + ValueUB = Builder.CreateSExt(ValueUB, MaxType); + if (MaxType != ValueInc->getType()) + ValueInc = Builder.CreateSExt(ValueInc, MaxType); + + BasicBlock::iterator LoopBody; + + SetVector SubtreeValues; + SetVector Loops; + + getReferencesInSubtree(For, SubtreeValues, Loops); + + // Create for all loops we depend on values that contain the current loop + // iteration. These values are necessary to generate code for SCEVs that + // depend on such loops. As a result we need to pass them to the subfunction. + for (const Loop *L : Loops) { + const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), + SE.getUnknown(Builder.getInt64(1)), + L, SCEV::FlagAnyWrap); + Value *V = generateSCEV(OuterLIV); + OutsideLoopIterations[L] = SE.getUnknown(V); + SubtreeValues.insert(V); + } + + ParallelLoopGenerator::ValueToValueMapTy NewValues; + ParallelLoopGenerator ParallelLoopGen(Builder, P, LI, DT, DL); + + IV = ParallelLoopGen.createParallelLoop(ValueLB, ValueUB, ValueInc, + SubtreeValues, NewValues, &LoopBody); + BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); + Builder.SetInsertPoint(LoopBody); + + // Save the current values. + ValueMapT ValueMapCopy = ValueMap; + IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue; + + updateValues(NewValues); + IDToValue[IteratorID] = IV; + + create(Body); + + // Restore the original values. + ValueMap = ValueMapCopy; + IDToValue = IDToValueCopy; + + Builder.SetInsertPoint(AfterLoop); + removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT); + + for (const Loop *L : Loops) + OutsideLoopIterations.erase(L); + + isl_ast_node_free(For); + isl_ast_expr_free(Iterator); + isl_id_free(IteratorID); +} + void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) { bool Vector = PollyVectorizerChoice != VECTORIZER_NONE; @@ -399,6 +682,11 @@ void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) { return; } } + + if (IslAstInfo::isExecutedInParallel(For)) { + createForParallel(For); + return; + } createForSequential(For); } @@ -474,6 +762,12 @@ void IslNodeBuilder::createSubstitutions(isl_ast_expr *Expr, ScopStmt *Stmt, } } + // Add the current ValueMap to our per-statement value map. + // + // This is needed e.g. to rewrite array base addresses when moving code + // into a parallely executed subfunction. + VMap.insert(ValueMap.begin(), ValueMap.end()); + isl_ast_expr_free(Expr); } @@ -506,6 +800,8 @@ void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) { Id = isl_ast_expr_get_id(StmtExpr); isl_ast_expr_free(StmtExpr); + LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end()); + Stmt = (ScopStmt *)isl_id_get_user(Id); createSubstitutions(Expr, Stmt, VMap, LTS); @@ -558,6 +854,27 @@ void IslNodeBuilder::addParameters(__isl_take isl_set *Context) { isl_id_free(Id); } + // Generate values for the current loop iteration for all surrounding loops. + // + // We may also reference loops outside of the scop which do not contain the + // scop itself, but as the number of such scops may be arbitrarily large we do + // not generate code for them here, but only at the point of code generation + // where these values are needed. + Region &R = S.getRegion(); + Loop *L = LI.getLoopFor(R.getEntry()); + + while (L != nullptr && R.contains(L)) + L = L->getParentLoop(); + + while (L != nullptr) { + const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), + SE.getUnknown(Builder.getInt64(1)), + L, SCEV::FlagAnyWrap); + Value *V = generateSCEV(OuterLIV); + OutsideLoopIterations[L] = SE.getUnknown(V); + L = L->getParentLoop(); + } + isl_set_free(Context); } @@ -574,6 +891,9 @@ class IslCodeGeneration : public ScopPass { IslCodeGeneration() : ScopPass(ID) {} + /// @brief The datalayout used + const DataLayout *DL; + /// @name The analysis passes we need to generate code. /// ///{ @@ -605,6 +925,7 @@ class IslCodeGeneration : public ScopPass { AI = &getAnalysis(); DT = &getAnalysis().getDomTree(); SE = &getAnalysis(); + DL = &getAnalysis().getDataLayout(); assert(!S.getRegion().isTopLevelRegion() && "Top level regions are not supported"); @@ -616,7 +937,7 @@ class IslCodeGeneration : public ScopPass { BasicBlock *EnteringBB = simplifyRegion(&S, this); PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); - IslNodeBuilder NodeBuilder(Builder, Annotator, this, *LI, *SE, *DT); + IslNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, S); NodeBuilder.addParameters(S.getContext()); Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder()); @@ -630,6 +951,7 @@ class IslCodeGeneration : public ScopPass { virtual void printScop(raw_ostream &OS) const {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); diff --git a/polly/lib/Support/SCEVValidator.cpp b/polly/lib/Support/SCEVValidator.cpp index 6c38782ea9da79..40f1d91388b4ff 100644 --- a/polly/lib/Support/SCEVValidator.cpp +++ b/polly/lib/Support/SCEVValidator.cpp @@ -461,6 +461,48 @@ struct SCEVInRegionDependences }; namespace polly { +/// Find all loops referenced in SCEVAddRecExprs. +class SCEVFindLoops { + SetVector &Loops; + +public: + SCEVFindLoops(SetVector &Loops) : Loops(Loops) {} + + bool follow(const SCEV *S) { + if (const SCEVAddRecExpr *AddRec = dyn_cast(S)) + Loops.insert(AddRec->getLoop()); + return true; + } + bool isDone() { return false; } +}; + +void findLoops(const SCEV *Expr, SetVector &Loops) { + SCEVFindLoops FindLoops(Loops); + SCEVTraversal ST(FindLoops); + ST.visitAll(Expr); +} + +/// Find all values referenced in SCEVUnknowns. +class SCEVFindValues { + SetVector &Values; + +public: + SCEVFindValues(SetVector &Values) : Values(Values) {} + + bool follow(const SCEV *S) { + if (const SCEVUnknown *Unknown = dyn_cast(S)) + Values.insert(Unknown->getValue()); + return true; + } + bool isDone() { return false; } +}; + +void findValues(const SCEV *Expr, SetVector &Values) { + SCEVFindValues FindValues(Values); + SCEVTraversal ST(FindValues); + ST.visitAll(Expr); +} + bool hasScalarDepsInsideRegion(const SCEV *Expr, const Region *R) { return SCEVInRegionDependences::hasDependences(Expr, R); } diff --git a/polly/test/Isl/CodeGen/OpenMP/escaping-pointers-invalidate-alias-info.ll b/polly/test/Isl/CodeGen/OpenMP/escaping-pointers-invalidate-alias-info.ll new file mode 100644 index 00000000000000..f8deb7b03a8b27 --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/escaping-pointers-invalidate-alias-info.ll @@ -0,0 +1,55 @@ +; RUN: opt %loadPolly -basicaa -tbaa -polly-parallel -polly-codegen-isl -polly-code-generator=isl -S < %s | FileCheck %s + +; CHECK: polly.split_new_and_old: +; CHECK-NOT: polly.split_new_and_old: +; CHECK: @CalculateQuant8Param.polly.subfn + +; In this test case the first loop is only detected as a scop because + + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@active_sps = external global [8 x i32]* + +define void @CalculateQuant8Param() { +entry: + %present = alloca i64, align 8 + %present19 = bitcast i64* %present to i8* + br label %for.1 + +for.1: + %indvar.1 = phi i64 [ %indvar.1.next, %for.1 ], [ 0, %entry ] + %tmp = load [8 x i32]** @active_sps, !tbaa !1 + %arrayidx.1b = getelementptr [8 x i32]* %tmp, i32 0, i64 0 + %tmp1 = load i32* %arrayidx.1b, !tbaa !5 + + %arrayidx.1a = getelementptr i8* %present19, i64 0 + %arrayidx.1c = bitcast i8* %arrayidx.1a to i32* + store i32 %tmp1, i32* %arrayidx.1c + %indvar.1.next = add i64 %indvar.1, 1 + br i1 false, label %for.1, label %fence + +fence: + fence seq_cst + br label %for.2 + +for.2: + %indvar.2 = phi i64 [ %indvar.2.next, %for.2 ], [ 0, %fence ] + %uglygep = getelementptr i8* %present19, i64 %indvar.2 + %arrayidx.2 = bitcast i8* %uglygep to i32* + store i32 42, i32* %arrayidx.2 + %indvar.2.next = add i64 %indvar.2, 1 + %exitcond18 = icmp ne i64 %indvar.2.next, 2 + br i1 %exitcond18, label %for.2, label %end + +end: + ret void +} + +!1 = metadata !{metadata !2, metadata !2, i64 0} +!2 = metadata !{metadata !"any pointer", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} +!5 = metadata !{metadata !6, metadata !6, i64 0} +!6 = metadata !{metadata !"int", metadata !3, i64 0} diff --git a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-iv.ll b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-iv.ll new file mode 100644 index 00000000000000..02bf3ed4a9c0bc --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-iv.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + +; This code has failed the scev based code generation as the scev in the scop +; contains an AddRecExpr of an outer loop. When generating code, we did not +; properly forward the value of this expression to the subfunction. + +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 <= 1023; c1 += 1) +; AST: Stmt_for_j(c1); + +; IR: @single_parallel_loop.polly.subfn + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x float] zeroinitializer, align 16 + +define void @single_parallel_loop() nounwind { +entry: + br label %for.i + +for.i: + %indvar.i = phi i64 [ %indvar.i.next, %for.i.inc], [ 0, %entry ] + br label %for.j + +for.j: + %indvar.j = phi i64 [ %indvar.j.next, %for.j], [ 0, %for.i ] + %sum = add i64 %indvar.j, %indvar.i + %scevgep = getelementptr [1024 x float]* @A, i64 0, i64 %sum + store float 0.0, float *%scevgep + %indvar.j.next = add i64 %indvar.j, 1 + %exitcond.j = icmp slt i64 %indvar.j.next, 1024 + br i1 %exitcond.j, label %for.j, label %for.i.inc + +for.i.inc: + fence seq_cst + %indvar.i.next = add i64 %indvar.i, 1 + %exitcond.i = icmp ne i64 %indvar.i.next, 1024 + br i1 %exitcond.i, label %for.i, label %exit + +exit: + ret void +} diff --git a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-2.ll b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-2.ll new file mode 100644 index 00000000000000..97f9f05581e2be --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-2.ll @@ -0,0 +1,36 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR-SCEV + +; AST: #pragma simd +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 <= 1023; c1 += 1) +; AST: Stmt_for_i(c1); + +; IR: %[[gep:[._a-zA-Z0-9]*]] = getelementptr inbounds { [1024 x double]*, i64 }* %polly.par.userContext, i32 0, i32 1 +; IR: store i64 %extern, i64* %[[gep]] + +; IR-SCEV: getelementptr inbounds { [1024 x double]* }* %polly.par.userContext, i32 0, i32 0 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @kernel_trmm([1024 x double]* %B) { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: + %extern = add i64 1, 0 + br label %for.i + +for.i: + %indvar.i = phi i64 [ %indvar.i.next, %for.i ], [ 0, %for.cond1.preheader ] + %getelementptr = getelementptr [1024 x double]* %B, i64 %extern, i64 %indvar.i + store double 0.000000e+00, double* %getelementptr + %indvar.i.next = add i64 %indvar.i, 1 + %exitcond.i = icmp ne i64 %indvar.i.next, 1024 + br i1 %exitcond.i, label %for.i, label %end + +end: + ret void +} diff --git a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll new file mode 100644 index 00000000000000..98d39d1147caf4 --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly -basicaa -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -basicaa -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -basicaa -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + +; The interesting part of this test case is the instruction: +; %tmp = bitcast i8* %call to i64** +; which is not part of the scop. In the SCEV based code generation not '%tmp', +; but %call is a parameter of the SCoP and we need to make sure its value is +; properly forwarded to the subfunction. + +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 < cols; c1 += 1) +; AST: Stmt_for_body(c1); + +; IR: @foo.polly.subfn + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo(i64 %cols, i8* noalias %call) { +entry: + %tmp = bitcast i8* %call to i64** + br label %for.body + +for.body: + %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i64** %tmp, i64 0 + %tmp1 = load i64** %arrayidx, align 8 + %arrayidx.2 = getelementptr inbounds i64* %tmp1, i64 %indvar + store i64 1, i64* %arrayidx.2, align 4 + %indvar.next = add nsw i64 %indvar, 1 + %cmp = icmp slt i64 %indvar.next, %cols + br i1 %cmp, label %for.body, label %end + +end: + ret void +} + +; Another variation of this test case, now with even more of the index +; expression defined outside of the scop. + +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 < cols; c1 += 1) +; AST: Stmt_for_body(c1); + +; IR: @bar.polly.subfn + +define void @bar(i64 %cols, i8* noalias %call) { +entry: + %tmp = bitcast i8* %call to i64** + %arrayidx = getelementptr inbounds i64** %tmp, i64 0 + br label %for.body + +for.body: + %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] + %tmp1 = load i64** %arrayidx, align 8 + %arrayidx.2 = getelementptr inbounds i64* %tmp1, i64 %indvar + store i64 1, i64* %arrayidx.2, align 4 + %indvar.next = add nsw i64 %indvar, 1 + %cmp = icmp slt i64 %indvar.next, %cols + br i1 %cmp, label %for.body, label %end + +end: + ret void +} diff --git a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values.ll b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values.ll new file mode 100644 index 00000000000000..75b8a5f5c8dd9b --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values.ll @@ -0,0 +1,48 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev < %s | FileCheck %s -check-prefix=IR + +; Make sure we correctly forward the reference to 'A' to the OpenMP subfunction. +; +; void loop_references_outer_ids(float *A) { +; for (long i = 0; i < 100; i++) +; A[i] = i; +; } + + +; AST: #pragma simd +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 <= 99; c1 += 1) +; AST: Stmt_for_body(c1); + +; IR-LABEL: polly.start: +; IR-NEXT: %0 = bitcast { float* }* %polly.par.userContext to i8* +; IR-NEXT: call void @llvm.lifetime.start(i64 8, i8* %0) +; IR-NEXT: %1 = getelementptr inbounds { float* }* %polly.par.userContext, i32 0, i32 0 +; IR-NEXT: store float* %A, float** %1 +; IR-NEXT: %polly.par.userContext1 = bitcast { float* }* %polly.par.userContext to i8* + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @loop_references_outer_ids(float* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i64 %i.0, 100 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %conv = sitofp i64 %i.0 to float + %arrayidx = getelementptr inbounds float* %A, i64 %i.0 + store float %conv, float* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i64 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/CodeGen/OpenMP/loop-bounds-reference-outer-ids.ll b/polly/test/Isl/CodeGen/OpenMP/loop-bounds-reference-outer-ids.ll new file mode 100644 index 00000000000000..e94e90e81bcf03 --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/loop-bounds-reference-outer-ids.ll @@ -0,0 +1,102 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev < %s | FileCheck %s -check-prefix=IR +; +; float A[100]; +; +; void loop_references_outer_ids(long n) { +; for (long i = 0; i < 100; i++) +; for (long j = 0; j < 100; j++) +; for (long k = 0; k < n + i; k++) +; A[j] += i + j + k; +; } + +; In this test case we verify that the j-loop is generated as OpenMP parallel +; loop and that the values of 'i' and 'n', needed in the loop bounds of the +; k-loop, are correctly passed to the subfunction. + +; AST: #pragma minimal dependence distance: 1 +; AST: for (int c1 = max(0, -n + 1); c1 <= 99; c1 += 1) +; AST: #pragma omp parallel for +; AST: for (int c3 = 0; c3 <= 99; c3 += 1) +; AST: #pragma minimal dependence distance: 1 +; AST: for (int c5 = 0; c5 < n + c1; c5 += 1) +; AST: Stmt_for_body6(c1, c3, c5); + +; IR: %polly.par.userContext = alloca { i64, i64 } +; IR: %4 = bitcast { i64, i64 }* %polly.par.userContext to i8* +; IR-NEXT: call void @llvm.lifetime.start(i64 16, i8* %4) +; IR-NEXT: %5 = getelementptr inbounds { i64, i64 }* %polly.par.userContext, i32 0, i32 0 +; IR-NEXT: store i64 %n, i64* %5 +; IR-NEXT: %6 = getelementptr inbounds { i64, i64 }* %polly.par.userContext, i32 0, i32 1 +; IR-NEXT: store i64 %polly.indvar, i64* %6 +; IR-NEXT: %polly.par.userContext1 = bitcast { i64, i64 }* %polly.par.userContext to i8* + +; IR-LABEL: @loop_references_outer_ids.polly.subfn(i8* %polly.par.userContext) +; IR: %polly.par.userContext1 = bitcast i8* %polly.par.userContext to { i64, i64 }* +; IR-NEXT: %0 = getelementptr inbounds { i64, i64 }* %polly.par.userContext1, i32 0, i32 0 +; IR-NEXT: %1 = load i64* %0 +; IR-NEXT: %2 = getelementptr inbounds { i64, i64 }* %polly.par.userContext1, i32 0, i32 1 +; IR-NEXT: %3 = load i64* %2 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@A = common global [100 x float] zeroinitializer, align 16 + +define void @loop_references_outer_ids(i64 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc13, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc14, %for.inc13 ] + %exitcond1 = icmp ne i64 %i.0, 100 + br i1 %exitcond1, label %for.body, label %for.end15 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc10, %for.body + %j.0 = phi i64 [ 0, %for.body ], [ %inc11, %for.inc10 ] + %exitcond = icmp ne i64 %j.0, 100 + br i1 %exitcond, label %for.body3, label %for.end12 + +for.body3: ; preds = %for.cond1 + br label %for.cond4 + +for.cond4: ; preds = %for.inc, %for.body3 + %k.0 = phi i64 [ 0, %for.body3 ], [ %inc, %for.inc ] + %add = add nsw i64 %i.0, %n + %cmp5 = icmp slt i64 %k.0, %add + br i1 %cmp5, label %for.body6, label %for.end + +for.body6: ; preds = %for.cond4 + %add7 = add nsw i64 %i.0, %j.0 + %add8 = add nsw i64 %add7, %k.0 + %conv = sitofp i64 %add8 to float + %arrayidx = getelementptr inbounds [100 x float]* @A, i64 0, i64 %j.0 + %tmp = load float* %arrayidx, align 4 + %add9 = fadd float %tmp, %conv + store float %add9, float* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body6 + %inc = add nsw i64 %k.0, 1 + br label %for.cond4 + +for.end: ; preds = %for.cond4 + br label %for.inc10 + +for.inc10: ; preds = %for.end + %inc11 = add nsw i64 %j.0, 1 + br label %for.cond1 + +for.end12: ; preds = %for.cond1 + br label %for.inc13 + +for.inc13: ; preds = %for.end12 + %inc14 = add nsw i64 %i.0, 1 + br label %for.cond + +for.end15: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/CodeGen/OpenMP/reference-other-bb.ll b/polly/test/Isl/CodeGen/OpenMP/reference-other-bb.ll new file mode 100644 index 00000000000000..055f717a507d21 --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/reference-other-bb.ll @@ -0,0 +1,27 @@ +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + +; IR: @foo.polly.subfn +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo(i32 %sendcount, i8* %recvbuf) { +entry: + br label %sw.bb3 + +sw.bb3: + %tmp = bitcast i8* %recvbuf to double* + %cmp75 = icmp sgt i32 %sendcount, 0 + br i1 %cmp75, label %for.body, label %end + +for.body: + %i.16 = phi i32 [ %inc14, %for.body ], [ 0, %sw.bb3 ] + %idxprom11 = sext i32 %i.16 to i64 + %arrayidx12 = getelementptr inbounds double* %tmp, i64 %idxprom11 + store double 1.0, double* %arrayidx12, align 8 + %inc14 = add nsw i32 %i.16, 1 + %cmp7 = icmp slt i32 %inc14, %sendcount + br i1 %cmp7, label %for.body, label %end + +end: + ret void +} diff --git a/polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll b/polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll new file mode 100644 index 00000000000000..4d7d4002705419 --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze -polly-codegen-scev < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + + +; - Test the case where scalar evolution references a loop that is outside +; of the scop, but does not contain the scop. +; - Test the case where two parallel subfunctions are created. + +; AST: if (symbol >= p_2 + 1) { +; AST-NEXT: #pragma simd +; AST-NEXT: #pragma omp parallel for +; AST-NEXT: for (int c1 = 0; c1 < p_0 + symbol; c1 += 1) +; AST-NEXT: Stmt_while_body(c1); +; AST-NEXT: } else +; AST-NEXT: #pragma simd +; AST-NEXT: #pragma omp parallel for +; AST-NEXT: for (int c1 = 0; c1 <= p_0 + p_2; c1 += 1) +; AST-NEXT: Stmt_while_body(c1); + +; IR: @update_model.polly.subfn +; IR: @update_model.polly.subfn1 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@cum_freq = external global [258 x i64], align 16 + +define void @update_model(i64 %symbol) { +entry: + br label %for.one + +for.one: + %i.1 = phi i64 [ %dec17, %for.one ], [ %symbol, %entry ] + %dec17 = add nsw i64 %i.1, -1 + br i1 undef, label %for.one, label %while.body + +while.body: + %indvar = phi i64 [ %sub42, %while.body ], [ %i.1, %for.one ] + %sub42 = add nsw i64 %indvar, -1 + %arrayidx44 = getelementptr inbounds [258 x i64]* @cum_freq, i64 0, i64 %sub42 + store i64 1, i64* %arrayidx44, align 4 + %cmp40 = icmp sgt i64 %sub42, 0 + br i1 %cmp40, label %while.body, label %while.end + +while.end: + ret void +} diff --git a/polly/test/Isl/CodeGen/OpenMP/single_loop.ll b/polly/test/Isl/CodeGen/OpenMP/single_loop.ll new file mode 100644 index 00000000000000..76268ab967d1a7 --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/single_loop.ll @@ -0,0 +1,118 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + +; RUN: opt %loadPolly -polly-parallel -polly-import-jscop -polly-import-jscop-dir=%S -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST-STRIDE4 +; RUN: opt %loadPolly -polly-parallel -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -S < %s | FileCheck %s -check-prefix=IR-STRIDE4 +; RUN: opt %loadPolly -polly-parallel -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -polly-codegen-scev -S < %s | FileCheck %s -check-prefix=IR-STRIDE4 + +; This extensive test case tests the creation of the full set of OpenMP calls +; as well as the subfunction creation using a trivial loop as example. + +; #define N 1024 +; float A[N]; +; +; void single_parallel_loop(void) { +; for (long i = 0; i < N; i++) +; A[i] = 1; +; } + +; AST: #pragma simd +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 <= 1023; c1 += 1) +; AST: Stmt_S(c1); + +; AST-STRIDE4: #pragma omp parallel for +; AST-STRIDE4: for (int c1 = 0; c1 <= 1023; c1 += 4) +; AST-STRIDE4: #pragma simd +; AST-STRIDE4: for (int c2 = c1; c2 <= c1 + 3; c2 += 1) +; AST-STRIDE4: Stmt_S(c2); + +; IR-LABEL: single_parallel_loop() +; IR-NEXT: entry +; IR-NEXT: %polly.par.userContext = alloca + +; IR-LABEL: polly.start: +; IR-NEXT: %0 = bitcast {}* %polly.par.userContext to i8* +; IR-NEXT: call void @llvm.lifetime.start(i64 0, i8* %0) +; IR-NEXT: %polly.par.userContext1 = bitcast {}* %polly.par.userContext to i8* +; IR-NEXT: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop.polly.subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 1) +; IR-NEXT: call void @single_parallel_loop.polly.subfn(i8* %polly.par.userContext1) +; IR-NEXT: call void @GOMP_parallel_end() +; IR-NEXT: %1 = bitcast {}* %polly.par.userContext to i8* +; IR-NEXT: call void @llvm.lifetime.end(i64 8, i8* %1) +; IR-NEXT: br label %polly.merge_new_and_old + +; IR: define internal void @single_parallel_loop.polly.subfn(i8* %polly.par.userContext) #1 +; IR-LABEL: polly.par.setup: +; IR-NEXT: %polly.par.LBPtr = alloca i64 +; IR-NEXT: %polly.par.UBPtr = alloca i64 +; IR-NEXT: %polly.par.userContext1 = +; IR: br label %polly.par.checkNext + +; IR-LABEL: polly.par.exit: +; IR-NEXT: call void @GOMP_loop_end_nowait() +; IR-NEXT: ret void + +; IR-LABEL: polly.par.checkNext: +; IR-NEXT: %[[parnext:[._a-zA-Z0-9]*]] = call i8 @GOMP_loop_runtime_next(i64* %polly.par.LBPtr, i64* %polly.par.UBPtr) +; IR-NEXT: %[[cmp:[._a-zA-Z0-9]*]] = icmp ne i8 %[[parnext]], 0 +; IR-NEXT: br i1 %[[cmp]], label %polly.par.loadIVBounds, label %polly.par.exit + +; IR-LABEL: polly.par.loadIVBounds: +; IR-NEXT: %polly.par.LB = load i64* %polly.par.LBPtr +; IR-NEXT: %polly.par.UB = load i64* %polly.par.UBPtr +; IR-NEXT: %polly.par.UBAdjusted = sub i64 %polly.par.UB, 1 +; IR-NEXT: br label %polly.loop_preheader + +; IR-LABEL: polly.loop_exit: +; IR-NEXT: br label %polly.par.checkNext + +; IR-LABEL: polly.loop_header: +; IR-NEXT: %polly.indvar = phi i64 [ %polly.par.LB, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.S ] +; IR-NEXT: br label %polly.stmt.S + +; IR-LABEL: polly.stmt.S: +; IR-NEXT: %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar +; IR-NEXT: store float 1.000000e+00, float* %[[gep]] +; IR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1 +; IR-NEXT: %polly.adjust_ub = sub i64 %polly.par.UBAdjusted, 1 +; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, %polly.adjust_ub +; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit + +; IR-LABEL: polly.loop_preheader: +; IR-NEXT: br label %polly.loop_header + +; IR: attributes #1 = { "polly.skip.fn" } + +; IR-STRIDE4: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop.polly.subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 4) +; IR-STRIDE4: add nsw i64 %polly.indvar, 3 +; IR-STRIDE4: %polly.indvar_next = add nsw i64 %polly.indvar, 4 +; IR-STRIDE4 %polly.adjust_ub = sub i64 %polly.par.UBAdjusted, 4 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x float] zeroinitializer, align 16 + +define void @single_parallel_loop() nounwind { +entry: + br label %for.i + +for.i: + %indvar = phi i64 [ %indvar.next, %for.inc], [ 0, %entry ] + %scevgep = getelementptr [1024 x float]* @A, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %S, label %exit + +S: + store float 1.0, float* %scevgep + br label %for.inc + +for.inc: + %indvar.next = add i64 %indvar, 1 + br label %for.i + +exit: + ret void +} diff --git a/polly/test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll b/polly/test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll new file mode 100644 index 00000000000000..570f475b58126a --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll @@ -0,0 +1,50 @@ +; RUN: opt %loadPolly -tbaa -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -tbaa -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -tbaa -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + +; #define N 1024 +; float A[N]; +; +; void single_parallel_loop(void) { +; for (long i = 0; i < N; i++) +; A[i] = 1; +; } + +; AST: #pragma simd +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 <= 1023; c1 += 1) +; AST: Stmt_S(c1); + +; IR: @single_parallel_loop.polly.subfn +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @single_parallel_loop(float** %A) nounwind { +entry: + br label %for.i + +for.i: + %indvar = phi i64 [ %indvar.next, %for.inc], [ 0, %entry ] + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %S, label %exit + +S: + %ptr = load float** %A, !tbaa !2 + %scevgep = getelementptr float* %ptr, i64 %indvar + %val = load float* %scevgep, !tbaa !6 + %sum = fadd float %val, 1.0 + store float %sum, float* %scevgep, !tbaa !6 + br label %for.inc + +for.inc: + %indvar.next = add i64 %indvar, 1 + br label %for.i + +exit: + ret void +} + +!2 = metadata !{metadata !"float", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} +!6 = metadata !{metadata !"float *ptr", metadata !3, i64 0} diff --git a/polly/test/Isl/CodeGen/OpenMP/single_parallel_loop___%for.i---%exit.jscop b/polly/test/Isl/CodeGen/OpenMP/single_parallel_loop___%for.i---%exit.jscop new file mode 100644 index 00000000000000..5b3b1d4d7f85ad --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/single_parallel_loop___%for.i---%exit.jscop @@ -0,0 +1,17 @@ +{ + "context" : "{ : }", + "name" : "for.i => exit", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "{ Stmt_S[i0] -> MemRef_A[i0] }" + } + ], + "domain" : "{ Stmt_S[i0] : i0 >= 0 and i0 <= 1023 }", + "name" : "Stmt_S", + "schedule" : "{ Stmt_S[i0] -> scattering[0, floor(i0/4) * 4, i0] }" + } + ] +} diff --git a/polly/test/Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll b/polly/test/Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll new file mode 100644 index 00000000000000..646e93d92d1378 --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze -polly-codegen-scev < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + +; This test case verifies that we create correct code even if two OpenMP loops +; share common outer variables. + +; AST: if (nj >= p_1 + 3) { +; AST: #pragma simd +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 < p_0 + nj - 1; c1 += 1) +; AST: Stmt_for_body35(c1); +; AST: } else +; AST: #pragma simd +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 <= p_0 + p_1; c1 += 1) +; AST: Stmt_for_body35(c1); + +; IR: @foo.polly.subfn +; IR: @foo.polly.subfn1 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo(i64 %nj, [512 x double]* %R) { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: + %k.014 = phi i64 [ %inc87, %for.inc86 ], [ 0, %entry ] + %j.010 = add nsw i64 %k.014, 1 + br i1 undef, label %for.body35, label %for.inc86 + +for.body35: + %j.012 = phi i64 [ %j.0, %for.body35 ], [ %j.010, %for.cond1.preheader ] + %arrayidx39 = getelementptr inbounds [512 x double]* %R, i64 0, i64 %j.012 + store double 0.000000e+00, double* %arrayidx39 + %j.0 = add nsw i64 %j.012, 1 + %cmp34 = icmp slt i64 %j.0, %nj + br i1 %cmp34, label %for.body35, label %for.inc86 + +for.inc86: + %inc87 = add nsw i64 %k.014, 1 + br i1 undef, label %for.cond1.preheader, label %for.end88 + +for.end88: + ret void +}