Skip to content

Commit

Permalink
[Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
Browse files Browse the repository at this point in the history
DominatorTree, LoopInfo, and ScalarEvolution are function-level analyses
that expect to be called only on instructions and basic blocks of the
function they were original created for. When Polly outlined a parallel
loop body into a separate function, it reused the same analyses seemed
to work until new checks to be added in #101198.

This patch creates new analyses for the subfunctions. GenDT, GenLI, and
GenSE now refer to the analyses of the current region of code. Outside
of an outlined function, they refer to the same analysis as used for the
SCoP, but are substituted within an outlined function.

Additionally to the cross-function queries of DT/LI/SE, we must not
create SCEVs that refer to a mix of expressions for old and generated
values. Currently, SCEVs themselves do not "remember" which
ScalarEvolution analysis they were created for, but mixing them is just
as unexpected as using DT/LI across function boundaries. Hence
`SCEVLoopAddRecRewriter` was combined into `ScopExpander`.
`SCEVLoopAddRecRewriter` only replaced induction variables but left
SCEVUnknowns to reference the old function. `SCEVParameterRewriter`
would have done so but its job was effectively superseded by
`ScopExpander`, and now also `SCEVLoopAddRecRewriter`. Some issues
persist put marked with a FIXME in the code. Changing them would
possibly cause this patch to be not NFC anymore.
  • Loading branch information
Meinersbur authored Aug 10, 2024
1 parent 3b57f6b commit 22c77f2
Show file tree
Hide file tree
Showing 13 changed files with 351 additions and 221 deletions.
13 changes: 11 additions & 2 deletions polly/include/polly/CodeGen/BlockGenerators.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,14 @@ class BlockGenerator {
/// The dominator tree of this function.
DominatorTree &DT;

/// The entry block of the current function.
BasicBlock *EntryBB;
/// Relates to the region where the code is emitted into.
/// @{
DominatorTree *GenDT;
LoopInfo *GenLI;
ScalarEvolution *GenSE;
/// @}

public:
/// Map to resolve scalar dependences for PHI operands and scalars.
///
/// When translating code that contains scalar dependences as they result from
Expand Down Expand Up @@ -298,6 +303,10 @@ class BlockGenerator {
/// Split @p BB to create a new one we can use to clone @p BB in.
BasicBlock *splitBB(BasicBlock *BB);

/// Change the function that code is emitted into.
void switchGeneratedFunc(Function *GenFn, DominatorTree *GenDT,
LoopInfo *GenLI, ScalarEvolution *GenSE);

/// Copy the given basic block.
///
/// @param Stmt The statement to code generate.
Expand Down
13 changes: 11 additions & 2 deletions polly/include/polly/CodeGen/IslExprBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ class IslExprBuilder final {
llvm::ScalarEvolution &SE, llvm::DominatorTree &DT,
llvm::LoopInfo &LI, llvm::BasicBlock *StartBlock);

/// Change the function that code is emitted into.
void switchGeneratedFunc(llvm::Function *GenFn, llvm::DominatorTree *GenDT,
llvm::LoopInfo *GenLI, llvm::ScalarEvolution *GenSE);

/// Create LLVM-IR for an isl_ast_expr[ession].
///
/// @param Expr The ast expression for which we generate LLVM-IR.
Expand Down Expand Up @@ -205,10 +209,15 @@ class IslExprBuilder final {

const llvm::DataLayout &DL;
llvm::ScalarEvolution &SE;
llvm::DominatorTree &DT;
llvm::LoopInfo &LI;
llvm::BasicBlock *StartBlock;

/// Relates to the region where the code is emitted into.
/// @{
llvm::DominatorTree *GenDT;
llvm::LoopInfo *GenLI;
llvm::ScalarEvolution *GenSE;
/// @}

llvm::Value *createOp(__isl_take isl_ast_expr *Expr);
llvm::Value *createOpUnary(__isl_take isl_ast_expr *Expr);
llvm::Value *createOpAccess(__isl_take isl_ast_expr *Expr);
Expand Down
21 changes: 8 additions & 13 deletions polly/include/polly/CodeGen/IslNodeBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class IslNodeBuilder {
BlockGen(Builder, LI, SE, DT, ScalarMap, EscapeMap, ValueMap,
&ExprBuilder, StartBlock),
RegionGen(BlockGen), DL(DL), LI(LI), SE(SE), DT(DT),
StartBlock(StartBlock) {}
StartBlock(StartBlock), GenDT(&DT), GenLI(&LI), GenSE(&SE) {}

virtual ~IslNodeBuilder() = default;

Expand Down Expand Up @@ -147,6 +147,13 @@ class IslNodeBuilder {
DominatorTree &DT;
BasicBlock *StartBlock;

/// Relates to the region where the code is emitted into.
/// @{
DominatorTree *GenDT;
LoopInfo *GenLI;
ScalarEvolution *GenSE;
/// @}

/// The current iteration of out-of-scop loops
///
/// This map provides for a given loop a llvm::Value that contains the current
Expand Down Expand Up @@ -246,18 +253,6 @@ class IslNodeBuilder {
SetVector<Value *> &Values,
SetVector<const Loop *> &Loops);

/// Change the llvm::Value(s) used for code generation.
///
/// When generating code certain values (e.g., references to induction
/// variables or array base pointers) in the original code may be replaced by
/// new values. This function allows to (partially) update the set of values
/// used. A typical use case for this function is the case when we continue
/// code generation in a subfunction/kernel function and need to explicitly
/// pass down certain values.
///
/// @param NewValues A map that maps certain llvm::Values to new llvm::Values.
void updateValues(ValueMapT &NewValues);

/// Return the most up-to-date version of the llvm::Value for code generation.
/// @param Original The Value to check for an up to date version.
/// @returns A remapped `Value` from ValueMap, or `Original` if no mapping
Expand Down
24 changes: 14 additions & 10 deletions polly/include/polly/CodeGen/LoopGenerators.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ extern int PollyChunkSize;
/// @param Builder The builder used to create the loop.
/// @param P A pointer to the pass that uses this function.
/// It is used to update analysis information.
/// @param LI The loop info for the current function
/// @param LI The loop info we need to update
/// @param DT The dominator tree we need to update
/// @param ExitBlock The block the loop will exit to.
/// @param Predicate The predicate used to generate the upper loop
Expand Down Expand Up @@ -128,11 +128,9 @@ llvm::DebugLoc createDebugLocForGeneratedCode(Function *F);
class ParallelLoopGenerator {
public:
/// Create a parallel loop generator for the current function.
ParallelLoopGenerator(PollyIRBuilder &Builder, LoopInfo &LI,
DominatorTree &DT, const DataLayout &DL)
: Builder(Builder), LI(LI), DT(DT),
LongType(
Type::getIntNTy(Builder.getContext(), DL.getPointerSizeInBits())),
ParallelLoopGenerator(PollyIRBuilder &Builder, const DataLayout &DL)
: Builder(Builder), LongType(Type::getIntNTy(Builder.getContext(),
DL.getPointerSizeInBits())),
M(Builder.GetInsertBlock()->getParent()->getParent()),
DLGenerated(createDebugLocForGeneratedCode(
Builder.GetInsertBlock()->getParent())) {}
Expand Down Expand Up @@ -164,11 +162,11 @@ class ParallelLoopGenerator {
/// The IR builder we use to create instructions.
PollyIRBuilder &Builder;

/// The loop info of the current function we need to update.
LoopInfo &LI;
/// The loop info for the generated subfunction.
std::unique_ptr<LoopInfo> SubFnLI;

/// The dominance tree of the current function we need to update.
DominatorTree &DT;
/// The dominance tree for the generated subfunction.
std::unique_ptr<DominatorTree> SubFnDT;

/// The type of a "long" on this hardware used for backend calls.
Type *LongType;
Expand All @@ -184,6 +182,12 @@ class ParallelLoopGenerator {
llvm::DebugLoc DLGenerated;

public:
/// Returns the DominatorTree for the generated subfunction.
DominatorTree *getCalleeDominatorTree() const { return SubFnDT.get(); }

/// Returns the LoopInfo for the generated subfunction.
LoopInfo *getCalleeLoopInfo() const { return SubFnLI.get(); }

/// Create a struct for all @p Values and store them in there.
///
/// @param Values The values which should be stored in the struct.
Expand Down
5 changes: 2 additions & 3 deletions polly/include/polly/CodeGen/LoopGeneratorsGOMP.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,8 @@ namespace polly {
class ParallelLoopGeneratorGOMP final : public ParallelLoopGenerator {
public:
/// Create a parallel loop generator for the current function.
ParallelLoopGeneratorGOMP(PollyIRBuilder &Builder, LoopInfo &LI,
DominatorTree &DT, const DataLayout &DL)
: ParallelLoopGenerator(Builder, LI, DT, DL) {}
ParallelLoopGeneratorGOMP(PollyIRBuilder &Builder, const DataLayout &DL)
: ParallelLoopGenerator(Builder, DL) {}

// The functions below may be used if one does not want to generate a
// specific OpenMP parallel loop, but generate individual parts of it
Expand Down
5 changes: 2 additions & 3 deletions polly/include/polly/CodeGen/LoopGeneratorsKMP.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@ using llvm::GlobalVariable;
class ParallelLoopGeneratorKMP final : public ParallelLoopGenerator {
public:
/// Create a parallel loop generator for the current function.
ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, LoopInfo &LI,
DominatorTree &DT, const DataLayout &DL)
: ParallelLoopGenerator(Builder, LI, DT, DL) {
ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, const DataLayout &DL)
: ParallelLoopGenerator(Builder, DL) {
SourceLocationInfo = createSourceLocation();
}

Expand Down
11 changes: 9 additions & 2 deletions polly/include/polly/Support/ScopHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ namespace polly {
class Scop;
class ScopStmt;

/// Same as llvm/Analysis/ScalarEvolutionExpressions.h
using LoopToScevMapT = llvm::DenseMap<const llvm::Loop *, const llvm::SCEV *>;

/// Enumeration of assumptions Polly can take.
enum AssumptionKind {
ALIASING,
Expand Down Expand Up @@ -383,20 +386,24 @@ void splitEntryBlockForAlloca(llvm::BasicBlock *EntryBlock,
/// as the call to SCEVExpander::expandCodeFor:
///
/// @param S The current Scop.
/// @param SE The Scalar Evolution pass.
/// @param SE The Scalar Evolution pass used by @p S.
/// @param GenFn The function to generate code in. Can be the same as @p SE.
/// @param GenSE The Scalar Evolution pass for @p GenFn.
/// @param DL The module data layout.
/// @param Name The suffix added to the new instruction names.
/// @param E The expression for which code is actually generated.
/// @param Ty The type of the resulting code.
/// @param IP The insertion point for the new code.
/// @param VMap A remapping of values used in @p E.
/// @param LoopMap A remapping of loops used in @p E.
/// @param RTCBB The last block of the RTC. Used to insert loop-invariant
/// instructions in rare cases.
llvm::Value *expandCodeFor(Scop &S, llvm::ScalarEvolution &SE,
llvm::Function *GenFn, llvm::ScalarEvolution &GenSE,
const llvm::DataLayout &DL, const char *Name,
const llvm::SCEV *E, llvm::Type *Ty,
llvm::Instruction *IP, ValueMapT *VMap,
llvm::BasicBlock *RTCBB);
LoopToScevMapT *LoopMap, llvm::BasicBlock *RTCBB);

/// Return the condition for the terminator @p TI.
///
Expand Down
38 changes: 22 additions & 16 deletions polly/lib/CodeGen/BlockGenerators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ BlockGenerator::BlockGenerator(
PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT,
AllocaMapTy &ScalarMap, EscapeUsersAllocaMapTy &EscapeMap,
ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder, BasicBlock *StartBlock)
: Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
EntryBB(nullptr), ScalarMap(ScalarMap), EscapeMap(EscapeMap),
: Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT), GenDT(&DT),
GenLI(&LI), GenSE(&SE), ScalarMap(ScalarMap), EscapeMap(EscapeMap),
GlobalMap(GlobalMap), StartBlock(StartBlock) {}

Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
Expand All @@ -75,7 +75,6 @@ Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
if (isa<SCEVCouldNotCompute>(Scev))
return nullptr;

const SCEV *NewScev = SCEVLoopAddRecRewriter::rewrite(Scev, LTS, SE);
ValueMapT VTV;
VTV.insert(BBMap.begin(), BBMap.end());
VTV.insert(GlobalMap.begin(), GlobalMap.end());
Expand All @@ -86,9 +85,9 @@ Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,

assert(IP != Builder.GetInsertBlock()->end() &&
"Only instructions can be insert points for SCEVExpander");
Value *Expanded =
expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV,
StartBlock->getSinglePredecessor());
Value *Expanded = expandCodeFor(
S, SE, Builder.GetInsertBlock()->getParent(), *GenSE, DL, "polly", Scev,
Old->getType(), &*IP, &VTV, &LTS, StartBlock->getSinglePredecessor());

BBMap[Old] = Expanded;
return Expanded;
Expand Down Expand Up @@ -233,6 +232,8 @@ void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
return;
}

// FIXME: We will encounter "NewOperand" again if used twice. getNewValue()
// is meant to be called on old values only.
NewInst->replaceUsesOfWith(OldOperand, NewOperand);
}

Expand Down Expand Up @@ -410,7 +411,7 @@ void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,

BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
&*Builder.GetInsertPoint(), &DT, &LI);
&*Builder.GetInsertPoint(), GenDT, GenLI);
CopyBB->setName("polly.stmt." + BB->getName());
return CopyBB;
}
Expand All @@ -431,11 +432,20 @@ BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
return CopyBB;
}

void BlockGenerator::switchGeneratedFunc(Function *GenFn, DominatorTree *GenDT,
LoopInfo *GenLI,
ScalarEvolution *GenSE) {
assert(GenFn == GenDT->getRoot()->getParent());
assert(GenLI->getTopLevelLoops().empty() ||
GenFn == GenLI->getTopLevelLoops().front()->getHeader()->getParent());
this->GenDT = GenDT;
this->GenLI = GenLI;
this->GenSE = GenSE;
}

void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
ValueMapT &BBMap, LoopToScevMapT &LTS,
isl_id_to_ast_expr *NewAccesses) {
EntryBB = &CopyBB->getParent()->getEntryBlock();

// Block statements and the entry blocks of region statement are code
// generated from instruction lists. This allow us to optimize the
// instructions that belong to a certain scop statement. As the code
Expand Down Expand Up @@ -497,7 +507,7 @@ Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) {
Addr =
new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
DL.getPrefTypeAlign(Ty), ScalarBase->getName() + NameExt);
EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
Addr->insertBefore(&*EntryBB->getFirstInsertionPt());

return Addr;
Expand Down Expand Up @@ -554,10 +564,6 @@ void BlockGenerator::generateScalarLoads(

auto *Address =
getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);
assert((!isa<Instruction>(Address) ||
DT.dominates(cast<Instruction>(Address)->getParent(),
Builder.GetInsertBlock())) &&
"Domination violation");
BBMap[MA->getAccessValue()] = Builder.CreateLoad(
MA->getElementType(), Address, Address->getName() + ".reload");
}
Expand Down Expand Up @@ -615,9 +621,9 @@ void BlockGenerator::generateConditionalExecution(
StringRef BlockName = HeadBlock->getName();

// Generate the conditional block.
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
DomTreeUpdater DTU(GenDT, DomTreeUpdater::UpdateStrategy::Eager);
SplitBlockAndInsertIfThen(Cond, &*Builder.GetInsertPoint(), false, nullptr,
&DTU, &LI);
&DTU, GenLI);
BranchInst *Branch = cast<BranchInst>(HeadBlock->getTerminator());
BasicBlock *ThenBlock = Branch->getSuccessor(0);
BasicBlock *TailBlock = Branch->getSuccessor(1);
Expand Down
35 changes: 23 additions & 12 deletions polly/lib/CodeGen/IslExprBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,23 @@ IslExprBuilder::IslExprBuilder(Scop &S, PollyIRBuilder &Builder,
DominatorTree &DT, LoopInfo &LI,
BasicBlock *StartBlock)
: S(S), Builder(Builder), IDToValue(IDToValue), GlobalMap(GlobalMap),
DL(DL), SE(SE), DT(DT), LI(LI), StartBlock(StartBlock) {
DL(DL), SE(SE), StartBlock(StartBlock), GenDT(&DT), GenLI(&LI),
GenSE(&SE) {
OverflowState = (OTMode == OT_ALWAYS) ? Builder.getFalse() : nullptr;
}

void IslExprBuilder::switchGeneratedFunc(llvm::Function *GenFn,
llvm::DominatorTree *GenDT,
llvm::LoopInfo *GenLI,
llvm::ScalarEvolution *GenSE) {
assert(GenFn == GenDT->getRoot()->getParent());
assert(GenLI->getTopLevelLoops().empty() ||
GenFn == GenLI->getTopLevelLoops().front()->getHeader()->getParent());
this->GenDT = GenDT;
this->GenLI = GenLI;
this->GenSE = GenSE;
}

void IslExprBuilder::setTrackOverflow(bool Enable) {
// If potential overflows are tracked always or never we ignore requests
// to change the behavior.
Expand Down Expand Up @@ -307,14 +320,12 @@ IslExprBuilder::createAccessAddress(__isl_take isl_ast_expr *Expr) {

const SCEV *DimSCEV = SAI->getDimensionSize(u);

llvm::ValueToSCEVMapTy Map;
for (auto &KV : GlobalMap)
Map[KV.first] = SE.getSCEV(KV.second);
DimSCEV = SCEVParameterRewriter::rewrite(DimSCEV, SE, Map);
Value *DimSize =
expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(),
&*Builder.GetInsertPoint(), nullptr,
StartBlock->getSinglePredecessor());
// DimSize should be invariant to the SCoP, so no BBMap nor LoopToScev
// needed. But GlobalMap may contain SCoP-invariant vars.
Value *DimSize = expandCodeFor(
S, SE, Builder.GetInsertBlock()->getParent(), *GenSE, DL, "polly",
DimSCEV, DimSCEV->getType(), &*Builder.GetInsertPoint(), &GlobalMap,
/*LoopMap*/ nullptr, StartBlock->getSinglePredecessor());

Type *Ty = getWidestType(DimSize->getType(), IndexOp->getType());

Expand Down Expand Up @@ -602,10 +613,10 @@ IslExprBuilder::createOpBooleanConditional(__isl_take isl_ast_expr *Expr) {

auto InsertBB = Builder.GetInsertBlock();
auto InsertPoint = Builder.GetInsertPoint();
auto NextBB = SplitBlock(InsertBB, &*InsertPoint, &DT, &LI);
auto NextBB = SplitBlock(InsertBB, &*InsertPoint, GenDT, GenLI);
BasicBlock *CondBB = BasicBlock::Create(Context, "polly.cond", F);
LI.changeLoopFor(CondBB, LI.getLoopFor(InsertBB));
DT.addNewBlock(CondBB, InsertBB);
GenLI->changeLoopFor(CondBB, GenLI->getLoopFor(InsertBB));
GenDT->addNewBlock(CondBB, InsertBB);

InsertBB->getTerminator()->eraseFromParent();
Builder.SetInsertPoint(InsertBB);
Expand Down
Loading

0 comments on commit 22c77f2

Please sign in to comment.