Skip to content

Commit

Permalink
[VPlan] Store VPlan directly in VPRecipeBuilder (NFCI).
Browse files Browse the repository at this point in the history
Instead of passing VPlan in a number of places, just store it directly
in VPRecipeBuilder. A single instance is only used for a single VPlan.

This simplifies the code and was suggested by @nikolaypanchenko in
llvm#84464.
  • Loading branch information
fhahn authored and chencha3 committed Mar 22, 2024
1 parent 12c7e6f commit 26505a8
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 84 deletions.
132 changes: 66 additions & 66 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7898,8 +7898,7 @@ void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
}
}

VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
VPlan &Plan) {
VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
assert(is_contained(predecessors(Dst), Src) && "Invalid edge");

// Look for cached value.
Expand Down Expand Up @@ -7954,7 +7953,7 @@ VPValue *VPRecipeBuilder::getEdgeMask(BasicBlock *Src, BasicBlock *Dst) const {
return ECEntryIt->second;
}

void VPRecipeBuilder::createHeaderMask(VPlan &Plan) {
void VPRecipeBuilder::createHeaderMask() {
BasicBlock *Header = OrigLoop->getHeader();

// When not folding the tail, use nullptr to model all-true mask.
Expand Down Expand Up @@ -7989,7 +7988,7 @@ VPValue *VPRecipeBuilder::getBlockInMask(BasicBlock *BB) const {
return BCEntryIt->second;
}

void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
assert(BlockMaskCache.count(BB) == 0 && "Mask for block already computed");
assert(OrigLoop->getHeader() != BB &&
Expand All @@ -8000,7 +7999,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
VPValue *BlockMask = nullptr;
// This is the block mask. We OR all incoming edges.
for (auto *Predecessor : predecessors(BB)) {
VPValue *EdgeMask = createEdgeMask(Predecessor, BB, Plan);
VPValue *EdgeMask = createEdgeMask(Predecessor, BB);
if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is too.
BlockMaskCache[BB] = EdgeMask;
return;
Expand All @@ -8019,7 +8018,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {

VPWidenMemoryInstructionRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range, VPlanPtr &Plan) {
VFRange &Range) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
"Must be called with either a load or store");

Expand Down Expand Up @@ -8092,7 +8091,7 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
}

VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
PHINode *Phi, ArrayRef<VPValue *> Operands, VPlan &Plan, VFRange &Range) {
PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) {

// Check if this is an integer or fp induction. If so, build the recipe that
// produces its scalar and vector values.
Expand All @@ -8116,7 +8115,7 @@ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
}

VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range, VPlan &Plan) {
TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range) {
// Optimize the special case where the source is a constant integer
// induction variable. Notice that we can only optimize the 'trunc' case
// because (a) FP conversions lose precision, (b) sext/zext may wrap, and
Expand Down Expand Up @@ -8144,8 +8143,7 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
}

VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
ArrayRef<VPValue *> Operands,
VPlanPtr &Plan) {
ArrayRef<VPValue *> Operands) {
unsigned NumIncoming = Phi->getNumIncomingValues();

// We know that all PHIs in non-header blocks are converted into selects, so
Expand All @@ -8158,7 +8156,7 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
for (unsigned In = 0; In < NumIncoming; In++) {
OperandsWithMask.push_back(Operands[In]);
VPValue *EdgeMask =
createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), *Plan);
createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent());
if (!EdgeMask) {
assert(In == 0 && "Both null and non-null edge masks found");
assert(all_equal(Operands) &&
Expand All @@ -8172,8 +8170,7 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,

VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
ArrayRef<VPValue *> Operands,
VFRange &Range,
VPlanPtr &Plan) {
VFRange &Range) {
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
[this, CI](ElementCount VF) {
return CM.isScalarWithPredication(CI, VF);
Expand Down Expand Up @@ -8248,7 +8245,7 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
if (Legal->isMaskRequired(CI))
Mask = getBlockInMask(CI->getParent());
else
Mask = Plan->getVPValueOrAddLiveIn(ConstantInt::getTrue(
Mask = Plan.getVPValueOrAddLiveIn(ConstantInt::getTrue(
IntegerType::getInt1Ty(Variant->getFunctionType()->getContext())));

Ops.insert(Ops.begin() + *MaskPos, Mask);
Expand Down Expand Up @@ -8278,7 +8275,7 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {

VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
ArrayRef<VPValue *> Operands,
VPBasicBlock *VPBB, VPlanPtr &Plan) {
VPBasicBlock *VPBB) {
switch (I->getOpcode()) {
default:
return nullptr;
Expand All @@ -8291,8 +8288,8 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
if (CM.isPredicatedInst(I)) {
SmallVector<VPValue *> Ops(Operands.begin(), Operands.end());
VPValue *Mask = getBlockInMask(I->getParent());
VPValue *One = Plan->getVPValueOrAddLiveIn(
ConstantInt::get(I->getType(), 1u, false));
VPValue *One =
Plan.getVPValueOrAddLiveIn(ConstantInt::get(I->getType(), 1u, false));
auto *SafeRHS =
new VPInstruction(Instruction::Select, {Mask, Ops[1], One},
I->getDebugLoc());
Expand Down Expand Up @@ -8336,8 +8333,7 @@ void VPRecipeBuilder::fixHeaderPhis() {
}

VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I,
VFRange &Range,
VPlan &Plan) {
VFRange &Range) {
bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
Range);
Expand Down Expand Up @@ -8392,21 +8388,22 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I,
return Recipe;
}

VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range,
VPBasicBlock *VPBB, VPlanPtr &Plan) {
VPRecipeBase *
VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
ArrayRef<VPValue *> Operands,
VFRange &Range, VPBasicBlock *VPBB) {
// First, check for specific widening recipes that deal with inductions, Phi
// nodes, calls and memory operations.
VPRecipeBase *Recipe;
if (auto Phi = dyn_cast<PHINode>(Instr)) {
if (Phi->getParent() != OrigLoop->getHeader())
return tryToBlend(Phi, Operands, Plan);
return tryToBlend(Phi, Operands);

// Always record recipes for header phis. Later first-order recurrence phis
// can have earlier phis as incoming values.
recordRecipeOf(Phi);

if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range)))
if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
return Recipe;

VPHeaderPHIRecipe *PhiRecipe = nullptr;
Expand Down Expand Up @@ -8442,9 +8439,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
return PhiRecipe;
}

if (isa<TruncInst>(Instr) &&
(Recipe = tryToOptimizeInductionTruncate(cast<TruncInst>(Instr), Operands,
Range, *Plan)))
if (isa<TruncInst>(Instr) && (Recipe = tryToOptimizeInductionTruncate(
cast<TruncInst>(Instr), Operands, Range)))
return Recipe;

// All widen recipes below deal only with VF > 1.
Expand All @@ -8453,10 +8449,10 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
return nullptr;

if (auto *CI = dyn_cast<CallInst>(Instr))
return tryToWidenCall(CI, Operands, Range, Plan);
return tryToWidenCall(CI, Operands, Range);

if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
return tryToWidenMemory(Instr, Operands, Range, Plan);
return tryToWidenMemory(Instr, Operands, Range);

if (!shouldWiden(Instr, Range))
return nullptr;
Expand All @@ -8475,7 +8471,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
*CI);
}

return tryToWiden(Instr, Operands, VPBB, Plan);
return tryToWiden(Instr, Operands, VPBB);
}

void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
Expand Down Expand Up @@ -8547,37 +8543,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {

SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;

VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, PSE, Builder);

// ---------------------------------------------------------------------------
// Pre-construction: record ingredients whose recipes we'll need to further
// process after constructing the initial VPlan.
// ---------------------------------------------------------------------------

// For each interleave group which is relevant for this (possibly trimmed)
// Range, add it to the set of groups to be later applied to the VPlan and add
// placeholders for its members' Recipes which we'll be replacing with a
// single VPInterleaveRecipe.
for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
auto applyIG = [IG, this](ElementCount VF) -> bool {
bool Result = (VF.isVector() && // Query is illegal for VF == 1
CM.getWideningDecision(IG->getInsertPos(), VF) ==
LoopVectorizationCostModel::CM_Interleave);
// For scalable vectors, the only interleave factor currently supported
// is 2 since we require the (de)interleave2 intrinsics instead of
// shufflevectors.
assert((!Result || !VF.isScalable() || IG->getFactor() == 2) &&
"Unsupported interleave factor for scalable vectors");
return Result;
};
if (!getDecisionAndClampRange(applyIG, Range))
continue;
InterleaveGroups.insert(IG);
for (unsigned i = 0; i < IG->getFactor(); i++)
if (Instruction *Member = IG->getMember(i))
RecipeBuilder.recordRecipeOf(Member);
};

// ---------------------------------------------------------------------------
// Build initial VPlan: Scan the body of the loop in a topological order to
// visit each basic block after having visited its predecessor basic blocks.
Expand Down Expand Up @@ -8612,6 +8577,41 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
bool HasNUW = Style == TailFoldingStyle::None;
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);

VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);

// ---------------------------------------------------------------------------
// Pre-construction: record ingredients whose recipes we'll need to further
// process after constructing the initial VPlan.
// ---------------------------------------------------------------------------

// For each interleave group which is relevant for this (possibly trimmed)
// Range, add it to the set of groups to be later applied to the VPlan and add
// placeholders for its members' Recipes which we'll be replacing with a
// single VPInterleaveRecipe.
for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
auto applyIG = [IG, this](ElementCount VF) -> bool {
bool Result = (VF.isVector() && // Query is illegal for VF == 1
CM.getWideningDecision(IG->getInsertPos(), VF) ==
LoopVectorizationCostModel::CM_Interleave);
// For scalable vectors, the only interleave factor currently supported
// is 2 since we require the (de)interleave2 intrinsics instead of
// shufflevectors.
assert((!Result || !VF.isScalable() || IG->getFactor() == 2) &&
"Unsupported interleave factor for scalable vectors");
return Result;
};
if (!getDecisionAndClampRange(applyIG, Range))
continue;
InterleaveGroups.insert(IG);
for (unsigned i = 0; i < IG->getFactor(); i++)
if (Instruction *Member = IG->getMember(i))
RecipeBuilder.recordRecipeOf(Member);
};

// ---------------------------------------------------------------------------
// Construct recipes for the instructions in the loop
// ---------------------------------------------------------------------------

// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
LoopBlocksDFS DFS(OrigLoop);
Expand All @@ -8633,9 +8633,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Builder.setInsertPoint(VPBB);

if (VPBB == HeaderVPBB)
RecipeBuilder.createHeaderMask(*Plan);
RecipeBuilder.createHeaderMask();
else if (NeedsMasks)
RecipeBuilder.createBlockInMask(BB, *Plan);
RecipeBuilder.createBlockInMask(BB);

// Introduce each ingredient into VPlan.
// TODO: Model and preserve debug intrinsics in VPlan.
Expand All @@ -8658,10 +8658,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
continue;

VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe(
Instr, Operands, Range, VPBB, Plan);
VPRecipeBase *Recipe =
RecipeBuilder.tryToCreateWidenRecipe(Instr, Operands, Range, VPBB);
if (!Recipe)
Recipe = RecipeBuilder.handleReplication(Instr, Range, *Plan);
Recipe = RecipeBuilder.handleReplication(Instr, Range);
for (auto *Def : Recipe->definedValues()) {
auto *UV = Def->getUnderlyingValue();
Plan->addVPValue(UV, Def);
Expand Down
Loading

0 comments on commit 26505a8

Please sign in to comment.