JIT: Prove some cases where strength reducing to GC pointers is ok #104679

Merged · 4 commits · Jul 15, 2024
46 changes: 46 additions & 0 deletions src/coreclr/jit/assertionprop.cpp
@@ -4848,6 +4848,52 @@ AssertionIndex Compiler::optAssertionIsNonNullInternal(GenTree*
return NO_ASSERTION_INDEX;
}

//------------------------------------------------------------------------
// optAssertionVNIsNonNull: See if we can prove that the value of a VN is
// non-null using assertions.
//
// Arguments:
// vn - VN to check
// assertions - set of live assertions
//
// Return Value:
// True if the VN could be proven non-null.
//
bool Compiler::optAssertionVNIsNonNull(ValueNum vn, ASSERT_VALARG_TP assertions)
{
if (vnStore->IsKnownNonNull(vn))
{
return true;
}

// Check each assertion to see if we have a 'vn != null' assertion.
//
BitVecOps::Iter iter(apTraits, assertions);
unsigned index = 0;
while (iter.NextElem(&index))
{
AssertionIndex assertionIndex = GetAssertionIndex(index);
if (assertionIndex > optAssertionCount)
{
break;
}
AssertionDsc* curAssertion = optGetAssertion(assertionIndex);
if (!curAssertion->CanPropNonNull())
{
continue;
}

if (curAssertion->op1.vn != vn)
{
continue;
}

return true;
}

return false;
}

/*****************************************************************************
*
* Given a tree consisting of a call and a set of available assertions, we
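As an aside on intended use: this helper is consumed later in the PR from the strength reduction code in inductionvariableopts.cpp. A minimal sketch of that pattern follows (context such as m_scevContext, m_loop, and baseScev is assumed from that code, not part of this file):

    // Sketch: prove a loop-invariant base address non-null at the preheader.
    ValueNum vn = m_scevContext.MaterializeVN(baseScev);
    if (vn != ValueNumStore::NoVN)
    {
        BasicBlock* preheader = m_loop->EntryEdge(0)->getSourceBlock();
        if (m_comp->optAssertionVNIsNonNull(vn, preheader->bbAssertionOut))
        {
            // The preheader dominates every iteration, so the base stays
            // non-null throughout the loop.
        }
    }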
1 change: 1 addition & 0 deletions src/coreclr/jit/compiler.h
@@ -8025,6 +8025,7 @@ class Compiler
AssertionIndex optAssertionIsSubrange(GenTree* tree, IntegralRange range, ASSERT_VALARG_TP assertions);
AssertionIndex optAssertionIsSubtype(GenTree* tree, GenTree* methodTableArg, ASSERT_VALARG_TP assertions);
AssertionIndex optAssertionIsNonNullInternal(GenTree* op, ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased));
bool optAssertionVNIsNonNull(ValueNum vn, ASSERT_VALARG_TP assertions);
bool optAssertionIsNonNull(GenTree* op,
ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased) DEBUGARG(AssertionIndex* pIndex));

195 changes: 166 additions & 29 deletions src/coreclr/jit/inductionvariableopts.cpp
@@ -5,29 +5,46 @@
// scalar evolution analysis (see scev.h and scev.cpp for more information
// about the scalar evolution analysis).
//
-// Currently the only optimization done is widening of primary induction
-// variables from 32 bits into 64 bits. This is generally only profitable on
-// x64 that does not allow zero extension of 32-bit values in addressing modes
-// (in contrast, arm64 does have the capability of including zero extensions in
-// addressing modes). For x64 this saves a zero extension for every array
-// access inside the loop, in exchange for some widening or narrowing stores
-// outside the loop:
-// - To make sure the new widened IV starts at the right value it is
-//   initialized to the value of the narrow IV outside the loop (either in the
-//   preheader or at the def location of the narrow IV). Usually the start
-//   value is a constant, in which case the widened IV is just initialized to
-//   the constant value.
-// - If the narrow IV is used after the loop we need to store it back from
-//   the widened IV in the exits. We depend on liveness sets to figure out
-//   which exits to insert IR into.
-//
-// These steps ensure that the wide IV has the right value to begin with and
-// the old narrow IV still has the right value after the loop. Additionally,
-// we must replace every use of the narrow IV inside the loop with the widened
-// IV. This is done by a traversal of the IR inside the loop. We do not
-// actually widen the uses of the IV; rather, we keep all uses and defs as
-// 32-bit, which the backend is able to handle efficiently on x64. Because of
-// this we do not need to worry about overflow.
+// Currently the following optimizations are done:
+//
+// IV widening:
+//   This widens primary induction variables from 32 bits into 64 bits. This
+//   is generally only profitable on x64, which does not allow zero extension
+//   of 32-bit values in addressing modes (in contrast, arm64 can include
+//   zero extensions in addressing modes). For x64 this saves a zero extension
+//   for every array access inside the loop, in exchange for some widening or
+//   narrowing stores outside the loop:
+//   - To make sure the new widened IV starts at the right value it is
+//     initialized to the value of the narrow IV outside the loop (either in
+//     the preheader or at the def location of the narrow IV). Usually the
+//     start value is a constant, in which case the widened IV is just
+//     initialized to the constant value.
+//   - If the narrow IV is used after the loop we need to store it back from
+//     the widened IV in the exits. We depend on liveness sets to figure out
+//     which exits to insert IR into.
+//
+//   These steps ensure that the wide IV has the right value to begin with and
+//   the old narrow IV still has the right value after the loop. Additionally,
+//   we must replace every use of the narrow IV inside the loop with the
+//   widened IV. This is done by a traversal of the IR inside the loop. We do
+//   not actually widen the uses of the IV; rather, we keep all uses and defs
+//   as 32-bit, which the backend is able to handle efficiently on x64.
+//   Because of this we do not need to worry about overflow.
+//
+// Loop reversing:
+//   This converts loops that are up-counted into loops that are down-counted.
+//   Down-counted loops can generally do their IV update and compare in a
+//   single instruction, bypassing the need to do a separate comparison with a
+//   bound.
+//
+// Strength reduction (disabled):
+//   This changes the stride of primary IVs in a loop to avoid more expensive
+//   multiplications inside the loop. Commonly the primary IVs are used only
+//   for indexing memory at some element size, which gives rise to these
+//   multiplications.
+//
+//   Strength reduction frequently relies on reversing the loop to remove the
+//   last non-multiplied use of the primary IV.
+//

#include "jitpch.h"
@@ -1227,6 +1244,7 @@ class StrengthReductionContext
bool InitializeCursors(GenTreeLclVarCommon* primaryIVLcl, ScevAddRec* primaryIV);
void AdvanceCursors(ArrayStack<CursorInfo>* cursors, ArrayStack<CursorInfo>* nextCursors);
bool CheckAdvancedCursors(ArrayStack<CursorInfo>* cursors, int derivedLevel, ScevAddRec** nextIV);
bool StaysWithinManagedObject(ArrayStack<CursorInfo>* cursors, ScevAddRec* addRec);
bool TryReplaceUsesWithNewPrimaryIV(ArrayStack<CursorInfo>* cursors, ScevAddRec* iv);
BasicBlock* FindUpdateInsertionPoint(ArrayStack<CursorInfo>* cursors);

@@ -1344,13 +1362,11 @@ bool StrengthReductionContext::TryStrengthReduce()
}
assert(nextIV != nullptr);

-// We need more sanity checks to allow materializing GC-typed add
-// recs. Otherwise we may eagerly form a GC pointer that was only
-// lazily formed under some conditions before, which can be
-// illegal. For now we just bail.
-if (varTypeIsGC(nextIV->Type))
+if (varTypeIsGC(nextIV->Type) && !StaysWithinManagedObject(nextCursors, nextIV))
{
-JITDUMP("    Next IV has type %s. Bailing.\n", varTypeName(nextIV->Type));
+JITDUMP("    Next IV computes a GC pointer that we cannot prove to be inside a managed object. Bailing.\n");
break;
}

@@ -1694,6 +1710,127 @@ bool StrengthReductionContext::CheckAdvancedCursors(ArrayStack<CursorInfo>* curs
return *nextIV != nullptr;
}

//------------------------------------------------------------------------
// StaysWithinManagedObject: Check whether the specified GC-pointer add-rec can
// be guaranteed to be inside the same managed object for the whole loop.
//
// Parameters:
// cursors - Cursors pointing to next uses that correspond to the specific add-rec.
// addRec - The add recurrence
//
// Returns:
// True if we were able to prove so.
//
bool StrengthReductionContext::StaysWithinManagedObject(ArrayStack<CursorInfo>* cursors, ScevAddRec* addRec)
{
int64_t offset;
Scev* baseScev = addRec->Start->PeelAdditions(&offset);
offset = static_cast<target_ssize_t>(offset);

// We only support arrays here. To strength reduce Span<T> accesses we need
// additional properties on the range designated by a Span<T> that we
// currently do not specify, or we need to prove that the byref we may form
// in the IV update would have been formed anyway by the loop.
if (!baseScev->OperIs(ScevOper::Local) || !baseScev->TypeIs(TYP_REF))
{
return false;
}

// Now use the fact that we keep ARR_ADDRs in the IR when we have array
// accesses.
GenTreeArrAddr* arrAddr = nullptr;
for (int i = 0; i < cursors->Height(); i++)
{
CursorInfo& cursor = cursors->BottomRef(i);
GenTree* parent = cursor.Tree->gtGetParent(nullptr);
if ((parent != nullptr) && parent->OperIs(GT_ARR_ADDR))
{
arrAddr = parent->AsArrAddr();
break;
}
}

if (arrAddr == nullptr)
{
return false;
}

unsigned arrElemSize = arrAddr->GetElemType() == TYP_STRUCT
? m_comp->typGetObjLayout(arrAddr->GetElemClassHandle())->GetSize()
: genTypeSize(arrAddr->GetElemType());

int64_t stepCns;
if (!addRec->Step->GetConstantValue(m_comp, &stepCns) || ((unsigned)stepCns > arrElemSize))
{
return false;
}

ValueNum vn = m_scevContext.MaterializeVN(baseScev);
if (vn == ValueNumStore::NoVN)
{
return false;
}

BasicBlock* preheader = m_loop->EntryEdge(0)->getSourceBlock();
if (!m_comp->optAssertionVNIsNonNull(vn, preheader->bbAssertionOut))
{
return false;
}

// We have a non-null array. Check that the 'start' offset looks fine.
// TODO: We could also use assertions on the length of the array. E.g. if
// we know the length of the array is > 3, then we can allow the add rec to
// have a later start. Maybe range check can be used?
if ((offset < 0) || (offset > (int64_t)OFFSETOF__CORINFO_Array__data))
{
return false;
}

// Now see if we have a bound that guarantees that we iterate fewer times
// than the array length.

Review comment (Member):
I'm a little surprised the IV step is not showing up in the calculations here, though perhaps cases where the step is not the element size are rare.

Reply (Member, Author):
We do compare the element size against the IV step size above. I suppose the more general formulation would be a symbolic check like bound * iv->Step <= ARR_LENGTH(arr) * arrElemSize.
for (int i = 0; i < m_backEdgeBounds.Height(); i++)
{
// TODO: EvaluateRelop ought to be powerful enough to prove something
// like bound < ARR_LENGTH(vn), but it is not able to prove that
// currently, even for bound = ARR_LENGTH(vn) - 1 (common case).
Scev* bound = m_backEdgeBounds.Bottom(i);

int64_t boundOffset;
Scev* boundBase = bound->PeelAdditions(&boundOffset);

if (bound->TypeIs(TYP_INT))
{
boundOffset = static_cast<int32_t>(boundOffset);
}

if (boundOffset >= 0)
{
// If we take the backedge >= the array length times, then we would
// advance the addrec past the end.
continue;
}

ValueNum boundBaseVN = m_scevContext.MaterializeVN(boundBase);

VNFuncApp vnf;
if (!m_comp->vnStore->GetVNFunc(boundBaseVN, &vnf))
{
continue;
}

if ((vnf.m_func != VNF_ARR_LENGTH) || (vnf.m_args[0] != vn))
{
continue;
}

return true;
}

return false;
}
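A worked instance of this proof, with values assumed for illustration: let the add rec be a + J + i * 4 for a non-null int[] a, where J = OFFSETOF__CORINFO_Array__data, the step is 4 (equal to the element size), and the backedge bound is ARR_LENGTH(a) - 1 (so boundBase = ARR_LENGTH(a) and boundOffset = -1 < 0). Then:

    start = a + J                          (offset J is within [0, OFFSETOF__CORINFO_Array__data])
    after k <= ARR_LENGTH(a) - 1 backedges:
    iv    = a + J + k * 4
         <= a + J + (ARR_LENGTH(a) - 1) * 4
          < a + J + ARR_LENGTH(a) * 4     (the end of the array's data)

so every value the new IV takes stays inside the array object, matching the reviewer's more general formulation bound * step <= ARR_LENGTH(arr) * arrElemSize.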

//------------------------------------------------------------------------
// TryReplaceUsesWithNewPrimaryIV: Perform final sanity checks before
// introducing a new primary IV and replacing the uses represented by the
42 changes: 42 additions & 0 deletions src/coreclr/jit/scev.cpp
@@ -220,6 +220,48 @@ bool Scev::IsInvariant()
return result != ScevVisit::Abort;
}

//------------------------------------------------------------------------
// Scev::PeelAdditions: Peel the additions from a SCEV and return the base SCEV
// and the sum of the offsets peeled.
//
// Parameters:
// offset - [out] The sum of offsets peeled
//
// Returns:
// The base SCEV.
//
// Remarks:
// If the SCEV is 32 bits, the caller is expected to apply the proper
// truncation (or extension to 64 bits).
//
Scev* Scev::PeelAdditions(int64_t* offset)
{
*offset = 0;

Scev* scev = this;
while (scev->OperIs(ScevOper::Add))
{
Scev* op1 = ((ScevBinop*)scev)->Op1;
Scev* op2 = ((ScevBinop*)scev)->Op2;
if (op1->OperIs(ScevOper::Constant))
{
*offset += ((ScevConstant*)op1)->Value;
scev = op2;
}
else if (op2->OperIs(ScevOper::Constant))
{
*offset += ((ScevConstant*)op2)->Value;
scev = op1;
}
else
{
break;
}
}

return scev;
}
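A small usage sketch, assuming a hypothetical SCEV 'scev' that describes (V01 + 8) + 16 built from the node kinds above:

    // Peeling ((V01 + 8) + 16) yields the base local V01 and offset 8 + 16 = 24.
    int64_t offset;
    Scev*   base = scev->PeelAdditions(&offset);
    assert(base->OperIs(ScevOper::Local));
    assert(offset == 24);
    // Per the Remarks, a caller dealing with a 32-bit SCEV then truncates the
    // accumulated offset, as StaysWithinManagedObject does via target_ssize_t.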

//------------------------------------------------------------------------
// Scev::Equals: Check if two SCEV trees are equal.
//
2 changes: 2 additions & 0 deletions src/coreclr/jit/scev.h
@@ -75,6 +75,8 @@ struct Scev

bool IsInvariant();

Scev* PeelAdditions(int64_t* offset);

static bool Equals(Scev* left, Scev* right);
};
