Skip to content

Commit

Permalink
[LAA] Refine stride checks for SCEVs during dependence analysis. (llv…
Browse files Browse the repository at this point in the history
…m#99577)

Update getDependenceDistanceStrideAndSize to reason about different
combinations of strides directly and explicitly.

Update getPtrStride to return 0 for invariant pointers.

Then proceed by checking the strides.

If either source or sink are not strided by a constant (i.e. not a
non-wrapping AddRec) or invariant, the accesses may overlap
with earlier or later iterations and we cannot generate runtime
checks to disambiguate them.

Otherwise they are either loop invariant or strided. In that case, we
can generate a runtime check to disambiguate them.

If both are strided by constants, we proceed as previously.

This is an alternative to
llvm#99239 and also replaces
additional checks if the underlying object is loop-invariant.

Fixes llvm#87189.

PR: llvm#99577
  • Loading branch information
fhahn committed Aug 6, 2024
1 parent d033ae1 commit 83098f4
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 101 deletions.
23 changes: 9 additions & 14 deletions llvm/include/llvm/Analysis/LoopAccessAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,8 @@ class MemoryDepChecker {
/// Check whether the dependencies between the accesses are safe.
///
/// Only checks sets with elements in \p CheckDeps.
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
const DenseMap<Value *, SmallVector<const Value *, 16>>
&UnderlyingObjects);
bool areDepsSafe(const DepCandidates &AccessSets,
const MemAccessInfoList &CheckDeps);

/// No memory dependence was encountered that would inhibit
/// vectorization.
Expand Down Expand Up @@ -351,11 +350,8 @@ class MemoryDepChecker {
/// element access it records this distance in \p MinDepDistBytes (if this
/// distance is smaller than any other distance encountered so far).
/// Otherwise, this function returns true signaling a possible dependence.
Dependence::DepType
isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
unsigned BIdx,
const DenseMap<Value *, SmallVector<const Value *, 16>>
&UnderlyingObjects);
Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx,
const MemAccessInfo &B, unsigned BIdx);

/// Check whether the data dependence could prevent store-load
/// forwarding.
Expand Down Expand Up @@ -392,11 +388,9 @@ class MemoryDepChecker {
/// determined, or a struct containing (Distance, Stride, TypeSize, AIsWrite,
/// BIsWrite).
std::variant<Dependence::DepType, DepDistanceStrideAndSizeInfo>
getDependenceDistanceStrideAndSize(
const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B,
Instruction *BInst,
const DenseMap<Value *, SmallVector<const Value *, 16>>
&UnderlyingObjects);
getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst,
const MemAccessInfo &B,
Instruction *BInst);
};

class RuntimePointerChecking;
Expand Down Expand Up @@ -797,7 +791,8 @@ replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
Value *Ptr);

/// If the pointer has a constant stride return it in units of the access type
/// size. Otherwise return std::nullopt.
/// size. If the pointer is loop-invariant, return 0. Otherwise return
/// std::nullopt.
///
/// Ensure that it does not wrap in the address space, assuming the predicate
/// associated with \p PSE is true.
Expand Down
121 changes: 56 additions & 65 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -728,11 +728,6 @@ class AccessAnalysis {

MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }

const DenseMap<Value *, SmallVector<const Value *, 16>> &
getUnderlyingObjects() {
return UnderlyingObjects;
}

private:
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;

Expand Down Expand Up @@ -1459,22 +1454,23 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
}

/// Check whether the access through \p Ptr has a constant stride.
std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
Type *AccessTy, Value *Ptr,
const Loop *Lp,
const DenseMap<Value *, const SCEV *> &StridesMap,
bool Assume, bool ShouldCheckWrap) {
std::optional<int64_t>
llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
const Loop *Lp,
const DenseMap<Value *, const SCEV *> &StridesMap,
bool Assume, bool ShouldCheckWrap) {
const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
if (PSE.getSE()->isLoopInvariant(PtrScev, Lp))
return {0};

Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");

if (isa<ScalableVectorType>(AccessTy)) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
<< "\n");
return std::nullopt;
}

const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (Assume && !AR)
AR = PSE.getAsAddRec(Ptr);
Expand Down Expand Up @@ -1899,24 +1895,12 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
return ScaledDist % Stride;
}

/// Returns true if any of the underlying objects has a loop varying address,
/// i.e. may change in \p L.
static bool
isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
ScalarEvolution &SE, const Loop *L) {
return any_of(UnderlyingObjects, [&SE, L](const Value *UO) {
return !SE.isLoopInvariant(SE.getSCEV(const_cast<Value *>(UO)), L);
});
}

std::variant<MemoryDepChecker::Dependence::DepType,
MemoryDepChecker::DepDistanceStrideAndSizeInfo>
MemoryDepChecker::getDependenceDistanceStrideAndSize(
const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
const DenseMap<Value *, SmallVector<const Value *, 16>>
&UnderlyingObjects) {
auto &DL = InnermostLoop->getHeader()->getDataLayout();
const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {
const auto &DL = InnermostLoop->getHeader()->getDataLayout();
auto &SE = *PSE.getSE();
auto [APtr, AIsWrite] = A;
auto [BPtr, BIsWrite] = B;
Expand All @@ -1933,39 +1917,30 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
BPtr->getType()->getPointerAddressSpace())
return MemoryDepChecker::Dependence::Unknown;

int64_t StrideAPtr =
getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true)
.value_or(0);
int64_t StrideBPtr =
getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true)
.value_or(0);
std::optional<int64_t> StrideAPtr =
getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true);
std::optional<int64_t> StrideBPtr =
getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true);

const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);

// If the induction step is negative we have to invert source and sink of the
// dependence when measuring the distance between them. We should not swap
// AIsWrite with BIsWrite, as their uses expect them in program order.
if (StrideAPtr < 0) {
if (StrideAPtr && *StrideAPtr < 0) {
std::swap(Src, Sink);
std::swap(AInst, BInst);
std::swap(StrideAPtr, StrideBPtr);
}

const SCEV *Dist = SE.getMinusSCEV(Sink, Src);

LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
<< "(Induction step: " << StrideAPtr << ")\n");
<< "\n");
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
<< ": " << *Dist << "\n");

// Needs accesses where the addresses of the accessed underlying objects do
// not change within the loop.
if (isLoopVariantIndirectAddress(UnderlyingObjects.find(APtr)->second, SE,
InnermostLoop) ||
isLoopVariantIndirectAddress(UnderlyingObjects.find(BPtr)->second, SE,
InnermostLoop))
return MemoryDepChecker::Dependence::IndirectUnsafe;

// Check if we can prove that Sink only accesses memory after Src's end or
// vice versa. At the moment this is limited to cases where either source or
// sink are loop invariant to avoid compile-time increases. This is not
Expand All @@ -1987,12 +1962,33 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
}
}

// Need accesses with constant strides and the same direction. We don't want
// to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that
// could wrap in the address space.
if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) ||
(StrideAPtr < 0 && StrideBPtr > 0)) {
// Need accesses with constant strides and the same direction for further
// dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
// similar code or pointer arithmetic that could wrap in the address space.

// If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
// not loop-invariant (stride will be 0 in that case), we cannot analyze the
// dependence further and also cannot generate runtime checks.
if (!StrideAPtr || !StrideBPtr) {
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
return MemoryDepChecker::Dependence::IndirectUnsafe;
}

int64_t StrideAPtrInt = *StrideAPtr;
int64_t StrideBPtrInt = *StrideBPtr;
LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt
<< " Sink induction step: " << StrideBPtrInt << "\n");
// At least Src or Sink are loop invariant and the other is strided or
// invariant. We can generate a runtime check to disambiguate the accesses.
if (StrideAPtrInt == 0 || StrideBPtrInt == 0)
return MemoryDepChecker::Dependence::Unknown;

// Both Src and Sink have a constant stride, check if they are in the same
// direction.
if ((StrideAPtrInt > 0 && StrideBPtrInt < 0) ||
(StrideAPtrInt < 0 && StrideBPtrInt > 0)) {
LLVM_DEBUG(
dbgs() << "Pointer access with strides in different directions\n");
return MemoryDepChecker::Dependence::Unknown;
}

Expand All @@ -2001,22 +1997,20 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
if (!HasSameSize)
TypeByteSize = 0;
return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr),
std::abs(StrideBPtr), TypeByteSize,
return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt),
std::abs(StrideBPtrInt), TypeByteSize,
AIsWrite, BIsWrite);
}

MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
unsigned BIdx,
const DenseMap<Value *, SmallVector<const Value *, 16>>
&UnderlyingObjects) {
MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const MemAccessInfo &B, unsigned BIdx) {
assert(AIdx < BIdx && "Must pass arguments in program order");

// Get the dependence distance, stride, type size and what access writes for
// the dependence between A and B.
auto Res = getDependenceDistanceStrideAndSize(
A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects);
auto Res =
getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
if (std::holds_alternative<Dependence::DepType>(Res))
return std::get<Dependence::DepType>(Res);

Expand Down Expand Up @@ -2250,10 +2244,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
return Dependence::BackwardVectorizable;
}

bool MemoryDepChecker::areDepsSafe(
DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
const DenseMap<Value *, SmallVector<const Value *, 16>>
&UnderlyingObjects) {
bool MemoryDepChecker::areDepsSafe(const DepCandidates &AccessSets,
const MemAccessInfoList &CheckDeps) {

MinDepDistBytes = -1;
SmallPtrSet<MemAccessInfo, 8> Visited;
Expand Down Expand Up @@ -2296,8 +2288,8 @@ bool MemoryDepChecker::areDepsSafe(
if (*I1 > *I2)
std::swap(A, B);

Dependence::DepType Type = isDependent(*A.first, A.second, *B.first,
B.second, UnderlyingObjects);
Dependence::DepType Type =
isDependent(*A.first, A.second, *B.first, B.second);
mergeInStatus(Dependence::isSafeForVectorization(Type));

// Gather dependences unless we accumulated MaxDependences
Expand Down Expand Up @@ -2652,8 +2644,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (Accesses.isDependencyCheckNeeded()) {
LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses,
Accesses.getDependenciesToCheck(),
Accesses.getUnderlyingObjects());
Accesses.getDependenciesToCheck());

if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,19 @@
define void @B_indices_loaded_in_loop_A_stored(ptr %A, ptr noalias %B, i64 %N, i64 %off) {
; CHECK-LABEL: 'B_indices_loaded_in_loop_A_stored'
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %l = load i32, ptr %gep.B, align 4 ->
; CHECK-NEXT: store i32 %inc, ptr %gep.B, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %indices = load i8, ptr %gep.A, align 1 ->
; CHECK-NEXT: store i32 %l, ptr %gep.C, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
; CHECK-NEXT: %gep.C = getelementptr inbounds i32, ptr %A, i64 %iv
; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]):
; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv.off
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group [[GRP1]]:
; CHECK-NEXT: (Low: %A High: ((4 * %N) + %A))
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop>
; CHECK-NEXT: Group [[GRP2]]:
; CHECK-NEXT: (Low: (%off + %A) High: (%N + %off + %A))
; CHECK-NEXT: Member: {(%off + %A),+,1}<nw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
Expand Down Expand Up @@ -59,9 +57,9 @@ define void @B_indices_loaded_in_loop_A_not_stored(ptr %A, ptr noalias %B, i64 %
; CHECK-LABEL: 'B_indices_loaded_in_loop_A_not_stored'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %l = load i32, ptr %gep.B, align 4 ->
; CHECK-NEXT: store i32 %inc, ptr %gep.B, align 4
; CHECK-EMPTY:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
; CHECK-NOT: Report: cannot identify array bounds
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %loadA = load i16, ptr %arrayidxA, align 2 ->
; CHECK-NEXT: store i16 %mul, ptr %arrayidxA, align 2

Expand Down
6 changes: 4 additions & 2 deletions llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
; CHECK-LABEL: 'negative_step'
; CHECK: LAA: Found an analyzable loop: loop
; CHECK: LAA: Checking memory dependencies
; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>(Induction step: -1)
; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>
; CHECK-NEXT: LAA: Distance for store i32 %add, ptr %gep.A.plus.1, align 4 to %l = load i32, ptr %gep.A, align 4: -4
; CHECK-NEXT: LAA: Src induction step: -1 Sink induction step: -1
; CHECK-NEXT: LAA: Dependence is negative

define void @negative_step(ptr nocapture %A) {
Expand Down Expand Up @@ -41,8 +42,9 @@ exit:
; CHECK-LABEL: 'positive_step'
; CHECK: LAA: Found an analyzable loop: loop
; CHECK: LAA: Checking memory dependencies
; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>(Induction step: 1)
; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>
; CHECK-NEXT: LAA: Distance for %l = load i32, ptr %gep.A, align 4 to store i32 %add, ptr %gep.A.minus.1, align 4: -4
; CHECK-NEXT: LAA: Src induction step: 1 Sink induction step: 1
; CHECK-NEXT: LAA: Dependence is negative

define void @positive_step(ptr nocapture %A) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ define void @test(ptr noalias %x, ptr noalias %y, ptr noalias %z) {
; CHECK-LABEL: 'test'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %load = load double, ptr %gep.sel, align 8 ->
; CHECK-NEXT: store double %load, ptr %gep.sel2, align 8
; CHECK-EMPTY:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
Original file line number Diff line number Diff line change
Expand Up @@ -276,9 +276,9 @@ define void @single_stride_used_for_trip_count(ptr noalias %A, ptr noalias %B, i
; CHECK-LABEL: 'single_stride_used_for_trip_count'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
Expand Down

0 comments on commit 83098f4

Please sign in to comment.