Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VectorUtils] Skip interleave members with diff type and alloca sizes. #5727

Merged
merged 1 commit into from
Dec 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion llvm/lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1110,6 +1110,12 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
continue;
Type *ElementTy = getLoadStoreType(&I);

// Currently, codegen doesn't support cases where the type size doesn't
// match the alloc size. Skip them for now.
uint64_t Size = DL.getTypeAllocSize(ElementTy);
if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
continue;

// We don't check wrapping here because we don't know yet if Ptr will be
// part of a full group or a group with gaps. Checking wrapping for all
// pointers (even those that end up in groups with no gaps) will be overly
Expand All @@ -1122,7 +1128,6 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
/*Assume=*/true, /*ShouldCheckWrap=*/false).value_or(0);

const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
uint64_t Size = DL.getTypeAllocSize(ElementTy);
AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
getLoadStoreAlignment(&I));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

; Make sure LV does not crash when analyzing potential interleave groups with
; accesses where the typesize doesn't match to allocsize.
define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
; CHECK-LABEL: @pr58722_load_interleave_group(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 40004
; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 80007
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[UGLYGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope !0
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope !0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP9]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !0
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !0
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP13]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1
; CHECK-NEXT: [[TMP20:%.*]] = load i24, ptr [[TMP16]], align 4, !alias.scope !0
; CHECK-NEXT: [[TMP21:%.*]] = load i24, ptr [[TMP17]], align 4, !alias.scope !0
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i24> poison, i24 [[TMP20]], i32 0
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i24> [[TMP22]], i24 [[TMP21]], i32 1
; CHECK-NEXT: [[TMP24:%.*]] = load i24, ptr [[TMP18]], align 4, !alias.scope !0
; CHECK-NEXT: [[TMP25:%.*]] = load i24, ptr [[TMP19]], align 4, !alias.scope !0
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x i24> poison, i24 [[TMP24]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x i24> [[TMP26]], i24 [[TMP25]], i32 1
; CHECK-NEXT: [[TMP28:%.*]] = zext <2 x i24> [[TMP23]] to <2 x i32>
; CHECK-NEXT: [[TMP29:%.*]] = zext <2 x i24> [[TMP27]] to <2 x i32>
; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP11]], [[TMP28]]
; CHECK-NEXT: [[TMP31:%.*]] = add <2 x i32> [[TMP15]], [[TMP29]]
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 0
; CHECK-NEXT: store <2 x i32> [[TMP30]], ptr [[TMP34]], align 4, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 2
; CHECK-NEXT: store <2 x i32> [[TMP31]], ptr [[TMP35]], align 4, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10001, 10000
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[GEP_IV:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[GEP_IV]], align 4
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[GEP_IV]], i64 1
; CHECK-NEXT: [[V2:%.*]] = load i24, ptr [[GEP]], align 4
; CHECK-NEXT: [[V2_EXT:%.*]] = zext i24 [[V2]] to i32
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[V1]], [[V2_EXT]]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i32 [[SUM]], ptr [[GEP_DST]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV]], 10000
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop

loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.iv = getelementptr inbounds i64, ptr %src, i64 %iv
%v1 = load i32, ptr %gep.iv, align 4
%gep = getelementptr inbounds i32, ptr %gep.iv, i64 1
%v2 = load i24, ptr %gep, align 4
%v2.ext = zext i24 %v2 to i32
%sum = add i32 %v1, %v2.ext
%gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv
store i32 %sum, ptr %gep.dst
%iv.next = add i64 %iv, 1
%cmp = icmp eq i64 %iv, 10000
br i1 %cmp, label %exit, label %loop

exit:
ret void
}

define void @pr58722_store_interleave_group(ptr %src, ptr %dst) {
; CHECK-LABEL: @pr58722_store_interleave_group(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[GEP_IV:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i32 [[IV]]
; CHECK-NEXT: store i32 [[IV]], ptr [[GEP_IV]], align 4
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[GEP_IV]], i64 1
; CHECK-NEXT: [[TRUNC_IV:%.*]] = trunc i32 [[IV]] to i24
; CHECK-NEXT: store i24 [[TRUNC_IV]], ptr [[GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 2
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 10000
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop

loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%gep.iv = getelementptr inbounds i64, ptr %src, i32 %iv
store i32 %iv, ptr %gep.iv
%gep = getelementptr inbounds i64, ptr %gep.iv, i64 1
%trunc.iv = trunc i32 %iv to i24
store i24 %trunc.iv, ptr %gep
%iv.next = add i32 %iv, 2
%cmp = icmp eq i32 %iv, 10000
br i1 %cmp, label %exit, label %loop

exit:
ret void
}