From 29f98d6c25e237d311038ce225f0b3109925d400 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 10 Jan 2024 19:33:18 +0800 Subject: [PATCH] [InstCombine] Fold bitwise logic with intrinsics (#77460) This patch does the following folds: ``` bitwise(fshl (A, B, ShAmt), fshl(C, D, ShAmt)) -> fshl(bitwise(A, C), bitwise(B, D), ShAmt) bitwise(fshr (A, B, ShAmt), fshr(C, D, ShAmt)) -> fshr(bitwise(A, C), bitwise(B, D), ShAmt) bitwise(bswap(A), bswap(B)) -> bswap(bitwise(A, B)) bitwise(bswap(A), C) -> bswap(bitwise(A, bswap(C))) bitwise(bitreverse(A), bitreverse(B)) -> bitreverse(bitwise(A, B)) bitwise(bitreverse(A), C) -> bitreverse(bitwise(A, bitreverse(C))) ``` Alive2: https://alive2.llvm.org/ce/z/iZN_TL --- .../InstCombine/InstCombineAndOrXor.cpp | 114 +++++---- .../InstCombine/bitreverse-known-bits.ll | 5 +- .../InstCombine/bitwiselogic-bitmanip.ll | 220 ++++++++++++++++++ .../test/Transforms/InstCombine/bswap-fold.ll | 8 +- 4 files changed, 293 insertions(+), 54 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/bitwiselogic-bitmanip.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index c03f50d75814d8..0620752e321394 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -46,44 +46,6 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS, return Builder.CreateFCmp(NewPred, LHS, RHS); } -/// Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or -/// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B)) -/// \param I Binary operator to transform. -/// \return Pointer to node that must replace the original binary operator, or -/// null pointer if no transformation was made. 
-static Value *SimplifyBSwap(BinaryOperator &I, - InstCombiner::BuilderTy &Builder) { - assert(I.isBitwiseLogicOp() && "Unexpected opcode for bswap simplifying"); - - Value *OldLHS = I.getOperand(0); - Value *OldRHS = I.getOperand(1); - - Value *NewLHS; - if (!match(OldLHS, m_BSwap(m_Value(NewLHS)))) - return nullptr; - - Value *NewRHS; - const APInt *C; - - if (match(OldRHS, m_BSwap(m_Value(NewRHS)))) { - // OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) ) - if (!OldLHS->hasOneUse() && !OldRHS->hasOneUse()) - return nullptr; - // NewRHS initialized by the matcher. - } else if (match(OldRHS, m_APInt(C))) { - // OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) ) - if (!OldLHS->hasOneUse()) - return nullptr; - NewRHS = ConstantInt::get(I.getType(), C->byteSwap()); - } else - return nullptr; - - Value *BinOp = Builder.CreateBinOp(I.getOpcode(), NewLHS, NewRHS); - Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, - I.getType()); - return Builder.CreateCall(F, BinOp); -} - /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise /// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates /// whether to treat V, Lo, and Hi as signed or not. 
@@ -2159,6 +2121,64 @@ Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) {
   return BinaryOperator::Create(ShiftOp, NewC, ShAmt);
 }
 
+// Fold and/or/xor of a one-use intrinsic call with a matching one-use call of
+// the same intrinsic or, for bswap/bitreverse only, an m_APInt constant Cst:
+//   bitwise(fsh{l,r}(A, B, ShAmt), fsh{l,r}(C, D, ShAmt))
+//     -> fsh{l,r}(bitwise(A, C), bitwise(B, D), ShAmt)
+//   bitwise(bswap(A), bswap(B)) -> bswap(bitwise(A, B))
+//   bitwise(bswap(A), Cst) -> bswap(bitwise(A, bswap(Cst)))
+//   bitwise(bitreverse(A), bitreverse(B)) -> bitreverse(bitwise(A, B))
+//   bitwise(bitreverse(A), Cst) -> bitreverse(bitwise(A, bitreverse(Cst)))
+// Returns the replacement intrinsic call, or nullptr if no fold applies.
+static Instruction *
+foldBitwiseLogicWithIntrinsics(BinaryOperator &I,
+                               InstCombiner::BuilderTy &Builder) {
+  assert(I.isBitwiseLogicOp() && "Should and/or/xor");
+  if (!I.getOperand(0)->hasOneUse())
+    return nullptr;
+  IntrinsicInst *X = dyn_cast<IntrinsicInst>(I.getOperand(0));
+  if (!X)
+    return nullptr;
+
+  IntrinsicInst *Y = dyn_cast<IntrinsicInst>(I.getOperand(1));
+  if (Y && (!Y->hasOneUse() || X->getIntrinsicID() != Y->getIntrinsicID()))
+    return nullptr;
+
+  Intrinsic::ID IID = X->getIntrinsicID();
+  const APInt *RHSC = nullptr;
+  // No intrinsic RHS: only bswap/bitreverse fold, and only with a constant.
+  if (!Y && (!(IID == Intrinsic::bswap || IID == Intrinsic::bitreverse) ||
+             !match(I.getOperand(1), m_APInt(RHSC))))
+    return nullptr;
+
+  switch (IID) {
+  case Intrinsic::fshl:
+  case Intrinsic::fshr: {
+    if (X->getOperand(2) != Y->getOperand(2))
+      return nullptr;
+    Value *NewOp0 =
+        Builder.CreateBinOp(I.getOpcode(), X->getOperand(0), Y->getOperand(0));
+    Value *NewOp1 =
+        Builder.CreateBinOp(I.getOpcode(), X->getOperand(1), Y->getOperand(1));
+    Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType());
+    return CallInst::Create(F, {NewOp0, NewOp1, X->getOperand(2)});
+  }
+  case Intrinsic::bswap:
+  case Intrinsic::bitreverse: {
+    Value *NewOp0 = Builder.CreateBinOp(
+        I.getOpcode(), X->getOperand(0),
+        Y ? Y->getOperand(0)
+          : ConstantInt::get(I.getType(), IID == Intrinsic::bswap
+                                              ? RHSC->byteSwap()
+                                              : RHSC->reverseBits()));
+    Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType());
+    return CallInst::Create(F, {NewOp0});
+  }
+  default:
+    return nullptr;
+  }
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
@@ -2194,9 +2214,6 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
   if (Value *V = foldUsingDistributiveLaws(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyBSwap(I, Builder))
-    return replaceInstUsesWith(I, V);
-
   if (Instruction *R = foldBinOpShiftWithShift(I))
     return R;
 
@@ -2688,6 +2705,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
   if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
     return Res;
 
+  if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
+    return Res;
+
   return nullptr;
 }
 
@@ -3347,9 +3367,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   if (Value *V = foldUsingDistributiveLaws(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyBSwap(I, Builder))
-    return replaceInstUsesWith(I, V);
-
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   Type *Ty = I.getType();
   if (Ty->isIntOrIntVectorTy(1)) {
@@ -3884,6 +3901,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
       return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2));
   }
 
+  if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
+    return Res;
+
   return nullptr;
 }
 
@@ -4507,9 +4527,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
   if (SimplifyDemandedInstructionBits(I))
     return &I;
 
-  if (Value *V = SimplifyBSwap(I, Builder))
-    return replaceInstUsesWith(I, V);
-
   if (Instruction *R = foldNot(I))
     return R;
 
@@ -4799,5 +4816,8 @@ Instruction 
*InstCombinerImpl::visitXor(BinaryOperator &I) { if (Instruction *Res = foldBinOpOfDisplacedShifts(I)) return Res; + if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder)) + return Res; + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/bitreverse-known-bits.ll b/llvm/test/Transforms/InstCombine/bitreverse-known-bits.ll index ad2b56f492fb78..a8683e563874f5 100644 --- a/llvm/test/Transforms/InstCombine/bitreverse-known-bits.ll +++ b/llvm/test/Transforms/InstCombine/bitreverse-known-bits.ll @@ -46,9 +46,8 @@ define i1 @test3(i32 %arg) { define i8 @add_bitreverse(i8 %a) { ; CHECK-LABEL: @add_bitreverse( -; CHECK-NEXT: [[B:%.*]] = and i8 [[A:%.*]], -4 -; CHECK-NEXT: [[REVERSE:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[B]]), !range [[RNG0:![0-9]+]] -; CHECK-NEXT: [[C:%.*]] = or disjoint i8 [[REVERSE]], -16 +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[A:%.*]], 15 +; CHECK-NEXT: [[C:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[TMP1]]) ; CHECK-NEXT: ret i8 [[C]] ; %b = and i8 %a, 252 diff --git a/llvm/test/Transforms/InstCombine/bitwiselogic-bitmanip.ll b/llvm/test/Transforms/InstCombine/bitwiselogic-bitmanip.ll new file mode 100644 index 00000000000000..d733bd41f0bc38 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/bitwiselogic-bitmanip.ll @@ -0,0 +1,220 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define i32 @test_or_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) { +; CHECK-LABEL: define i32 @test_or_fshl( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[B]], [[D]] +; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP2]], i32 [[SH]]) +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh) + %val2 = call i32 @llvm.fshl.i32(i32 %c, i32 %d, i32 %sh) + %ret 
= or i32 %val1, %val2 + ret i32 %ret +} +define i32 @test_and_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) { +; CHECK-LABEL: define i32 @test_and_fshl( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[B]], [[D]] +; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP2]], i32 [[SH]]) +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh) + %val2 = call i32 @llvm.fshl.i32(i32 %c, i32 %d, i32 %sh) + %ret = and i32 %val1, %val2 + ret i32 %ret +} +define i32 @test_xor_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) { +; CHECK-LABEL: define i32 @test_xor_fshl( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[B]], [[D]] +; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP2]], i32 [[SH]]) +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh) + %val2 = call i32 @llvm.fshl.i32(i32 %c, i32 %d, i32 %sh) + %ret = xor i32 %val1, %val2 + ret i32 %ret +} +define i32 @test_or_fshr(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) { +; CHECK-LABEL: define i32 @test_or_fshr( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[B]], [[D]] +; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP1]], i32 [[TMP2]], i32 [[SH]]) +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %sh) + %val2 = call i32 @llvm.fshr.i32(i32 %c, i32 %d, i32 %sh) + %ret = or i32 %val1, %val2 + ret i32 %ret +} +define i32 @test_or_fshl_cascade(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: define i32 @test_or_fshl_cascade( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 
[[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[A]], [[B]] +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[C]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP3]], i32 [[TMP4]], i32 24) +; CHECK-NEXT: ret i32 [[OR2]] +; + %fshl1 = call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 24) + %fshl2 = call i32 @llvm.fshl.i32(i32 %b, i32 %b, i32 24) + %fshl3 = call i32 @llvm.fshl.i32(i32 %c, i32 %c, i32 24) + %or1 = or i32 %fshl1, %fshl2 + %or2 = or i32 %or1, %fshl3 + ret i32 %or2 +} +define i32 @test_or_bitreverse(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @test_or_bitreverse( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]]) +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 @llvm.bitreverse.i32(i32 %a) + %val2 = call i32 @llvm.bitreverse.i32(i32 %b) + %ret = or i32 %val1, %val2 + ret i32 %ret +} +define i32 @test_or_bitreverse_constant(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @test_or_bitreverse_constant( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], 255 +; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]]) +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 @llvm.bitreverse.i32(i32 %a) + %ret = or i32 %val1, 4278190080 + ret i32 %ret +} +define i32 @test_or_bswap(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @test_or_bswap( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 @llvm.bswap.i32(i32 %a) + %val2 = call i32 @llvm.bswap.i32(i32 %b) + %ret = or i32 %val1, %val2 + ret i32 %ret +} +define i32 @test_or_bswap_constant(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @test_or_bswap_constant( +; 
CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], 255 +; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 @llvm.bswap.i32(i32 %a) + %ret = or i32 %val1, 4278190080 + ret i32 %ret +} + +; Negative tests + +define i32 @test_or_fshl_fshr(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) { +; CHECK-LABEL: define i32 @test_or_fshl_fshr( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) { +; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[B]], i32 [[SH]]) +; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.fshr.i32(i32 [[C]], i32 [[D]], i32 [[SH]]) +; CHECK-NEXT: [[RET:%.*]] = or i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh) + %val2 = call i32 @llvm.fshr.i32(i32 %c, i32 %d, i32 %sh) + %ret = or i32 %val1, %val2 + ret i32 %ret +} +define i32 @test_or_bitreverse_bswap(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @test_or_bitreverse_bswap( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[A]]) +; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.bswap.i32(i32 [[B]]) +; CHECK-NEXT: [[RET:%.*]] = or i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 @llvm.bitreverse.i32(i32 %a) + %val2 = call i32 @llvm.bswap.i32(i32 %b) + %ret = or i32 %val1, %val2 + ret i32 %ret +} +define i32 @test_or_fshl_mismatched_shamt(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh1, i32 %sh2) { +; CHECK-LABEL: define i32 @test_or_fshl_mismatched_shamt( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH1:%.*]], i32 [[SH2:%.*]]) { +; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[B]], i32 [[SH1]]) +; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.fshl.i32(i32 [[C]], i32 [[D]], i32 [[SH2]]) +; CHECK-NEXT: [[RET:%.*]] = or i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: ret i32 
[[RET]] +; + %val1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh1) + %val2 = call i32 @llvm.fshl.i32(i32 %c, i32 %d, i32 %sh2) + %ret = or i32 %val1, %val2 + ret i32 %ret +} +define i32 @test_add_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) { +; CHECK-LABEL: define i32 @test_add_fshl( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) { +; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[B]], i32 [[SH]]) +; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.fshl.i32(i32 [[C]], i32 [[D]], i32 [[SH]]) +; CHECK-NEXT: [[RET:%.*]] = add i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh) + %val2 = call i32 @llvm.fshl.i32(i32 %c, i32 %d, i32 %sh) + %ret = add i32 %val1, %val2 + ret i32 %ret +} +define i32 @test_or_fshl_multiuse(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) { +; CHECK-LABEL: define i32 @test_or_fshl_multiuse( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) { +; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[B]], i32 [[SH]]) +; CHECK-NEXT: call void @use(i32 [[VAL1]]) +; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.fshl.i32(i32 [[C]], i32 [[D]], i32 [[SH]]) +; CHECK-NEXT: [[RET:%.*]] = or i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh) + call void @use(i32 %val1) + %val2 = call i32 @llvm.fshl.i32(i32 %c, i32 %d, i32 %sh) + %ret = or i32 %val1, %val2 + ret i32 %ret +} +define i32 @test_or_bitreverse_multiuse(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @test_or_bitreverse_multiuse( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[A]]) +; CHECK-NEXT: call void @use(i32 [[VAL1]]) +; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[B]]) +; CHECK-NEXT: [[RET:%.*]] = or i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 
@llvm.bitreverse.i32(i32 %a) + call void @use(i32 %val1) + %val2 = call i32 @llvm.bitreverse.i32(i32 %b) + %ret = or i32 %val1, %val2 + ret i32 %ret +} +define i32 @test_or_fshl_constant(i32 %a, i32 %b, i32 %sh) { +; CHECK-LABEL: define i32 @test_or_fshl_constant( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[SH:%.*]]) { +; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[B]], i32 [[SH]]) +; CHECK-NEXT: [[RET:%.*]] = or i32 [[VAL1]], -16777216 +; CHECK-NEXT: ret i32 [[RET]] +; + %val1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh) + %ret = or i32 %val1, 4278190080 + ret i32 %ret +} + +declare void @use(i32) +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare i32 @llvm.fshr.i32(i32, i32, i32) +declare i32 @llvm.bitreverse.i32(i32) +declare i32 @llvm.bswap.i32(i32) diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll index a9061d3c95ce98..05933d37057cce 100644 --- a/llvm/test/Transforms/InstCombine/bswap-fold.ll +++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll @@ -498,8 +498,8 @@ define i64 @bs_and64_multiuse1(i64 %a, i64 %b) #0 { define i64 @bs_and64_multiuse2(i64 %a, i64 %b) #0 { ; CHECK-LABEL: @bs_and64_multiuse2( ; CHECK-NEXT: [[T1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[A]], [[B:%.*]] -; CHECK-NEXT: [[T3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[T2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]]) +; CHECK-NEXT: [[T3:%.*]] = and i64 [[T1]], [[T2]] ; CHECK-NEXT: [[T4:%.*]] = mul i64 [[T3]], [[T1]] ; CHECK-NEXT: ret i64 [[T4]] ; @@ -512,9 +512,9 @@ define i64 @bs_and64_multiuse2(i64 %a, i64 %b) #0 { define i64 @bs_and64_multiuse3(i64 %a, i64 %b) #0 { ; CHECK-LABEL: @bs_and64_multiuse3( +; CHECK-NEXT: [[T1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]]) ; CHECK-NEXT: [[T2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[A:%.*]], [[B]] -; CHECK-NEXT: 
[[T3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[T3:%.*]] = and i64 [[T1]], [[T2]] ; CHECK-NEXT: [[T4:%.*]] = mul i64 [[T3]], [[T2]] ; CHECK-NEXT: ret i64 [[T4]] ;