From 7d9d974b7b844e7e2c91f467c7b35fd49f910a8a Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 23 Mar 2020 12:47:32 -0500 Subject: [PATCH] Handle part-word LL/SC in atomic expansion pass Differential Revision: https://reviews.llvm.org/D77213 --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 260 +++++++++++------- .../Target/Hexagon/HexagonISelLowering.cpp | 6 +- llvm/test/CodeGen/Hexagon/atomic-rmw-add.ll | 22 ++ llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll | 28 ++ .../AtomicExpand/ARM/atomic-expansion-v7.ll | 39 ++- .../AtomicExpand/ARM/atomic-expansion-v8.ll | 45 ++- .../AtomicExpand/ARM/cmpxchg-weak.ll | 23 +- .../Transforms/AtomicExpand/SPARC/partword.ll | 44 +-- 8 files changed, 322 insertions(+), 145 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/atomic-rmw-add.ll create mode 100644 llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 37a50cde639174..3a79e6c9625d91 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -570,8 +570,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(AI); if (ValueSize < MinCASSize) { - llvm_unreachable( - "MinCmpXchgSizeInBits not yet supported for LL/SC architectures."); + expandPartwordAtomicRMW(AI, + TargetLoweringBase::AtomicExpansionKind::LLSC); } else { auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) { return performAtomicOp(AI->getOperation(), Builder, Loaded, @@ -608,16 +608,43 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { namespace { -/// Result values from createMaskInstrs helper. struct PartwordMaskValues { - Type *WordType; - Type *ValueType; - Value *AlignedAddr; - Value *ShiftAmt; - Value *Mask; - Value *Inv_Mask; + // These three fields are guaranteed to be set by createMaskInstrs. + Type *WordType = nullptr; + Type *ValueType = nullptr; + Value *AlignedAddr = nullptr; + // The remaining fields can be null. + Value *ShiftAmt = nullptr; + Value *Mask = nullptr; + Value *Inv_Mask = nullptr; }; +LLVM_ATTRIBUTE_UNUSED +raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { + auto PrintObj = [&O](auto *V) { + if (V) + O << *V; + else + O << "nullptr"; + O << '\n'; + }; + O << "PartwordMaskValues {\n"; + O << " WordType: "; + PrintObj(PMV.WordType); + O << " ValueType: "; + PrintObj(PMV.ValueType); + O << " AlignedAddr: "; + PrintObj(PMV.AlignedAddr); + O << " ShiftAmt: "; + PrintObj(PMV.ShiftAmt); + O << " Mask: "; + PrintObj(PMV.Mask); + O << " Inv_Mask: "; + PrintObj(PMV.Inv_Mask); + O << "}\n"; + return O; +} + } // end anonymous namespace /// This is a helper function which builds instructions to provide @@ -638,48 +665,74 @@ struct PartwordMaskValues { /// Inv_Mask: The inverse of Mask. static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, Type *ValueType, Value *Addr, - unsigned WordSize) { - PartwordMaskValues Ret; + unsigned MinWordSize) { + PartwordMaskValues PMV; - BasicBlock *BB = I->getParent(); - Function *F = BB->getParent(); Module *M = I->getModule(); - - LLVMContext &Ctx = F->getContext(); + LLVMContext &Ctx = M->getContext(); const DataLayout &DL = M->getDataLayout(); - unsigned ValueSize = DL.getTypeStoreSize(ValueType); - assert(ValueSize < WordSize); + PMV.ValueType = ValueType; + PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8) + : ValueType; + if (PMV.ValueType == PMV.WordType) { + PMV.AlignedAddr = Addr; + return PMV; + } - Ret.ValueType = ValueType; - Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8); + assert(ValueSize < MinWordSize); Type *WordPtrType = - Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); + PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx)); - Ret.AlignedAddr = Builder.CreateIntToPtr( - Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType, + PMV.AlignedAddr = Builder.CreateIntToPtr( + Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType, "AlignedAddr"); - Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB"); + Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); if (DL.isLittleEndian()) { // turn bytes into bits - Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3); + PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3); } else { // turn bytes into bits, and count from the other side. - Ret.ShiftAmt = - Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3); + PMV.ShiftAmt = Builder.CreateShl( + Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3); } - Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt"); - Ret.Mask = Builder.CreateShl( - ConstantInt::get(Ret.WordType, (1 << (ValueSize * 8)) - 1), Ret.ShiftAmt, + PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt"); + PMV.Mask = Builder.CreateShl( + ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt, "Mask"); - Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask"); + PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask"); + return PMV; +} + +static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord, + const PartwordMaskValues &PMV) { + assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); + if (PMV.WordType == PMV.ValueType) + return WideWord; - return Ret; + Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted"); + Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted"); + return Trunc; +} + +static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord, + Value *Updated, const PartwordMaskValues &PMV) { + assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); + assert(Updated->getType() == PMV.ValueType && "Value type mismatch"); + if (PMV.WordType == PMV.ValueType) + return Updated; + + Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended"); + Value *Shift = + Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true); + Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked"); + Value *Or = Builder.CreateOr(And, Shift, "inserted"); + return Or; } /// Emit IR to implement a masked version of a given atomicrmw @@ -719,13 +772,9 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, // Finally, comparison ops will operate on the full value, so // truncate down to the original size, and expand out again after // doing the operation. - Value *Loaded_Shiftdown = Builder.CreateTrunc( - Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType); - Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc); - Value *NewVal_Shiftup = Builder.CreateShl( - Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt); - Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); - Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup); + Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV); + Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc); + Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV); return FinalVal; } default: @@ -738,12 +787,10 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, /// /// It will create an LL/SC or cmpxchg loop, as appropriate, the same /// way as a typical atomicrmw expansion. The only difference here is -/// that the operation inside of the loop must operate only upon a +/// that the operation inside of the loop may operate upon only a /// part of the value. void AtomicExpand::expandPartwordAtomicRMW( AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) { - assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg); - AtomicOrdering MemOpOrder = AI->getOrdering(); IRBuilder<> Builder(AI); @@ -761,13 +808,18 @@ void AtomicExpand::expandPartwordAtomicRMW( ValOperand_Shifted, AI->getValOperand(), PMV); }; - // TODO: When we're ready to support LLSC conversions too, use - // insertRMWLLSCLoop here for ExpansionKind==LLSC. - Value *OldResult = - insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder, - PerformPartwordOp, createCmpXchgInstFun); - Value *FinalOldResult = Builder.CreateTrunc( - Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType); + Value *OldResult; + if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) { + OldResult = + insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder, + PerformPartwordOp, createCmpXchgInstFun); + } else { + assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC); + OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr, + MemOpOrder, PerformPartwordOp); + } + + Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); AI->replaceAllUsesWith(FinalOldResult); AI->eraseFromParent(); } @@ -800,8 +852,7 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand, AI->getOrdering()); - Value *FinalOldResult = Builder.CreateTrunc( - Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType); + Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV); AI->replaceAllUsesWith(FinalOldResult); AI->eraseFromParent(); return NewAI; @@ -923,8 +974,7 @@ void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { // partword.cmpxchg.end: Builder.SetInsertPoint(CI); - Value *FinalOldVal = Builder.CreateTrunc( - Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType); + Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); Value *Res = UndefValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); Res = Builder.CreateInsertValue(Res, Success, 1); @@ -965,8 +1015,7 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic( Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt, AI->getOrdering()); - Value *FinalOldResult = Builder.CreateTrunc( - Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType); + Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); AI->replaceAllUsesWith(FinalOldResult); AI->eraseFromParent(); } @@ -987,9 +1036,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic( Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask, CI->getSuccessOrdering()); - Value *FinalOldVal = Builder.CreateTrunc( - Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType); - + Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); Value *Res = UndefValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); Value *Success = Builder.CreateICmpEQ( @@ -1126,24 +1173,28 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // // The full expansion we produce is: // [...] + // %aligned.addr = ... // cmpxchg.start: - // %unreleasedload = @load.linked(%addr) - // %should_store = icmp eq %unreleasedload, %desired - // br i1 %should_store, label %cmpxchg.fencedstore, + // %unreleasedload = @load.linked(%aligned.addr) + // %unreleasedload.extract = extract value from %unreleasedload + // %should_store = icmp eq %unreleasedload.extract, %desired + // br i1 %should_store, label %cmpxchg.releasingstore, // label %cmpxchg.nostore // cmpxchg.releasingstore: // fence? // br label cmpxchg.trystore // cmpxchg.trystore: - // %loaded.trystore = phi [%unreleasedload, %releasingstore], + // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore], // [%releasedload, %cmpxchg.releasedload] - // %stored = @store_conditional(%new, %addr) + // %updated.new = insert %new into %loaded.trystore + // %stored = @store_conditional(%updated.new, %aligned.addr) // %success = icmp eq i32 %stored, 0 // br i1 %success, label %cmpxchg.success, // label %cmpxchg.releasedload/%cmpxchg.failure // cmpxchg.releasedload: - // %releasedload = @load.linked(%addr) - // %should_store = icmp eq %releasedload, %desired + // %releasedload = @load.linked(%aligned.addr) + // %releasedload.extract = extract value from %releasedload + // %should_store = icmp eq %releasedload.extract, %desired // br i1 %should_store, label %cmpxchg.trystore, // label %cmpxchg.failure // cmpxchg.success: @@ -1159,9 +1210,10 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // fence? // br label %cmpxchg.end // cmpxchg.end: - // %loaded = phi [%loaded.nostore, %cmpxchg.failure], - // [%loaded.trystore, %cmpxchg.trystore] + // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure], + // [%loaded.trystore, %cmpxchg.trystore] // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] + // %loaded = extract value from %loaded.exit // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 // [...] @@ -1187,13 +1239,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.SetInsertPoint(BB); if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) TLI->emitLeadingFence(Builder, CI, SuccessOrder); + + PartwordMaskValues PMV = + createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, + TLI->getMinCmpXchgSizeInBits() / 8); Builder.CreateBr(StartBB); // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(StartBB); - Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *UnreleasedLoad = + TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder); + Value *UnreleasedLoadExtract = + extractMaskedValue(Builder, UnreleasedLoad, PMV); Value *ShouldStore = Builder.CreateICmpEQ( - UnreleasedLoad, CI->getCompareOperand(), "should_store"); + UnreleasedLoadExtract, CI->getCompareOperand(), "should_store"); // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). @@ -1205,8 +1264,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateBr(TryStoreBB); Builder.SetInsertPoint(TryStoreBB); - Value *StoreSuccess = TLI->emitStoreConditional( - Builder, CI->getNewValOperand(), Addr, MemOpOrder); + PHINode *LoadedTryStore = + Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore"); + LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB); + Value *NewValueInsert = + insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); + Value *StoreSuccess = + TLI->emitStoreConditional(Builder, NewValueInsert, Addr, MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; @@ -1216,13 +1280,16 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.SetInsertPoint(ReleasedLoadBB); Value *SecondLoad; if (HasReleasedLoadBB) { - SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); - ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(), - "should_store"); + SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder); + Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV); + ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract, + CI->getCompareOperand(), "should_store"); // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); + // Update PHI node in TryStoreBB. + LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB); } else Builder.CreateUnreachable(); @@ -1234,6 +1301,12 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateBr(ExitBB); Builder.SetInsertPoint(NoStoreBB); + PHINode *LoadedNoStore = + Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore"); + LoadedNoStore->addIncoming(UnreleasedLoad, StartBB); + if (HasReleasedLoadBB) + LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB); + // In the failing case, where we don't execute the store-conditional, the // target might want to balance out the load-linked with a dedicated // instruction (e.g., on ARM, clearing the exclusive monitor). @@ -1241,6 +1314,11 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateBr(FailureBB); Builder.SetInsertPoint(FailureBB); + PHINode *LoadedFailure = + Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure"); + LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB); + if (CI->isWeak()) + LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB); if (ShouldInsertFencesForAtomic) TLI->emitTrailingFence(Builder, CI, FailureOrder); Builder.CreateBr(ExitBB); @@ -1250,32 +1328,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate // PHI. Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); + PHINode *LoadedExit = + Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit"); + LoadedExit->addIncoming(LoadedTryStore, SuccessBB); + LoadedExit->addIncoming(LoadedFailure, FailureBB); + PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success"); Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); - // Setup the builder so we can create any PHIs we need. - Value *Loaded; - if (!HasReleasedLoadBB) - Loaded = UnreleasedLoad; - else { - Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin()); - PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); - TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB); - TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB); - - Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin()); - PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); - NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB); - NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB); - - Builder.SetInsertPoint(ExitBB, ++ExitBB->begin()); - PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); - ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB); - ExitLoaded->addIncoming(NoStoreLoaded, FailureBB); - - Loaded = ExitLoaded; - } + // This is the "exit value" from the cmpxchg expansion. It may be of + // a type wider than the one in the cmpxchg instruction. + Value *LoadedFull = LoadedExit; + + Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator())); + Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV); // Look for any users of the cmpxchg that are just comparing the loaded value // against the desired one, and replace them with the CFG-derived version. @@ -1417,8 +1483,6 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { expandPartwordCmpXchg(CI); return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: { - assert(ValueSize >= MinCASSize && - "MinCmpXchgSizeInBits not yet supported for LL/SC expansions."); return expandAtomicCmpXchg(CI); } case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index e11ecdc7d035ff..c532f2f3aadf71 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -3357,9 +3357,5 @@ bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { TargetLowering::AtomicExpansionKind HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR( AtomicCmpXchgInst *AI) const { - const DataLayout &DL = AI->getModule()->getDataLayout(); - unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType()); - if (Size >= 4 && Size <= 8) - return AtomicExpansionKind::LLSC; - return AtomicExpansionKind::None; + return AtomicExpansionKind::LLSC; } diff --git a/llvm/test/CodeGen/Hexagon/atomic-rmw-add.ll b/llvm/test/CodeGen/Hexagon/atomic-rmw-add.ll new file mode 100644 index 00000000000000..aedbd6101c4f94 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/atomic-rmw-add.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK: .LBB0_1: +; CHECK: [[R1:r[0-9]+]] = memw_locked(r0) +; CHECK-DAG: [[R2:r[0-9]+]] = and([[R1]], +; CHECK-DAG: [[R3:r[0-9]+]] = add([[R1]], +; CHECK: [[R2]] |= and([[R3]], +; CHECK: memw_locked(r0,[[P0:p[0-3]]]) = [[R2]] +; CHECK: if (![[P0]]) jump:nt .LBB0_1 + + +%struct.a = type { i8 } + +define void @b() #0 { + %d = alloca %struct.a + %c = getelementptr %struct.a, %struct.a* %d, i32 0, i32 0 + atomicrmw add i8* %c, i8 2 monotonic + ret void +} + +attributes #0 = { "target-cpu"="hexagonv66" } + diff --git a/llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll b/llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll new file mode 100644 index 00000000000000..3a52d8546b85a7 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK-LABEL: danny +; CHECK: memw_locked +define i8 @danny(i8* %a0) unnamed_addr #0 { +start: + %v0 = cmpxchg i8* %a0, i8 0, i8 1 seq_cst seq_cst + %v1 = extractvalue { i8, i1 } %v0, 0 + ret i8 %v1 +} + +; CHECK-LABEL: sammy +; CHECK: memw_locked +define i16 @sammy(i16* %a0) unnamed_addr #0 { +start: + %v0 = cmpxchg i16* %a0, i16 0, i16 1 seq_cst seq_cst + %v1 = extractvalue { i16, i1 } %v0, 0 + ret i16 %v1 +} + +; CHECK-LABEL: kirby +; CHECK: memw_locked +define i32 @kirby(i32* %a0) unnamed_addr #0 { +start: + %v0 = cmpxchg i32* %a0, i32 0, i32 1 seq_cst seq_cst + %v1 = extractvalue { i32, i1 } %v0, 0 + ret i32 %v1 +} diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll index 5e84460b9c0c2f..39108874b7f983 100644 --- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll +++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll @@ -257,12 +257,13 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) { ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK: [[LOADED_FAILURE:%.*]] = phi i8 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ] ; CHECK: ret i8 [[LOADED]] %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst @@ -307,12 +308,13 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i16 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: dmb ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ] ; CHECK: ret i16 [[LOADED]] %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic @@ -328,9 +330,13 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva ; CHECK: [[LOOP]]: ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr) ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] @@ -340,16 +346,19 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i32 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[OLDVAL]] +; CHECK: ret i32 [[LOADED_EXIT]] %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire %old = extractvalue { i32, i1 } %pairold, 0 @@ -371,9 +380,13 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]] ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32 @@ -387,16 +400,19 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i64 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i64 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: dmb ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i64 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i64 [[OLDVAL]] +; CHECK: ret i64 [[LOADED_EXIT]] %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic %old = extractvalue { i64, i1 } %pairold, 0 @@ -411,9 +427,13 @@ define i32 @test_cmpxchg_minsize(i32* %addr, i32 %desired, i32 %new) minsize { ; CHECK: [[START]]: ; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ] ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[START]] @@ -423,16 +443,19 @@ define i32 @test_cmpxchg_minsize(i32* %addr, i32 %desired, i32 %new) minsize { ; CHECK: br label %[[END:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK: [[LOADED_NO_STORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ] ; CHECK: call void @llvm.arm.clrex() ; CHECK: br label %[[FAILURE_BB]] ; CHECK: [[FAILURE_BB]]: +; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[END]] ; CHECK: [[END]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[LOADED]] +; CHECK: ret i32 [[LOADED_EXIT]] %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst %oldval = extractvalue { i32, i1 } %pair, 0 diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll index 8397182e7e8ff8..deecf01f24365d 100644 --- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll +++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll @@ -91,9 +91,13 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) { ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr) ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 @@ -104,16 +108,19 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) { ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i8 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i8 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: fence_cst ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i8 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i8 [[OLDVAL]] +; CHECK: ret i8 [[LOADED_EXIT]] %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst %old = extractvalue { i8, i1 } %pairold, 0 @@ -129,9 +136,13 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr) ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 @@ -142,16 +153,20 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; The PHI is not required. +; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i16 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i16 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: fence ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i16 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i16 [[OLDVAL]] +; CHECK: ret i16 [[LOADED_EXIT]] %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic %old = extractvalue { i16, i1 } %pairold, 0 @@ -166,9 +181,13 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva ; CHECK: [[LOOP]]: ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr) ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] @@ -178,16 +197,19 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: fence_cst ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[OLDVAL]] +; CHECK: ret i32 [[LOADED_EXIT]] %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire %old = extractvalue { i32, i1 } %pairold, 0 @@ -209,9 +231,13 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]] ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32 @@ -225,16 +251,19 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i64 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: fence_cst ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i64 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i64 [[OLDVAL]] +; CHECK: ret i64 [[LOADED_EXIT]] %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic %old = extractvalue { i64, i1 } %pairold, 0 diff --git a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll index 375b41a26dbd4f..828c1c4cb3b7ec 100644 --- a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll +++ b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll @@ -15,6 +15,7 @@ define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) { ; CHECK: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ] ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]] @@ -24,16 +25,19 @@ define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) { ; CHECK: br label %[[END:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK: [[LOADED_NOSTORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ] ; CHECK: call void @llvm.arm.clrex() ; CHECK: br label %[[FAILURE_BB]] ; CHECK: [[FAILURE_BB]]: +; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ], [ [[LOADED_TRYSTORE]], %[[TRY_STORE]] ] ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[END]] ; CHECK: [[END]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[LOADED]] +; CHECK: ret i32 [[LOADED_EXIT]] %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst %oldval = extractvalue { i32, i1 } %pair, 0 @@ -87,9 +91,13 @@ define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) { ; CHECK: [[START]]: ; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ] ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]] @@ -99,16 +107,19 @@ define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) { ; CHECK: br label %[[END:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK: [[LOADED_NOSTORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ] ; CHECK: call void @llvm.arm.clrex() ; CHECK: br label %[[FAILURE_BB]] ; CHECK: [[FAILURE_BB]]: +; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ], [ [[LOADED_TRYSTORE]], %[[TRY_STORE]] ] ; CHECK-NOT: dmb ; CHECK: br label %[[END]] ; CHECK: [[END]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[LOADED]] +; CHECK: ret i32 [[LOADED_EXIT]] %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new monotonic monotonic %oldval = extractvalue { i32, i1 } %pair, 0 @@ -129,6 +140,7 @@ define i32 @test_cmpxchg_seq_cst_minsize(i32* %addr, i32 %desired, i32 %new) min ; CHECK: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ] ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]] @@ -138,16 +150,19 @@ define i32 @test_cmpxchg_seq_cst_minsize(i32* %addr, i32 %desired, i32 %new) min ; CHECK: br label %[[END:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK: [[LOADED_NOSTORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ] ; CHECK: call void @llvm.arm.clrex() ; CHECK: br label %[[FAILURE_BB]] ; CHECK: [[FAILURE_BB]]: +; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ], [ [[LOADED_TRYSTORE]], %[[TRY_STORE]] ] ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[END]] ; CHECK: [[END]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[LOADED]] +; CHECK: ret i32 [[LOADED_EXIT]] %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst %oldval = extractvalue { i32, i1 } %pair, 0 diff --git a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll index 74c05615d0b9af..999fa1541f5656 100644 --- a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll +++ b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll @@ -39,12 +39,12 @@ target triple = "sparcv9-unknown-unknown" ; CHECK: %17 = icmp ne i32 %10, %16 ; CHECK: br i1 %17, label %partword.cmpxchg.loop, label %partword.cmpxchg.end ; CHECK:partword.cmpxchg.end: -; CHECK: %18 = lshr i32 %14, %ShiftAmt -; CHECK: %19 = trunc i32 %18 to i8 -; CHECK: %20 = insertvalue { i8, i1 } undef, i8 %19, 0 -; CHECK: %21 = insertvalue { i8, i1 } %20, i1 %15, 1 +; CHECK: %shifted = lshr i32 %14, %ShiftAmt +; CHECK: %extracted = trunc i32 %shifted to i8 +; CHECK: %18 = insertvalue { i8, i1 } undef, i8 %extracted, 0 +; CHECK: %19 = insertvalue { i8, i1 } %18, i1 %15, 1 ; CHECK: fence seq_cst -; CHECK: %ret = extractvalue { i8, i1 } %21, 0 +; CHECK: %ret = extractvalue { i8, i1 } %19, 0 ; CHECK: ret i8 %ret define i8 @test_cmpxchg_i8(i8* %arg, i8 %old, i8 %new) { entry: @@ -84,12 +84,12 @@ entry: ; CHECK: %17 = icmp ne i32 %10, %16 ; CHECK: br i1 %17, label %partword.cmpxchg.loop, label %partword.cmpxchg.end ; CHECK:partword.cmpxchg.end: -; CHECK: %18 = lshr i32 %14, %ShiftAmt -; CHECK: %19 = trunc i32 %18 to i16 -; CHECK: %20 = insertvalue { i16, i1 } undef, i16 %19, 0 -; CHECK: %21 = insertvalue { i16, i1 } %20, i1 %15, 1 +; CHECK: %shifted = lshr i32 %14, %ShiftAmt +; CHECK: %extracted = trunc i32 %shifted to i16 +; CHECK: %18 = insertvalue { i16, i1 } undef, i16 %extracted, 0 +; CHECK: %19 = insertvalue { i16, i1 } %18, i1 %15, 1 ; CHECK: fence seq_cst -; CHECK: %ret = extractvalue { i16, i1 } %21, 0 +; CHECK: %ret = extractvalue { i16, i1 } %19, 0 ; CHECK: ret i16 %ret define i16 @test_cmpxchg_i16(i16* %arg, i16 %old, i16 %new) { entry: @@ -125,10 +125,10 @@ entry: ; CHECK: %newloaded = extractvalue { i32, i1 } %9, 0 ; CHECK: br i1 %success, label %atomicrmw.end, label %atomicrmw.start ; CHECK:atomicrmw.end: -; CHECK: %10 = lshr i32 %newloaded, %ShiftAmt -; CHECK: %11 = trunc i32 %10 to i16 +; CHECK: %shifted = lshr i32 %newloaded, %ShiftAmt +; CHECK: %extracted = trunc i32 %shifted to i16 ; CHECK: fence seq_cst -; CHECK: ret i16 %11 +; CHECK: ret i16 %extracted define i16 @test_add_i16(i16* %arg, i16 %val) { entry: %ret = atomicrmw add i16* %arg, i16 %val seq_cst @@ -174,15 +174,15 @@ entry: ; CHECK-LABEL: @test_min_i16( ; CHECK:atomicrmw.start: -; CHECK: %6 = lshr i32 %loaded, %ShiftAmt -; CHECK: %7 = trunc i32 %6 to i16 -; CHECK: %8 = icmp sle i16 %7, %val -; CHECK: %new = select i1 %8, i16 %7, i16 %val -; CHECK: %9 = zext i16 %new to i32 -; CHECK: %10 = shl i32 %9, %ShiftAmt -; CHECK: %11 = and i32 %loaded, %Inv_Mask -; CHECK: %12 = or i32 %11, %10 -; CHECK: %13 = cmpxchg i32* %AlignedAddr, i32 %loaded, i32 %12 monotonic monotonic +; CHECK: %shifted = lshr i32 %loaded, %ShiftAmt +; CHECK: %extracted = trunc i32 %shifted to i16 +; CHECK: %6 = icmp sle i16 %extracted, %val +; CHECK: %new = select i1 %6, i16 %extracted, i16 %val +; CHECK: %extended = zext i16 %new to i32 +; CHECK: %shifted1 = shl nuw i32 %extended, %ShiftAmt +; CHECK: %unmasked = and i32 %loaded, %Inv_Mask +; CHECK: %inserted = or i32 %unmasked, %shifted1 +; CHECK: %7 = cmpxchg i32* %AlignedAddr, i32 %loaded, i32 %inserted monotonic monotonic ; CHECK:atomicrmw.end: define i16 @test_min_i16(i16* %arg, i16 %val) { entry: