diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index a300e5b2b1dabd..813d825f8b570c 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -2415,17 +2415,15 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, Fragments.insert(BF); for (const BinaryFunction *F : Fragments) { const uint64_t FuncAddr = F->getAddress(); - const auto &FragmentProbes = - llvm::make_range(ProbeMap.lower_bound(FuncAddr), - ProbeMap.lower_bound(FuncAddr + F->getSize())); - for (const auto &[OutputAddress, Probes] : FragmentProbes) { + for (const MCDecodedPseudoProbe &Probe : + ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) { + const uint32_t OutputAddress = Probe.getAddress(); const uint32_t InputOffset = BAT->translate( FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true); const unsigned BlockIndex = getBlock(InputOffset).second; - for (const MCDecodedPseudoProbe &Probe : Probes) - YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back( - yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(), - Probe.getType()}); + YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back( + yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(), + Probe.getType()}); } } } diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index 84777741d611a3..f74cf60e076d0a 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -193,13 +193,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, const uint64_t FuncAddr = BF.getAddress(); const std::pair &BlockRange = BB->getInputAddressRange(); - const auto &BlockProbes = - llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first), - ProbeMap.lower_bound(FuncAddr + BlockRange.second)); - for (const auto &[_, Probes] : BlockProbes) - for (const MCDecodedPseudoProbe &Probe : Probes) - YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{ - 
Probe.getGuid(), Probe.getIndex(), Probe.getType()}); + for (const MCDecodedPseudoProbe &Probe : ProbeMap.find( + FuncAddr + BlockRange.first, FuncAddr + BlockRange.second)) + YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{ + Probe.getGuid(), Probe.getIndex(), Probe.getType()}); } YamlBF.Blocks.emplace_back(YamlBB); diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 886bbdbf9d686e..4925b4b385d9b1 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -143,7 +143,6 @@ void PseudoProbeRewriter::parsePseudoProbe() { if (!ProbeDecoder.buildAddress2ProbeMap( reinterpret_cast(Contents.data()), Contents.size(), GuidFilter, FuncStartAddrs)) { - ProbeDecoder.getAddress2ProbesMap().clear(); errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n"; return; } @@ -156,7 +155,8 @@ void PseudoProbeRewriter::parsePseudoProbe() { ProbeDecoder.printProbesForAllAddresses(outs()); } - for (const auto &[GUID, FuncDesc] : ProbeDecoder.getGUID2FuncDescMap()) { + for (const auto &FuncDesc : ProbeDecoder.getGUID2FuncDescMap()) { + uint64_t GUID = FuncDesc.FuncGUID; if (!FuncStartAddrs.contains(GUID)) continue; BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncStartAddrs[GUID]); @@ -174,13 +174,13 @@ void PseudoProbeRewriter::updatePseudoProbes() { AddressProbesMap &Address2ProbesMap = ProbeDecoder.getAddress2ProbesMap(); const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap(); - for (auto &AP : Address2ProbesMap) { - BinaryFunction *F = BC.getBinaryFunctionContainingAddress(AP.first); + for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) { + uint64_t Address = Probe.getAddress(); + BinaryFunction *F = BC.getBinaryFunctionContainingAddress(Address); // If F is removed, eliminate all probes inside it from inline tree // Setting probes' addresses as INT64_MAX means elimination if (!F) { - for (MCDecodedPseudoProbe &Probe : AP.second) - 
Probe.setAddress(INT64_MAX); + Probe.setAddress(INT64_MAX); continue; } // If F is not emitted, the function will remain in the same address as its @@ -188,45 +188,36 @@ void PseudoProbeRewriter::updatePseudoProbes() { if (!F->isEmitted()) continue; - uint64_t Offset = AP.first - F->getAddress(); + uint64_t Offset = Address - F->getAddress(); const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset); uint64_t BlkOutputAddress = BB->getOutputAddressRange().first; // Check if block output address is defined. // If not, such block is removed from binary. Then remove the probes from // inline tree if (BlkOutputAddress == 0) { - for (MCDecodedPseudoProbe &Probe : AP.second) - Probe.setAddress(INT64_MAX); + Probe.setAddress(INT64_MAX); continue; } - unsigned ProbeTrack = AP.second.size(); - std::list::iterator Probe = AP.second.begin(); - while (ProbeTrack != 0) { - if (Probe->isBlock()) { - Probe->setAddress(BlkOutputAddress); - } else if (Probe->isCall()) { - // A call probe may be duplicated due to ICP - // Go through output of InputOffsetToAddressMap to collect all related - // probes - auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first); - auto CallOutputAddress = CallOutputAddresses.first; - if (CallOutputAddress == CallOutputAddresses.second) { - Probe->setAddress(INT64_MAX); - } else { - Probe->setAddress(CallOutputAddress->second); - CallOutputAddress = std::next(CallOutputAddress); - } - - while (CallOutputAddress != CallOutputAddresses.second) { - AP.second.push_back(*Probe); - AP.second.back().setAddress(CallOutputAddress->second); - Probe->getInlineTreeNode()->addProbes(&(AP.second.back())); - CallOutputAddress = std::next(CallOutputAddress); - } + if (Probe.isBlock()) { + Probe.setAddress(BlkOutputAddress); + } else if (Probe.isCall()) { + // A call probe may be duplicated due to ICP + // Go through output of InputOffsetToAddressMap to collect all related + // probes + auto CallOutputAddresses = 
BC.getIOAddressMap().lookupAll(Address); + auto CallOutputAddress = CallOutputAddresses.first; + if (CallOutputAddress == CallOutputAddresses.second) { + Probe.setAddress(INT64_MAX); + } else { + Probe.setAddress(CallOutputAddress->second); + CallOutputAddress = std::next(CallOutputAddress); + } + + while (CallOutputAddress != CallOutputAddresses.second) { + ProbeDecoder.addInjectedProbe(Probe, CallOutputAddress->second); + CallOutputAddress = std::next(CallOutputAddress); } - Probe = std::next(Probe); - ProbeTrack--; } } @@ -242,22 +233,16 @@ void PseudoProbeRewriter::updatePseudoProbes() { BinaryBlock.getName(); // scan all addresses -> correlate probe to block when print out - std::vector Addresses; - for (auto &Entry : Address2ProbesMap) - Addresses.push_back(Entry.first); - llvm::sort(Addresses); - for (uint64_t Key : Addresses) { - for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) { - if (Probe.getAddress() == INT64_MAX) - outs() << "Deleted Probe: "; - else - outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " "; - Probe.print(outs(), GUID2Func, true); - // print block name only if the probe is block type and undeleted. - if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) - outs() << format_hex(Probe.getAddress(), 8) << " Probe is in " - << Addr2BlockNames[Probe.getAddress()] << "\n"; - } + for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) { + if (Probe.getAddress() == INT64_MAX) + outs() << "Deleted Probe: "; + else + outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " "; + Probe.print(outs(), GUID2Func, true); + // print block name only if the probe is block type and undeleted. 
+ if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) + outs() << format_hex(Probe.getAddress(), 8) << " Probe is in " + << Addr2BlockNames[Probe.getAddress()] << "\n"; } outs() << "=======================================\n"; } @@ -333,7 +318,7 @@ void PseudoProbeRewriter::encodePseudoProbes() { ProbeDecoder.getDummyInlineRoot(); for (auto Child = Root.getChildren().begin(); Child != Root.getChildren().end(); ++Child) - Inlinees[Child->first] = Child->second.get(); + Inlinees[Child->getInlineSite()] = &*Child; for (auto Inlinee : Inlinees) // INT64_MAX is "placeholder" of unused callsite index field in the pair @@ -359,25 +344,37 @@ void PseudoProbeRewriter::encodePseudoProbes() { EmitInt(Cur->Guid, 8); // Emit number of probes in this node uint64_t Deleted = 0; - for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) + for (MCDecodedPseudoProbe *&Probe : + llvm::make_pointer_range(Cur->getProbes())) if (Probe->getAddress() == INT64_MAX) Deleted++; LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n"); - uint64_t ProbesSize = Cur->getProbes().size() - Deleted; + size_t InjectedProbes = ProbeDecoder.getNumInjectedProbes(Cur); + uint64_t ProbesSize = Cur->getProbes().size() - Deleted + InjectedProbes; EmitULEB128IntValue(ProbesSize); // Emit number of direct inlinees EmitULEB128IntValue(Cur->getChildren().size()); // Emit probes in this group - for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) { + for (MCDecodedPseudoProbe *&Probe : + llvm::make_pointer_range(Cur->getProbes())) { if (Probe->getAddress() == INT64_MAX) continue; EmitDecodedPseudoProbe(Probe); LastProbe = Probe; } + if (InjectedProbes) { + for (MCDecodedPseudoProbe *&Probe : + llvm::make_pointer_range(ProbeDecoder.getInjectedProbes(Cur))) { + if (Probe->getAddress() == INT64_MAX) + continue; + EmitDecodedPseudoProbe(Probe); + LastProbe = Probe; + } + } for (auto Child = Cur->getChildren().begin(); Child != Cur->getChildren().end(); ++Child) - Inlinees[Child->first] = Child->second.get(); + 
Inlinees[Child->getInlineSite()] = &*Child; for (const auto &Inlinee : Inlinees) { assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid"); NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second}); diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 3d998d4d81a5e4..f1cd34b999ceff 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -314,6 +314,8 @@ Bug Fixes to C++ Support template depth than the friend function template. (#GH98258) - Clang now rebuilds the template parameters of out-of-line declarations and specializations in the context of the current instantiation in all cases. +- Fix evaluation of the index of dependent pack indexing expressions/types specifiers (#GH105900) + Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index d683106bb0e298..212c1f6ff3a124 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -660,6 +660,9 @@ KEYWORD(out , KEYHLSL) #define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) KEYWORD(Name, KEYHLSL) #include "clang/Basic/HLSLIntangibleTypes.def" +// HLSL Type traits. +TYPE_TRAIT_2(__builtin_hlsl_is_scalarized_layout_compatible, IsScalarizedLayoutCompatible, KEYHLSL) + // OpenMP Type Traits UNARY_EXPR_OR_TYPE_TRAIT(__builtin_omp_required_simd_align, OpenMPRequiredSimdAlign, KEYALL) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d59c2cce1fd780..f9060f55d2f78a 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -384,8 +384,7 @@ class Driver { /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. 
- static std::string GetResourcesPath(StringRef BinaryPath, - StringRef CustomResourceDir = ""); + static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, DiagnosticsEngine &Diags, std::string Title = "clang LLVM compiler", diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index 3aae3383c215b5..5277fb57a23343 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -61,6 +61,9 @@ class SemaHLSL : public SemaBase { void handleParamModifierAttr(Decl *D, const ParsedAttr &AL); bool CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + + // HLSL Type trait implementations + bool IsScalarizedLayoutCompatible(QualType T1, QualType T2) const; }; } // namespace clang diff --git a/clang/lib/AST/ByteCode/EvalEmitter.cpp b/clang/lib/AST/ByteCode/EvalEmitter.cpp index 53ec8f52d4921f..3b9e5f9f9f69cd 100644 --- a/clang/lib/AST/ByteCode/EvalEmitter.cpp +++ b/clang/lib/AST/ByteCode/EvalEmitter.cpp @@ -219,7 +219,7 @@ bool EvalEmitter::emitRetValue(const SourceInfo &Info) { return false; if (std::optional APV = - Ptr.toRValue(S.getCtx(), EvalResult.getSourceType())) { + Ptr.toRValue(S.getASTContext(), EvalResult.getSourceType())) { EvalResult.setValue(*APV); return true; } diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index aea303f0e630c9..09d3f4525138ed 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -326,7 +326,7 @@ bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc) { auto IsConstType = [&S](const VarDecl *VD) -> bool { QualType T = VD->getType(); - if (T.isConstant(S.getCtx())) + if (T.isConstant(S.getASTContext())) return true; if (S.getLangOpts().CPlusPlus && !S.getLangOpts().CPlusPlus11) @@ -523,9 +523,9 @@ bool CheckGlobalInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { assert(S.getLangOpts().CPlusPlus); const auto 
*VD = cast(Ptr.getDeclDesc()->asValueDecl()); if ((!VD->hasConstantInitialization() && - VD->mightBeUsableInConstantExpressions(S.getCtx())) || + VD->mightBeUsableInConstantExpressions(S.getASTContext())) || (S.getLangOpts().OpenCL && !S.getLangOpts().CPlusPlus11 && - !VD->hasICEInitializer(S.getCtx()))) { + !VD->hasICEInitializer(S.getASTContext()))) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_var_init_non_constant, 1) << VD; S.Note(VD->getLocation(), diag::note_declared_at); @@ -797,7 +797,7 @@ bool CheckNewDeleteForms(InterpState &S, CodePtr OpPC, bool NewWasArray, // but we want to get the array size right. if (D->isArray()) { QualType ElemQT = D->getType()->getPointeeType(); - TypeToDiagnose = S.getCtx().getConstantArrayType( + TypeToDiagnose = S.getASTContext().getConstantArrayType( ElemQT, APInt(64, static_cast(D->getNumElems()), false), nullptr, ArraySizeModifier::Normal, 0); } else @@ -819,7 +819,7 @@ bool CheckDeleteSource(InterpState &S, CodePtr OpPC, const Expr *Source, // Whatever this is, we didn't heap allocate it. const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_delete_not_heap_alloc) - << Ptr.toDiagnosticString(S.getCtx()); + << Ptr.toDiagnosticString(S.getASTContext()); if (Ptr.isTemporary()) S.Note(Ptr.getDeclLoc(), diag::note_constexpr_temporary_here); diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 81c547991c3d7d..242532a3f0544e 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -41,7 +41,7 @@ using APSInt = llvm::APSInt; /// Convert a value to an APValue. template bool ReturnValue(const InterpState &S, const T &V, APValue &R) { - R = V.toAPValue(S.getCtx()); + R = V.toAPValue(S.getASTContext()); return true; } @@ -231,12 +231,12 @@ bool CheckArraySize(InterpState &S, CodePtr OpPC, SizeT *NumElements, // constructing the array, we catch this here. 
SizeT MaxElements = SizeT::from(Descriptor::MaxArrayElemBytes / ElemSize); if (NumElements->toAPSInt().getActiveBits() > - ConstantArrayType::getMaxSizeBits(S.getCtx()) || + ConstantArrayType::getMaxSizeBits(S.getASTContext()) || *NumElements > MaxElements) { if (!IsNoThrow) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_new_too_large) - << NumElements->toDiagnosticString(S.getCtx()); + << NumElements->toDiagnosticString(S.getASTContext()); } return false; } @@ -911,8 +911,8 @@ inline bool CmpHelper(InterpState &S, CodePtr OpPC, const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_unspecified) - << LHS.toDiagnosticString(S.getCtx()) - << RHS.toDiagnosticString(S.getCtx()); + << LHS.toDiagnosticString(S.getASTContext()) + << RHS.toDiagnosticString(S.getASTContext()); return false; } @@ -927,7 +927,7 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, if (FP.isWeak()) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_weak_comparison) - << FP.toDiagnosticString(S.getCtx()); + << FP.toDiagnosticString(S.getASTContext()); return false; } } @@ -945,8 +945,8 @@ inline bool CmpHelper(InterpState &S, CodePtr OpPC, CompareFn Fn) { if (!Pointer::hasSameBase(LHS, RHS)) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_unspecified) - << LHS.toDiagnosticString(S.getCtx()) - << RHS.toDiagnosticString(S.getCtx()); + << LHS.toDiagnosticString(S.getASTContext()) + << RHS.toDiagnosticString(S.getASTContext()); return false; } else { unsigned VL = LHS.getByteOffset(); @@ -974,7 +974,7 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { if (P.isWeak()) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_weak_comparison) - << P.toDiagnosticString(S.getCtx()); + << P.toDiagnosticString(S.getASTContext()); return false; } 
} @@ -984,13 +984,13 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { RHS.getOffset() == 0) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_past_end) - << LHS.toDiagnosticString(S.getCtx()); + << LHS.toDiagnosticString(S.getASTContext()); return false; } else if (RHS.isOnePastEnd() && !LHS.isOnePastEnd() && !LHS.isZero() && LHS.getOffset() == 0) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_past_end) - << RHS.toDiagnosticString(S.getCtx()); + << RHS.toDiagnosticString(S.getASTContext()); return false; } @@ -1073,8 +1073,8 @@ bool CMP3(InterpState &S, CodePtr OpPC, const ComparisonCategoryInfo *CmpInfo) { // This should only happen with pointers. const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_unspecified) - << LHS.toDiagnosticString(S.getCtx()) - << RHS.toDiagnosticString(S.getCtx()); + << LHS.toDiagnosticString(S.getASTContext()) + << RHS.toDiagnosticString(S.getASTContext()); return false; } @@ -1342,7 +1342,7 @@ bool InitGlobalTemp(InterpState &S, CodePtr OpPC, uint32_t I, const Pointer &Ptr = S.P.getGlobal(I); const T Value = S.Stk.peek(); - APValue APV = Value.toAPValue(S.getCtx()); + APValue APV = Value.toAPValue(S.getASTContext()); APValue *Cached = Temp->getOrCreateValue(true); *Cached = APV; @@ -1369,7 +1369,7 @@ inline bool InitGlobalTempComp(InterpState &S, CodePtr OpPC, std::make_pair(P.getDeclDesc()->asExpr(), Temp)); if (std::optional APV = - P.toRValue(S.getCtx(), Temp->getTemporaryExpr()->getType())) { + P.toRValue(S.getASTContext(), Temp->getTemporaryExpr()->getType())) { *Cached = *APV; return true; } @@ -1404,7 +1404,8 @@ bool InitThisBitField(InterpState &S, CodePtr OpPC, const Record::Field *F, return false; const Pointer &Field = This.atField(FieldOffset); const auto &Value = S.Stk.pop(); - Field.deref() = 
Value.truncate(F->Decl->getBitWidthValue(S.getCtx())); + Field.deref() = + Value.truncate(F->Decl->getBitWidthValue(S.getASTContext())); Field.initialize(); return true; } @@ -1427,7 +1428,8 @@ bool InitBitField(InterpState &S, CodePtr OpPC, const Record::Field *F) { assert(F->isBitField()); const T &Value = S.Stk.pop(); const Pointer &Field = S.Stk.peek().atField(F->Offset); - Field.deref() = Value.truncate(F->Decl->getBitWidthValue(S.getCtx())); + Field.deref() = + Value.truncate(F->Decl->getBitWidthValue(S.getASTContext())); Field.activate(); Field.initialize(); return true; @@ -1477,7 +1479,7 @@ inline bool GetPtrField(InterpState &S, CodePtr OpPC, uint32_t Off) { return false; if (Ptr.isIntegralPointer()) { - S.Stk.push(Ptr.asIntPointer().atOffset(S.getCtx(), Off)); + S.Stk.push(Ptr.asIntPointer().atOffset(S.getASTContext(), Off)); return true; } @@ -1505,7 +1507,7 @@ inline bool GetPtrFieldPop(InterpState &S, CodePtr OpPC, uint32_t Off) { return false; if (Ptr.isIntegralPointer()) { - S.Stk.push(Ptr.asIntPointer().atOffset(S.getCtx(), Off)); + S.Stk.push(Ptr.asIntPointer().atOffset(S.getASTContext(), Off)); return true; } @@ -1721,7 +1723,7 @@ bool StoreBitField(InterpState &S, CodePtr OpPC) { if (Ptr.canBeInitialized()) Ptr.initialize(); if (const auto *FD = Ptr.getField()) - Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getCtx())); + Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getASTContext())); else Ptr.deref() = Value; return true; @@ -1736,7 +1738,7 @@ bool StoreBitFieldPop(InterpState &S, CodePtr OpPC) { if (Ptr.canBeInitialized()) Ptr.initialize(); if (const auto *FD = Ptr.getField()) - Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getCtx())); + Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getASTContext())); else Ptr.deref() = Value; return true; @@ -2014,7 +2016,7 @@ inline bool SubPtr(InterpState &S, CodePtr OpPC) { while (auto *AT = dyn_cast(PtrT)) PtrT = AT->getElementType(); - QualType ArrayTy = 
S.getCtx().getConstantArrayType( + QualType ArrayTy = S.getASTContext().getConstantArrayType( PtrT, APInt::getZero(1), nullptr, ArraySizeModifier::Normal, 0); S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_pointer_subtraction_zero_size) @@ -2953,7 +2955,7 @@ inline bool CheckDecl(InterpState &S, CodePtr OpPC, const VarDecl *VD) { if (VD == S.EvaluatingDecl) return true; - if (!VD->isUsableInConstantExpressions(S.getCtx())) { + if (!VD->isUsableInConstantExpressions(S.getASTContext())) { S.CCEDiag(VD->getLocation(), diag::note_constexpr_static_local) << (VD->getTSCSpec() == TSCS_unspecified ? 0 : 1) << VD; return false; @@ -3047,7 +3049,7 @@ static inline bool Free(InterpState &S, CodePtr OpPC, bool DeleteIsArrayForm) { if (!Ptr.isRoot() || Ptr.isOnePastEnd() || Ptr.isArrayElement()) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_delete_subobject) - << Ptr.toDiagnosticString(S.getCtx()) << Ptr.isOnePastEnd(); + << Ptr.toDiagnosticString(S.getASTContext()) << Ptr.isOnePastEnd(); return false; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 26abf582051067..1a71bff25d2540 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -38,7 +38,7 @@ static T getParam(const InterpFrame *Frame, unsigned Index) { } PrimType getIntPrimType(const InterpState &S) { - const TargetInfo &TI = S.getCtx().getTargetInfo(); + const TargetInfo &TI = S.getASTContext().getTargetInfo(); unsigned IntWidth = TI.getIntWidth(); if (IntWidth == 32) @@ -49,7 +49,7 @@ PrimType getIntPrimType(const InterpState &S) { } PrimType getLongPrimType(const InterpState &S) { - const TargetInfo &TI = S.getCtx().getTargetInfo(); + const TargetInfo &TI = S.getASTContext().getTargetInfo(); unsigned LongWidth = TI.getLongWidth(); if (LongWidth == 64) @@ -272,10 +272,10 @@ static bool interp__builtin_nan(InterpState &S, CodePtr OpPC, return false; const 
llvm::fltSemantics &TargetSemantics = - S.getCtx().getFloatTypeSemantics(F->getDecl()->getReturnType()); + S.getASTContext().getFloatTypeSemantics(F->getDecl()->getReturnType()); Floating Result; - if (S.getCtx().getTargetInfo().isNan2008()) { + if (S.getASTContext().getTargetInfo().isNan2008()) { if (Signaling) Result = Floating( llvm::APFloat::getSNaN(TargetSemantics, /*Negative=*/false, &Fill)); @@ -303,7 +303,7 @@ static bool interp__builtin_nan(InterpState &S, CodePtr OpPC, static bool interp__builtin_inf(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const Function *F) { const llvm::fltSemantics &TargetSemantics = - S.getCtx().getFloatTypeSemantics(F->getDecl()->getReturnType()); + S.getASTContext().getFloatTypeSemantics(F->getDecl()->getReturnType()); S.Stk.push(Floating::getInf(TargetSemantics)); return true; @@ -689,8 +689,8 @@ static bool interp__builtin_eh_return_data_regno(InterpState &S, CodePtr OpPC, PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType()); APSInt Arg = peekToAPSInt(S.Stk, ArgT); - int Result = - S.getCtx().getTargetInfo().getEHDataRegisterNumber(Arg.getZExtValue()); + int Result = S.getASTContext().getTargetInfo().getEHDataRegisterNumber( + Arg.getZExtValue()); pushInteger(S, Result, Call->getType()); return true; } @@ -734,7 +734,7 @@ static bool interp__builtin_overflowop(InterpState &S, CodePtr OpPC, ResultType->isSignedIntegerOrEnumerationType(); uint64_t LHSSize = LHS.getBitWidth(); uint64_t RHSSize = RHS.getBitWidth(); - uint64_t ResultSize = S.getCtx().getTypeSize(ResultType); + uint64_t ResultSize = S.getASTContext().getTypeSize(ResultType); uint64_t MaxBits = std::max(std::max(LHSSize, RHSSize), ResultSize); // Add an additional bit if the signedness isn't uniformly agreed to. We @@ -794,7 +794,7 @@ static bool interp__builtin_overflowop(InterpState &S, CodePtr OpPC, // since it will give us the behavior of a TruncOrSelf in the case where // its parameter <= its size. 
We previously set Result to be at least the // type-size of the result, so getTypeSize(ResultType) <= Resu - APSInt Temp = Result.extOrTrunc(S.getCtx().getTypeSize(ResultType)); + APSInt Temp = Result.extOrTrunc(S.getASTContext().getTypeSize(ResultType)); Temp.setIsSigned(ResultType->isSignedIntegerOrEnumerationType()); if (!APSInt::isSameValue(Temp, Result)) @@ -974,8 +974,8 @@ static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC, if (Size.isPowerOfTwo()) { // Check against inlining width. unsigned InlineWidthBits = - S.getCtx().getTargetInfo().getMaxAtomicInlineWidth(); - if (Size <= S.getCtx().toCharUnitsFromBits(InlineWidthBits)) { + S.getASTContext().getTargetInfo().getMaxAtomicInlineWidth(); + if (Size <= S.getASTContext().toCharUnitsFromBits(InlineWidthBits)) { // OK, we will inline appropriately-aligned operations of this size, // and _Atomic(T) is appropriately-aligned. @@ -1007,7 +1007,7 @@ static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC, if (auto PtrTy = PtrArg->getType()->getAs()) { QualType PointeeType = PtrTy->getPointeeType(); if (!PointeeType->isIncompleteType() && - S.getCtx().getTypeAlignInChars(PointeeType) >= Size) { + S.getASTContext().getTypeAlignInChars(PointeeType) >= Size) { // OK, we will inline operations on this object. 
return returnBool(true); } @@ -1059,7 +1059,7 @@ static bool interp__builtin_is_aligned_up_down(InterpState &S, CodePtr OpPC, S.FFDiag(Call, diag::note_constexpr_invalid_alignment) << Alignment; return false; } - unsigned SrcWidth = S.getCtx().getIntWidth(Call->getArg(0)->getType()); + unsigned SrcWidth = S.getASTContext().getIntWidth(Call->getArg(0)->getType()); APSInt MaxValue(APInt::getOneBitSet(SrcWidth, SrcWidth - 1)); if (APSInt::compareValues(Alignment, MaxValue) > 0) { S.FFDiag(Call, diag::note_constexpr_alignment_too_big) @@ -1094,7 +1094,7 @@ static bool interp__builtin_is_aligned_up_down(InterpState &S, CodePtr OpPC, unsigned PtrOffset = Ptr.getByteOffset(); PtrOffset = Ptr.getIndex(); CharUnits BaseAlignment = - S.getCtx().getDeclAlign(Ptr.getDeclDesc()->asValueDecl()); + S.getASTContext().getDeclAlign(Ptr.getDeclDesc()->asValueDecl()); CharUnits PtrAlign = BaseAlignment.alignmentAtOffset(CharUnits::fromQuantity(PtrOffset)); @@ -1157,7 +1157,7 @@ static bool interp__builtin_os_log_format_buffer_size(InterpState &S, const Function *Func, const CallExpr *Call) { analyze_os_log::OSLogBufferLayout Layout; - analyze_os_log::computeOSLogBufferLayout(S.getCtx(), Call, Layout); + analyze_os_log::computeOSLogBufferLayout(S.getASTContext(), Call, Layout); pushInteger(S, Layout.size().getQuantity(), Call->getType()); return true; } @@ -1624,10 +1624,11 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, const RecordDecl *RD = RT->getDecl(); if (RD->isInvalidDecl()) return false; - const ASTRecordLayout &RL = S.getCtx().getASTRecordLayout(RD); + const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(RD); unsigned FieldIndex = MemberDecl->getFieldIndex(); assert(FieldIndex < RL.getFieldCount() && "offsetof field in wrong type"); - Result += S.getCtx().toCharUnitsFromBits(RL.getFieldOffset(FieldIndex)); + Result += + S.getASTContext().toCharUnitsFromBits(RL.getFieldOffset(FieldIndex)); CurrentType = 
MemberDecl->getType().getNonReferenceType(); break; } @@ -1635,11 +1636,11 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, // When generating bytecode, we put all the index expressions as Sint64 on // the stack. int64_t Index = ArrayIndices[ArrayIndex]; - const ArrayType *AT = S.getCtx().getAsArrayType(CurrentType); + const ArrayType *AT = S.getASTContext().getAsArrayType(CurrentType); if (!AT) return false; CurrentType = AT->getElementType(); - CharUnits ElementSize = S.getCtx().getTypeSizeInChars(CurrentType); + CharUnits ElementSize = S.getASTContext().getTypeSizeInChars(CurrentType); Result += Index * ElementSize; ++ArrayIndex; break; @@ -1656,7 +1657,7 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, const RecordDecl *RD = RT->getDecl(); if (RD->isInvalidDecl()) return false; - const ASTRecordLayout &RL = S.getCtx().getASTRecordLayout(RD); + const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(RD); // Find the base class itself. 
CurrentType = BaseSpec->getType(); diff --git a/clang/lib/AST/ByteCode/InterpFrame.cpp b/clang/lib/AST/ByteCode/InterpFrame.cpp index 8b55b61cbbfa7e..5e98444ef05a59 100644 --- a/clang/lib/AST/ByteCode/InterpFrame.cpp +++ b/clang/lib/AST/ByteCode/InterpFrame.cpp @@ -179,7 +179,7 @@ void InterpFrame::describe(llvm::raw_ostream &OS) const { if (const auto *MCE = dyn_cast_if_present(CallExpr)) { const Expr *Object = MCE->getImplicitObjectArgument(); Object->printPretty(OS, /*Helper=*/nullptr, - S.getCtx().getPrintingPolicy(), + S.getASTContext().getPrintingPolicy(), /*Indentation=*/0); if (Object->getType()->isPointerType()) OS << "->"; @@ -188,18 +188,18 @@ void InterpFrame::describe(llvm::raw_ostream &OS) const { } else if (const auto *OCE = dyn_cast_if_present(CallExpr)) { OCE->getArg(0)->printPretty(OS, /*Helper=*/nullptr, - S.getCtx().getPrintingPolicy(), + S.getASTContext().getPrintingPolicy(), /*Indentation=*/0); OS << "."; } else if (const auto *M = dyn_cast(F)) { - print(OS, This, S.getCtx(), - S.getCtx().getLValueReferenceType( - S.getCtx().getRecordType(M->getParent()))); + print(OS, This, S.getASTContext(), + S.getASTContext().getLValueReferenceType( + S.getASTContext().getRecordType(M->getParent()))); OS << "."; } } - F->getNameForDiagnostic(OS, S.getCtx().getPrintingPolicy(), + F->getNameForDiagnostic(OS, S.getASTContext().getPrintingPolicy(), /*Qualified=*/false); OS << '('; unsigned Off = 0; @@ -212,7 +212,7 @@ void InterpFrame::describe(llvm::raw_ostream &OS) const { PrimType PrimTy = S.Ctx.classify(Ty).value_or(PT_Ptr); - TYPE_SWITCH(PrimTy, print(OS, stackRef(Off), S.getCtx(), Ty)); + TYPE_SWITCH(PrimTy, print(OS, stackRef(Off), S.getASTContext(), Ty)); Off += align(primSize(PrimTy)); if (I + 1 != N) OS << ", "; diff --git a/clang/lib/AST/ByteCode/InterpState.h b/clang/lib/AST/ByteCode/InterpState.h index 61ee54331c65d6..961ba5f5c28a09 100644 --- a/clang/lib/AST/ByteCode/InterpState.h +++ b/clang/lib/AST/ByteCode/InterpState.h @@ -59,7 +59,7 @@ class 
InterpState final : public State, public SourceMapper { Expr::EvalStatus &getEvalStatus() const override { return Parent.getEvalStatus(); } - ASTContext &getCtx() const override { return Parent.getCtx(); } + ASTContext &getASTContext() const override { return Parent.getASTContext(); } // Forward status checks and updates to the walker. bool checkingForUndefinedBehavior() const override { diff --git a/clang/lib/AST/ByteCode/State.cpp b/clang/lib/AST/ByteCode/State.cpp index 0d9dadec4b9581..b4db86e8d22c71 100644 --- a/clang/lib/AST/ByteCode/State.cpp +++ b/clang/lib/AST/ByteCode/State.cpp @@ -74,12 +74,12 @@ void State::addNotes(ArrayRef Diags) { } DiagnosticBuilder State::report(SourceLocation Loc, diag::kind DiagId) { - return getCtx().getDiagnostics().Report(Loc, DiagId); + return getASTContext().getDiagnostics().Report(Loc, DiagId); } /// Add a diagnostic to the diagnostics list. PartialDiagnostic &State::addDiag(SourceLocation Loc, diag::kind DiagId) { - PartialDiagnostic PD(DiagId, getCtx().getDiagAllocator()); + PartialDiagnostic PD(DiagId, getASTContext().getDiagAllocator()); getEvalStatus().Diag->push_back(std::make_pair(Loc, PD)); return getEvalStatus().Diag->back().second; } @@ -93,7 +93,8 @@ OptionalDiagnostic State::diag(SourceLocation Loc, diag::kind DiagId, } unsigned CallStackNotes = getCallStackDepth() - 1; - unsigned Limit = getCtx().getDiagnostics().getConstexprBacktraceLimit(); + unsigned Limit = + getASTContext().getDiagnostics().getConstexprBacktraceLimit(); if (Limit) CallStackNotes = std::min(CallStackNotes, Limit + 1); if (checkingPotentialConstantExpression()) @@ -113,7 +114,9 @@ OptionalDiagnostic State::diag(SourceLocation Loc, diag::kind DiagId, return OptionalDiagnostic(); } -const LangOptions &State::getLangOpts() const { return getCtx().getLangOpts(); } +const LangOptions &State::getLangOpts() const { + return getASTContext().getLangOpts(); +} void State::addCallStack(unsigned Limit) { // Determine which calls to skip, if any. 
diff --git a/clang/lib/AST/ByteCode/State.h b/clang/lib/AST/ByteCode/State.h index 44d6c037c5ad95..2cffce4bc2ae40 100644 --- a/clang/lib/AST/ByteCode/State.h +++ b/clang/lib/AST/ByteCode/State.h @@ -67,7 +67,7 @@ class State { virtual void setActiveDiagnostic(bool Flag) = 0; virtual void setFoldFailureDiagnostic(bool Flag) = 0; virtual Expr::EvalStatus &getEvalStatus() const = 0; - virtual ASTContext &getCtx() const = 0; + virtual ASTContext &getASTContext() const = 0; virtual bool hasPriorDiagnostic() = 0; virtual unsigned getCallStackDepth() = 0; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 826cc5f58bdf51..d46f57521a97d3 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1030,7 +1030,7 @@ namespace { discardCleanups(); } - ASTContext &getCtx() const override { return Ctx; } + ASTContext &getASTContext() const override { return Ctx; } void setEvaluatingDecl(APValue::LValueBase Base, APValue &Value, EvaluatingDeclKind EDK = EvaluatingDeclKind::Ctor) { @@ -2327,9 +2327,9 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc, // In CUDA/HIP device compilation, only device side variables have // constant addresses. 
- if (Info.getCtx().getLangOpts().CUDA && - Info.getCtx().getLangOpts().CUDAIsDevice && - Info.getCtx().CUDAConstantEvalCtx.NoWrongSidedVars) { + if (Info.getASTContext().getLangOpts().CUDA && + Info.getASTContext().getLangOpts().CUDAIsDevice && + Info.getASTContext().CUDAConstantEvalCtx.NoWrongSidedVars) { if ((!Var->hasAttr() && !Var->hasAttr() && !Var->getType()->isCUDADeviceBuiltinSurfaceType() && @@ -5662,7 +5662,7 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, *Info.CurrentCall, hasSpecificAttr(AS->getAttrs()) && isa(SS)); - auto LO = Info.getCtx().getLangOpts(); + auto LO = Info.getASTContext().getLangOpts(); if (LO.CXXAssumptions && !LO.MSVCCompat) { for (auto *Attr : AS->getAttrs()) { auto *AA = dyn_cast(Attr); @@ -5673,7 +5673,7 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, if (Assumption->isValueDependent()) return ESR_Failed; - if (Assumption->HasSideEffects(Info.getCtx())) + if (Assumption->HasSideEffects(Info.getASTContext())) continue; bool Value; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 67e04327296849..9deaa4db81e027 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -173,18 +173,18 @@ getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { } // static -std::string Driver::GetResourcesPath(StringRef BinaryPath, - StringRef CustomResourceDir) { +std::string Driver::GetResourcesPath(StringRef BinaryPath) { // Since the resource directory is embedded in the module hash, it's important // that all places that need it call this function, so that they get the // exact same string ("a/../b/" and "b/" get different hashes, for example). // Dir is bin/ or lib/, depending on where BinaryPath is. 
- std::string Dir = std::string(llvm::sys::path::parent_path(BinaryPath)); - + StringRef Dir = llvm::sys::path::parent_path(BinaryPath); SmallString<128> P(Dir); - if (CustomResourceDir != "") { - llvm::sys::path::append(P, CustomResourceDir); + + StringRef ConfiguredResourceDir(CLANG_RESOURCE_DIR); + if (!ConfiguredResourceDir.empty()) { + llvm::sys::path::append(P, ConfiguredResourceDir); } else { // On Windows, libclang.dll is in bin/. // On non-Windows, libclang.so/.dylib is in lib/. @@ -241,7 +241,7 @@ Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple, #endif // Compute the path to the resource directory. - ResourceDir = GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR); + ResourceDir = GetResourcesPath(ClangExecutable); } void Driver::setDriverMode(StringRef Value) { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 1e6d9370073072..5099858a5672c2 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3138,7 +3138,7 @@ std::string CompilerInvocation::GetResourcesPath(const char *Argv0, void *MainAddr) { std::string ClangExecutable = llvm::sys::fs::getMainExecutable(Argv0, MainAddr); - return Driver::GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR); + return Driver::GetResourcesPath(ClangExecutable); } static void GenerateHeaderSearchArgs(const HeaderSearchOptions &Opts, diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 746c67ff1e979f..d8719ab26cc83f 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -39,6 +39,7 @@ #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaCUDA.h" +#include "clang/Sema/SemaHLSL.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/SemaLambda.h" #include "clang/Sema/SemaObjC.h" @@ -6248,6 +6249,23 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, const TypeSourceI 
TSTToBeDeduced->getTemplateName().getAsTemplateDecl(), RhsT, Info) == TemplateDeductionResult::Success; } + case BTT_IsScalarizedLayoutCompatible: { + if (!LhsT->isVoidType() && !LhsT->isIncompleteArrayType() && + Self.RequireCompleteType(Lhs->getTypeLoc().getBeginLoc(), LhsT, + diag::err_incomplete_type)) + return true; + if (!RhsT->isVoidType() && !RhsT->isIncompleteArrayType() && + Self.RequireCompleteType(Rhs->getTypeLoc().getBeginLoc(), RhsT, + diag::err_incomplete_type)) + return true; + + DiagnoseVLAInCXXTypeTrait( + Self, Lhs, tok::kw___builtin_hlsl_is_scalarized_layout_compatible); + DiagnoseVLAInCXXTypeTrait( + Self, Rhs, tok::kw___builtin_hlsl_is_scalarized_layout_compatible); + + return Self.HLSL().IsScalarizedLayoutCompatible(LhsT, RhsT); + } default: llvm_unreachable("not a BTT"); } diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 17cb47f80590d9..714e8f5cfa9926 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1524,3 +1524,85 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { } return false; } + +static void BuildFlattenedTypeList(QualType BaseTy, + llvm::SmallVectorImpl &List) { + llvm::SmallVector WorkList; + WorkList.push_back(BaseTy); + while (!WorkList.empty()) { + QualType T = WorkList.pop_back_val(); + T = T.getCanonicalType().getUnqualifiedType(); + assert(!isa(T) && "Matrix types not yet supported in HLSL"); + if (const auto *AT = dyn_cast(T)) { + llvm::SmallVector ElementFields; + // Generally I've avoided recursion in this algorithm, but arrays of + // structs could be time-consuming to flatten and churn through on the + // work list. Hopefully nesting arrays of structs containing arrays + // of structs too many levels deep is unlikely. + BuildFlattenedTypeList(AT->getElementType(), ElementFields); + // Repeat the element's field list n times. 
+ for (uint64_t Ct = 0; Ct < AT->getZExtSize(); ++Ct) + List.insert(List.end(), ElementFields.begin(), ElementFields.end()); + continue; + } + // Vectors can only have element types that are builtin types, so this can + // add directly to the list instead of to the WorkList. + if (const auto *VT = dyn_cast(T)) { + List.insert(List.end(), VT->getNumElements(), VT->getElementType()); + continue; + } + if (const auto *RT = dyn_cast(T)) { + const RecordDecl *RD = RT->getDecl(); + if (RD->isUnion()) { + List.push_back(T); + continue; + } + const CXXRecordDecl *CXXD = dyn_cast(RD); + + llvm::SmallVector FieldTypes; + if (CXXD && CXXD->isStandardLayout()) + RD = CXXD->getStandardLayoutBaseWithFields(); + + for (const auto *FD : RD->fields()) + FieldTypes.push_back(FD->getType()); + // Reverse the newly added sub-range. + std::reverse(FieldTypes.begin(), FieldTypes.end()); + WorkList.insert(WorkList.end(), FieldTypes.begin(), FieldTypes.end()); + + // If this wasn't a standard layout type we may also have some base + // classes to deal with. + if (CXXD && !CXXD->isStandardLayout()) { + FieldTypes.clear(); + for (const auto &Base : CXXD->bases()) + FieldTypes.push_back(Base.getType()); + std::reverse(FieldTypes.begin(), FieldTypes.end()); + WorkList.insert(WorkList.end(), FieldTypes.begin(), FieldTypes.end()); + } + continue; + } + List.push_back(T); + } +} + +bool SemaHLSL::IsScalarizedLayoutCompatible(QualType T1, QualType T2) const { + if (T1.isNull() || T2.isNull()) + return false; + + T1 = T1.getCanonicalType().getUnqualifiedType(); + T2 = T2.getCanonicalType().getUnqualifiedType(); + + // If both types are the same canonical type, they're obviously compatible. 
+ if (SemaRef.getASTContext().hasSameType(T1, T2)) + return true; + + llvm::SmallVector T1Types; + BuildFlattenedTypeList(T1, T1Types); + llvm::SmallVector T2Types; + BuildFlattenedTypeList(T2, T2Types); + + // Check the flattened type list + return llvm::equal(T1Types, T2Types, + [this](QualType LHS, QualType RHS) -> bool { + return SemaRef.IsLayoutCompatible(LHS, RHS); + }); +} diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 62287c2d26375c..b3854cd8f82220 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -6669,9 +6669,15 @@ QualType TreeTransform::TransformPackIndexingType(TypeLocBuilder &TLB, PackIndexingTypeLoc TL) { // Transform the index - ExprResult IndexExpr = getDerived().TransformExpr(TL.getIndexExpr()); - if (IndexExpr.isInvalid()) - return QualType(); + ExprResult IndexExpr; + { + EnterExpressionEvaluationContext ConstantContext( + SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); + + IndexExpr = getDerived().TransformExpr(TL.getIndexExpr()); + if (IndexExpr.isInvalid()) + return QualType(); + } QualType Pattern = TL.getPattern(); const PackIndexingType *PIT = TL.getTypePtr(); @@ -15299,9 +15305,14 @@ TreeTransform::TransformPackIndexingExpr(PackIndexingExpr *E) { return E; // Transform the index - ExprResult IndexExpr = getDerived().TransformExpr(E->getIndexExpr()); - if (IndexExpr.isInvalid()) - return ExprError(); + ExprResult IndexExpr; + { + EnterExpressionEvaluationContext ConstantContext( + SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); + IndexExpr = getDerived().TransformExpr(E->getIndexExpr()); + if (IndexExpr.isInvalid()) + return ExprError(); + } SmallVector ExpandedExprs; if (!E->expandsToEmptyPack() && E->getExpressions().empty()) { diff --git a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp index ea09c43cc5ce90..dcf6801a73de2d 100644 --- 
a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp @@ -288,12 +288,65 @@ void StackAddrEscapeChecker::checkPreStmt(const ReturnStmt *RS, EmitStackError(C, R, RetE); } +static const MemSpaceRegion *getStackOrGlobalSpaceRegion(const MemRegion *R) { + assert(R); + if (const auto *MemSpace = R->getMemorySpace()) { + if (const auto *SSR = MemSpace->getAs()) + return SSR; + if (const auto *GSR = MemSpace->getAs()) + return GSR; + } + // If R describes a lambda capture, it will be a symbolic region + // referring to a field region of another symbolic region. + if (const auto *SymReg = R->getBaseRegion()->getAs()) { + if (const auto *OriginReg = SymReg->getSymbol()->getOriginRegion()) + return getStackOrGlobalSpaceRegion(OriginReg); + } + return nullptr; +} + +std::optional printReferrer(const MemRegion *Referrer) { + assert(Referrer); + const StringRef ReferrerMemorySpace = [](const MemSpaceRegion *Space) { + if (isa(Space)) + return "static"; + if (isa(Space)) + return "global"; + assert(isa(Space)); + return "stack"; + }(getStackOrGlobalSpaceRegion(Referrer)); + + while (!Referrer->canPrintPretty()) { + if (const auto *SymReg = dyn_cast(Referrer); + SymReg && SymReg->getSymbol()->getOriginRegion()) { + Referrer = SymReg->getSymbol()->getOriginRegion()->getBaseRegion(); + } else if (isa(Referrer)) { + // Skip members of a class, it is handled in CheckExprLifetime.cpp as + // warn_bind_ref_member_to_parameter or + // warn_init_ptr_member_to_parameter_addr + return std::nullopt; + } else { + Referrer->dump(); + assert(false && "Unexpected referrer region type."); + return std::nullopt; + } + } + assert(Referrer); + assert(Referrer->canPrintPretty()); + + std::string buf; + llvm::raw_string_ostream os(buf); + os << ReferrerMemorySpace << " variable "; + Referrer->printPretty(os); + return buf; +} + void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, CheckerContext &Ctx) const { if 
(!ChecksEnabled[CK_StackAddrEscapeChecker]) return; - ProgramStateRef State = Ctx.getState(); + ExplodedNode *Node = Ctx.getPredecessor(); // Iterate over all bindings to global variables and see if it contains // a memory region in the stack space. @@ -307,23 +360,22 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, /// referred by an other stack variable from different stack frame. bool checkForDanglingStackVariable(const MemRegion *Referrer, const MemRegion *Referred) { - const auto *ReferrerMemSpace = - Referrer->getMemorySpace()->getAs(); + const auto *ReferrerMemSpace = getStackOrGlobalSpaceRegion(Referrer); const auto *ReferredMemSpace = Referred->getMemorySpace()->getAs(); if (!ReferrerMemSpace || !ReferredMemSpace) return false; - const auto *ReferrerFrame = ReferrerMemSpace->getStackFrame(); - const auto *ReferredFrame = ReferredMemSpace->getStackFrame(); + const auto *ReferrerStackSpace = + ReferrerMemSpace->getAs(); + if (!ReferrerStackSpace) + return false; - if (ReferrerMemSpace && ReferredMemSpace) { - if (ReferredFrame == PoppedFrame && - ReferrerFrame->isParentOf(PoppedFrame)) { - V.emplace_back(Referrer, Referred); - return true; - } + if (ReferredMemSpace->getStackFrame() == PoppedFrame && + ReferrerStackSpace->getStackFrame()->isParentOf(PoppedFrame)) { + V.emplace_back(Referrer, Referred); + return true; } return false; } @@ -352,6 +404,7 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, }; CallBack Cb(Ctx); + ProgramStateRef State = Node->getState(); State->getStateManager().getStoreManager().iterBindings(State->getStore(), Cb); @@ -359,14 +412,14 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, return; // Generate an error node. 
- ExplodedNode *N = Ctx.generateNonFatalErrorNode(State); + ExplodedNode *N = Ctx.generateNonFatalErrorNode(State, Node); if (!N) return; if (!BT_stackleak) BT_stackleak = std::make_unique(CheckNames[CK_StackAddrEscapeChecker], - "Stack address stored into global variable"); + "Stack address leaks outside of stack frame"); for (const auto &P : Cb.V) { const MemRegion *Referrer = P.first->getBaseRegion(); @@ -374,13 +427,13 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, // Generate a report for this bug. const StringRef CommonSuffix = - "upon returning to the caller. This will be a dangling reference"; + " upon returning to the caller. This will be a dangling reference"; SmallString<128> Buf; llvm::raw_svector_ostream Out(Buf); const SourceRange Range = genName(Out, Referred, Ctx.getASTContext()); if (isa(Referrer)) { - Out << " is still referred to by a temporary object on the stack " + Out << " is still referred to by a temporary object on the stack" << CommonSuffix; auto Report = std::make_unique(*BT_stackleak, Out.str(), N); @@ -390,28 +443,12 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, return; } - const StringRef ReferrerMemorySpace = [](const MemSpaceRegion *Space) { - if (isa(Space)) - return "static"; - if (isa(Space)) - return "global"; - assert(isa(Space)); - return "stack"; - }(Referrer->getMemorySpace()); - - // We should really only have VarRegions here. - // Anything else is really surprising, and we should get notified if such - // ever happens. - const auto *ReferrerVar = dyn_cast(Referrer); - if (!ReferrerVar) { - assert(false && "We should have a VarRegion here"); - continue; // Defensively skip this one. 
+ auto ReferrerVariable = printReferrer(Referrer); + if (!ReferrerVariable) { + continue; } - const std::string ReferrerVarName = - ReferrerVar->getDecl()->getDeclName().getAsString(); - Out << " is still referred to by the " << ReferrerMemorySpace - << " variable '" << ReferrerVarName << "' " << CommonSuffix; + Out << " is still referred to by the " << *ReferrerVariable << CommonSuffix; auto Report = std::make_unique(*BT_stackleak, Out.str(), N); if (Range.isValid()) diff --git a/clang/test/Analysis/stack-addr-ps.c b/clang/test/Analysis/stack-addr-ps.c index e69ab4189b524f..2e14b7820be136 100644 --- a/clang/test/Analysis/stack-addr-ps.c +++ b/clang/test/Analysis/stack-addr-ps.c @@ -95,3 +95,34 @@ void callTestRegister(void) { char buf[20]; testRegister(buf); // no-warning } + +void top_level_leaking(int **out) { + int local = 42; + *out = &local; // no-warning FIXME +} + +void callee_leaking_via_param(int **out) { + int local = 1; + *out = &local; + // expected-warning@-1{{Address of stack memory associated with local variable 'local' is still referred to by the stack variable 'ptr'}} +} + +void caller_for_leaking_callee() { + int *ptr = 0; + callee_leaking_via_param(&ptr); +} + +void callee_nested_leaking(int **out) { + int local = 1; + *out = &local; + // expected-warning@-1{{Address of stack memory associated with local variable 'local' is still referred to by the stack variable 'ptr'}} +} + +void caller_mid_for_nested_leaking(int **mid) { + callee_nested_leaking(mid); +} + +void caller_for_nested_leaking() { + int *ptr = 0; + caller_mid_for_nested_leaking(&ptr); +} diff --git a/clang/test/Analysis/stack-addr-ps.cpp b/clang/test/Analysis/stack-addr-ps.cpp index bd856be2b8d690..95a6e3cbd25c7c 100644 --- a/clang/test/Analysis/stack-addr-ps.cpp +++ b/clang/test/Analysis/stack-addr-ps.cpp @@ -1,7 +1,10 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify %s -Wno-undefined-bool-conversion +// RUN: %clang_analyze_cc1 
-analyzer-checker=core,debug.ExprInspection -verify %s -Wno-undefined-bool-conversion typedef __INTPTR_TYPE__ intptr_t; +template +void clang_analyzer_dump(T x); + const int& g() { int s; return s; // expected-warning{{Address of stack memory associated with local variable 's' returned}} expected-warning{{reference to stack memory associated with local variable 's' returned}} @@ -161,3 +164,619 @@ C make1() { void test_copy_elision() { C c1 = make1(); } + +namespace leaking_via_direct_pointer { +void* returned_direct_pointer_top() { + int local = 42; + int* p = &local; + return p; // expected-warning{{associated with local variable 'local' returned}} +} + +int* returned_direct_pointer_callee() { + int local = 42; + int* p = &local; + return p; // expected-warning{{associated with local variable 'local' returned}} +} + +void returned_direct_pointer_caller() { + int* loc_ptr = nullptr; + loc_ptr = returned_direct_pointer_callee(); + (void)loc_ptr; +} + +void* global_ptr; + +void global_direct_pointer() { + int local = 42; + global_ptr = &local; +} // expected-warning{{local variable 'local' is still referred to by the global variable 'global_ptr'}} + +void static_direct_pointer_top() { + int local = 42; + static int* p = &local; + (void)p; +} // expected-warning{{local variable 'local' is still referred to by the static variable 'p'}} + +void static_direct_pointer_callee() { + int local = 42; + static int* p = &local; + (void)p; // expected-warning{{local variable 'local' is still referred to by the static variable 'p'}} +} + +void static_direct_pointer_caller() { + static_direct_pointer_callee(); +} + +void lambda_to_global_direct_pointer() { + auto lambda = [&] { + int local = 42; + global_ptr = &local; // expected-warning{{local variable 'local' is still referred to by the global variable 'global_ptr'}} + }; + lambda(); +} + +void lambda_to_context_direct_pointer() { + int *p = nullptr; + auto lambda = [&] { + int local = 42; + p = &local; // 
expected-warning{{local variable 'local' is still referred to by the stack variable 'p'}} + }; + lambda(); + (void)p; +} + +template +class MyFunction { + Callable* fptr; + public: + MyFunction(Callable* callable) :fptr(callable) {} +}; + +void* lambda_to_context_direct_pointer_uncalled() { + int *p = nullptr; + auto lambda = [&] { + int local = 42; + p = &local; // no-warning: analyzed only as top-level, ignored explicitly by the checker + }; + return new MyFunction(&lambda); +} + +void lambda_to_context_direct_pointer_lifetime_extended() { + int *p = nullptr; + auto lambda = [&] { + int&& local = 42; + p = &local; // expected-warning{{'int' lifetime extended by local variable 'local' is still referred to by the stack variable 'p'}} + }; + lambda(); + (void)p; +} + +template +void lambda_param_capture_direct_pointer_callee(Callback& callee) { + int local = 42; + callee(local); // expected-warning{{'local' is still referred to by the stack variable 'p'}} +} + +void lambda_param_capture_direct_pointer_caller() { + int* p = nullptr; + auto capt = [&p](int& param) { + p = ¶m; + }; + lambda_param_capture_direct_pointer_callee(capt); +} +} // namespace leaking_via_direct_pointer + +namespace leaking_via_ptr_to_ptr { +void** returned_ptr_to_ptr_top() { + int local = 42; + int* p = &local; + void** pp = (void**)&p; + return pp; // expected-warning{{associated with local variable 'p' returned}} +} + +void** global_pp; + +void global_ptr_local_to_ptr() { + int local = 42; + int* p = &local; + global_pp = (void**)&p; +} // expected-warning{{local variable 'p' is still referred to by the global variable 'global_pp'}} + +void global_ptr_to_ptr() { + int local = 42; + *global_pp = &local; // no-warning FIXME +} + +void *** global_ppp; + +void global_ptr_to_ptr_to_ptr() { + int local = 42; + **global_ppp = &local; // no-warning FIXME +} + +void** get_some_pp(); + +void static_ptr_to_ptr() { + int local = 42; + static void** pp = get_some_pp(); + *pp = &local; +} // no-warning 
False Negative, requires relating multiple bindings to cross the invented pointer. + +void param_ptr_to_ptr_top(void** pp) { + int local = 42; + *pp = &local; // no-warning FIXME +} + +void param_ptr_to_ptr_callee(void** pp) { + int local = 42; + *pp = &local; // expected-warning{{local variable 'local' is still referred to by the stack variable 'p'}} +} + +void param_ptr_to_ptr_caller() { + void* p = nullptr; + param_ptr_to_ptr_callee((void**)&p); +} + +void param_ptr_to_ptr_to_ptr_top(void*** ppp) { + int local = 42; + **ppp = &local; // no-warning FIXME +} + +void param_ptr_to_ptr_to_ptr_callee(void*** ppp) { + int local = 42; + **ppp = &local; // expected-warning{{local variable 'local' is still referred to by the stack variable 'pp'}} +} + +void param_ptr_to_ptr_to_ptr_caller(void** pp) { + param_ptr_to_ptr_to_ptr_callee(&pp); +} + +void lambda_to_context_ptr_to_ptr(int **pp) { + auto lambda = [&] { + int local = 42; + *pp = &local; // expected-warning{{local variable 'local' is still referred to by the stack variable 'pp'}} + }; + lambda(); + (void)*pp; +} + +void param_ptr_to_ptr_fptr(int **pp) { + int local = 42; + *pp = &local; // expected-warning{{local variable 'local' is still referred to by the stack variable 'p'}} +} + +void param_ptr_to_ptr_fptr_caller(void (*fptr)(int**)) { + int* p = nullptr; + fptr(&p); +} + +void param_ptr_to_ptr_caller_caller() { + void (*fptr)(int**) = param_ptr_to_ptr_fptr; + param_ptr_to_ptr_fptr_caller(fptr); +} +} // namespace leaking_via_ptr_to_ptr + +namespace leaking_via_ref_to_ptr { +void** make_ptr_to_ptr(); +void*& global_rtp = *make_ptr_to_ptr(); + +void global_ref_to_ptr() { + int local = 42; + int* p = &local; + global_rtp = p; // no-warning FIXME +} + +void static_ref_to_ptr() { + int local = 42; + static void*& p = *make_ptr_to_ptr(); + p = &local; + (void)p; +} // no-warning False Negative, requires relating multiple bindings to cross the invented pointer. 
+ +void param_ref_to_ptr_top(void*& rp) { + int local = 42; + int* p = &local; + rp = p; // no-warning FIXME +} + +void param_ref_to_ptr_callee(void*& rp) { + int local = 42; + int* p = &local; + rp = p; // expected-warning{{local variable 'local' is still referred to by the stack variable 'p'}} +} + +void param_ref_to_ptr_caller() { + void* p = nullptr; + param_ref_to_ptr_callee(p); +} +} // namespace leaking_via_ref_to_ptr + +namespace leaking_via_arr_of_ptr_static_idx { +void** returned_arr_of_ptr_top() { + int local = 42; + int* p = &local; + void** arr = new void*[2]; + arr[1] = p; + return arr; +} // no-warning False Negative + +void** returned_arr_of_ptr_callee() { + int local = 42; + int* p = &local; + void** arr = new void*[2]; + arr[1] = p; + return arr; +} // no-warning False Negative + +void returned_arr_of_ptr_caller() { + void** arr = returned_arr_of_ptr_callee(); + (void)arr[1]; +} + +void* global_aop[2]; + +void global_arr_of_ptr() { + int local = 42; + int* p = &local; + global_aop[1] = p; +} // expected-warning{{local variable 'local' is still referred to by the global variable 'global_aop'}} + +void static_arr_of_ptr() { + int local = 42; + static void* arr[2]; + arr[1] = &local; + (void)arr[1]; +} // expected-warning{{local variable 'local' is still referred to by the static variable 'arr'}} + +void param_arr_of_ptr_top(void* arr[2]) { + int local = 42; + int* p = &local; + arr[1] = p; // no-warning FIXME +} + +void param_arr_of_ptr_callee(void* arr[2]) { + int local = 42; + int* p = &local; + arr[1] = p; // expected-warning{{local variable 'local' is still referred to by the stack variable 'arrStack'}} +} + +void param_arr_of_ptr_caller() { + void* arrStack[2]; + param_arr_of_ptr_callee(arrStack); + (void)arrStack[1]; +} +} // namespace leaking_via_arr_of_ptr_static_idx + +namespace leaking_via_arr_of_ptr_dynamic_idx { +void** returned_arr_of_ptr_top(int idx) { + int local = 42; + int* p = &local; + void** arr = new void*[2]; + arr[idx] = p; + 
return arr; +} // no-warning False Negative + +void** returned_arr_of_ptr_callee(int idx) { + int local = 42; + int* p = &local; + void** arr = new void*[2]; + arr[idx] = p; + return arr; +} // no-warning False Negative + +void returned_arr_of_ptr_caller(int idx) { + void** arr = returned_arr_of_ptr_callee(idx); + (void)arr[idx]; +} + +void* global_aop[2]; + +void global_arr_of_ptr(int idx) { + int local = 42; + int* p = &local; + global_aop[idx] = p; +} // expected-warning{{local variable 'local' is still referred to by the global variable 'global_aop'}} + +void static_arr_of_ptr(int idx) { + int local = 42; + static void* arr[2]; + arr[idx] = &local; + (void)arr[idx]; +} // expected-warning{{local variable 'local' is still referred to by the static variable 'arr'}} + +void param_arr_of_ptr_top(void* arr[2], int idx) { + int local = 42; + int* p = &local; + arr[idx] = p; // no-warning FIXME +} + +void param_arr_of_ptr_callee(void* arr[2], int idx) { + int local = 42; + int* p = &local; + arr[idx] = p; // expected-warning{{local variable 'local' is still referred to by the stack variable 'arrStack'}} +} + +void param_arr_of_ptr_caller(int idx) { + void* arrStack[2]; + param_arr_of_ptr_callee(arrStack, idx); + (void)arrStack[idx]; +} +} // namespace leaking_via_arr_of_ptr_dynamic_idx + +namespace leaking_via_struct_with_ptr { +struct S { + int* p; +}; + +S returned_struct_with_ptr_top() { + int local = 42; + S s; + s.p = &local; + return s; +} // no-warning False Negative, requires traversing returned LazyCompoundVals + +S returned_struct_with_ptr_callee() { + int local = 42; + S s; + s.p = &local; + return s; // expected-warning{{'local' is still referred to by the stack variable 's'}} +} + +void returned_struct_with_ptr_caller() { + S s = returned_struct_with_ptr_callee(); + (void)s.p; +} + +S global_s; + +void global_struct_with_ptr() { + int local = 42; + global_s.p = &local; +} // expected-warning{{'local' is still referred to by the global variable 
'global_s'}} + +void static_struct_with_ptr() { + int local = 42; + static S s; + s.p = &local; + (void)s.p; +} // expected-warning{{'local' is still referred to by the static variable 's'}} +} // namespace leaking_via_struct_with_ptr + +namespace leaking_via_ref_to_struct_with_ptr { +struct S { + int* p; +}; + +S &global_s = *(new S); + +void global_ref_to_struct_with_ptr() { + int local = 42; + global_s.p = &local; // no-warning FIXME +} + +void static_ref_to_struct_with_ptr() { + int local = 42; + static S &s = *(new S); + s.p = &local; + (void)s.p; +} // no-warning False Negative, requires relating multiple bindings to cross a heap region. + +void param_ref_to_struct_with_ptr_top(S &s) { + int local = 42; + s.p = &local; // no-warning FIXME +} + +void param_ref_to_struct_with_ptr_callee(S &s) { + int local = 42; + s.p = &local; // expected-warning{{'local' is still referred to by the stack variable 'sStack'}} +} + +void param_ref_to_struct_with_ptr_caller() { + S sStack; + param_ref_to_struct_with_ptr_callee(sStack); +} + +template +void lambda_param_capture_callee(Callable& callee) { + int local = 42; + callee(local); // expected-warning{{'local' is still referred to by the stack variable 'p'}} +} + +void lambda_param_capture_caller() { + int* p = nullptr; + auto capt = [&p](int& param) { + p = ¶m; + }; + lambda_param_capture_callee(capt); +} +} // namespace leaking_via_ref_to_struct_with_ptr + +namespace leaking_via_ptr_to_struct_with_ptr { +struct S { + int* p; +}; + +S* returned_ptr_to_struct_with_ptr_top() { + int local = 42; + S* s = new S; + s->p = &local; + return s; +} // no-warning False Negative + +S* returned_ptr_to_struct_with_ptr_callee() { + int local = 42; + S* s = new S; + s->p = &local; + return s; +} // no-warning False Negative + +void returned_ptr_to_struct_with_ptr_caller() { + S* s = returned_ptr_to_struct_with_ptr_callee(); + (void)s->p; +} + +S* global_s; + +void global_ptr_to_struct_with_ptr() { + int local = 42; + global_s->p = 
&local; // no-warning FIXME +} + +void static_ptr_to_struct_with_ptr_new() { + int local = 42; + static S* s = new S; + s->p = &local; + (void)s->p; +} // no-warning False Negative, requires relating multiple bindings to cross a heap region. + +S* get_some_s(); + +void static_ptr_to_struct_with_ptr_generated() { + int local = 42; + static S* s = get_some_s(); + s->p = &local; +} // no-warning False Negative, requires relating multiple bindings to cross the invented pointer. + +void param_ptr_to_struct_with_ptr_top(S* s) { + int local = 42; + s->p = &local; // no-warning FIXME +} + +void param_ptr_to_struct_with_ptr_callee(S* s) { + int local = 42; + s->p = &local; // expected-warning{{'local' is still referred to by the stack variable 's'}} +} + +void param_ptr_to_struct_with_ptr_caller() { + S s; + param_ptr_to_struct_with_ptr_callee(&s); + (void)s.p; +} +} // namespace leaking_via_ptr_to_struct_with_ptr + +namespace leaking_via_arr_of_struct_with_ptr { +struct S { + int* p; +}; + +S* returned_ptr_to_struct_with_ptr_top() { + int local = 42; + S* s = new S[2]; + s[1].p = &local; + return s; +} // no-warning False Negative + +S* returned_ptr_to_struct_with_ptr_callee() { + int local = 42; + S* s = new S[2]; + s[1].p = &local; + return s; +} // no-warning False Negative + +void returned_ptr_to_struct_with_ptr_caller() { + S* s = returned_ptr_to_struct_with_ptr_callee(); + (void)s[1].p; +} + +S global_s[2]; + +void global_ptr_to_struct_with_ptr() { + int local = 42; + global_s[1].p = &local; +} // expected-warning{{'local' is still referred to by the global variable 'global_s'}} + +void static_ptr_to_struct_with_ptr_new() { + int local = 42; + static S* s = new S[2]; + s[1].p = &local; + (void)s[1].p; +} + +S* get_some_s(); + +void static_ptr_to_struct_with_ptr_generated() { + int local = 42; + static S* s = get_some_s(); + s[1].p = &local; +} // no-warning False Negative, requires relating multiple bindings to cross the invented pointer. 
+ +void param_ptr_to_struct_with_ptr_top(S s[2]) { + int local = 42; + s[1].p = &local; // no-warning FIXME +} + +void param_ptr_to_struct_with_ptr_callee(S s[2]) { + int local = 42; + s[1].p = &local; // expected-warning{{'local' is still referred to by the stack variable 's'}} +} + +void param_ptr_to_struct_with_ptr_caller() { + S s[2]; + param_ptr_to_struct_with_ptr_callee(s); + (void)s[1].p; +} +} // namespace leaking_via_arr_of_struct_with_ptr + +namespace leaking_via_nested_and_indirect { +struct NestedAndTransitive { + int** p; + NestedAndTransitive* next[3]; +}; + +NestedAndTransitive global_nat; + +void global_nested_and_transitive() { + int local = 42; + *global_nat.next[2]->next[1]->p = &local; // no-warning FIXME +} + +void param_nested_and_transitive_top(NestedAndTransitive* nat) { + int local = 42; + *nat->next[2]->next[1]->p = &local; // no-warning FIXME +} + +void param_nested_and_transitive_callee(NestedAndTransitive* nat) { + int local = 42; + *nat->next[2]->next[1]->p = &local; // expected-warning{{local variable 'local' is still referred to by the stack variable 'natCaller'}} +} + +void param_nested_and_transitive_caller(NestedAndTransitive natCaller) { + param_nested_and_transitive_callee(&natCaller); +} + +} // namespace leaking_via_nested_and_indirect + +namespace leaking_as_member { +class CRef { + int& ref; // expected-note{{reference member declared here}} + CRef(int x) : ref(x) {} + // expected-warning@-1 {{binding reference member 'ref' to stack allocated parameter 'x'}} +}; + +class CPtr { + int* ptr; + void memFun(int x) { + ptr = &x; + } +}; +} // namespace leaking_as_member + +namespace origin_region_limitation { +void leaker(int ***leakerArg) { + int local; + clang_analyzer_dump(*leakerArg); // expected-warning{{&SymRegion{reg_$0}}} + // Incorrect message: 'arg', after it is reinitialized with value returned by 'tweak' + // is no longer relevant. 
+ // The message must refer to 'original_arg' instead, but there is no easy way to + // connect the SymRegion stored in 'original_arg' and 'original_arg' as variable. + **leakerArg = &local; // expected-warning{{ 'local' is still referred to by the stack variable 'arg'}} +} + +int **tweak(); + +void foo(int **arg) { + int **original_arg = arg; + arg = tweak(); + leaker(&original_arg); +} +} // namespace origin_region_limitation diff --git a/clang/test/SemaCXX/cxx2c-pack-indexing.cpp b/clang/test/SemaCXX/cxx2c-pack-indexing.cpp index 9ea90a4c3e30fd..7d7e808746217f 100644 --- a/clang/test/SemaCXX/cxx2c-pack-indexing.cpp +++ b/clang/test/SemaCXX/cxx2c-pack-indexing.cpp @@ -231,3 +231,31 @@ struct type_info { namespace GH93650 { auto func(auto... inputArgs) { return typeid(inputArgs...[0]); } } // namespace GH93650 + + +namespace GH105900 { + +template +struct types { + template + static constexpr __SIZE_TYPE__ get_index() { return idx; } + + template + static auto x() -> opts...[get_index()] {} +}; + +template +struct vars { + template + static constexpr __SIZE_TYPE__ get_index() { return idx; } + + template + static auto x() -> decltype(opts...[get_index()]) {return 0;} +}; + +void f() { + types::x<0>(); + vars<0>::x<0>(); +} + +} diff --git a/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatible.hlsl b/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatible.hlsl new file mode 100644 index 00000000000000..db46a8e1414953 --- /dev/null +++ b/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatible.hlsl @@ -0,0 +1,132 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -verify %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -fnative-half-type -verify %s +// expected-no-diagnostics + +// Case 1: How many ways can I come up with to represent three float values? 
+struct ThreeFloats1 { + float X, Y, Z; +}; + +struct ThreeFloats2 { + float X[3]; +}; + +struct ThreeFloats3 { + float3 V; +}; + +struct ThreeFloats4 { + float2 V; + float F; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, float[3]), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, ThreeFloats1), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, ThreeFloats2), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, ThreeFloats3), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, ThreeFloats4), ""); + +// Case 2: structs and base classes and arrays, oh my! +struct Dog { + int Leg[4]; + bool Tail; + float Fur; +}; + +struct Shiba { + int4 StubbyLegs; + bool CurlyTail; + struct Coating { + float Fur; + } F; +}; + +struct FourLegged { + int FR, FL, BR, BL; +}; + +struct Doggo : FourLegged { + bool WaggyBit; + float Fuzz; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Dog, Shiba), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Dog, Doggo), ""); + +// Case 3: Arrays of structs inside structs + +struct Cat { + struct Leg { + int L; + } Legs[4]; + struct Other { + bool Tail; + float Furs; + } Bits; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Dog, Cat), ""); + +// case 4: Arrays of structs inside arrays of structs. +struct Pets { + Dog Puppers[6]; + Cat Kitties[4]; +}; + +struct Animals { + Dog Puppers[2]; + Cat Kitties[8]; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Pets, Animals), ""); + +// Case 5: Turtles all the way down... 
+ +typedef int Turtle; + +enum Ninja : Turtle { + Leonardo, + Donatello, + Michelangelo, + Raphael, +}; + +enum NotNinja : Turtle { + Fred, + Mikey, +}; + +enum Mammals : uint { + Dog, + Cat, +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Ninja, NotNinja), ""); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(Ninja, Mammals), ""); + +// Case 6: Some basic types. +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(int, int32_t), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(uint, uint32_t), ""); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(int, uint), ""); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(int, float), ""); + +// Even though half and float may be the same size we don't want them to be +// layout compatible since they are different types. +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(half, float), ""); + +// Case 6: Empty classes... because they're fun. + +struct NotEmpty { int X; }; +struct Empty {}; +struct AlsoEmpty {}; + +struct DerivedEmpty : Empty {}; + +struct DerivedNotEmpty : Empty { int X; }; +struct DerivedEmptyNotEmptyBase : NotEmpty {}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Empty, AlsoEmpty), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Empty, DerivedEmpty), ""); + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(NotEmpty, DerivedNotEmpty), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(NotEmpty, DerivedEmptyNotEmptyBase), ""); diff --git a/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatibleErrors.hlsl b/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatibleErrors.hlsl new file mode 100644 index 00000000000000..4c96795da7fd0c --- /dev/null +++ b/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatibleErrors.hlsl @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -verify 
%s + +// Some things that don't work! + +// Case 1: Both types must be complete! +struct Defined { + int X; +}; + + +struct Undefined; // expected-note {{forward declaration of 'Undefined'}} + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Undefined, Defined), ""); // expected-error{{incomplete type 'Undefined' where a complete type is required}} + +// Case 2: No variable length arrays! + +void fn(int X) { + // expected-error@#vla {{variable length arrays are not supported for the current target}} + // expected-error@#vla {{variable length arrays are not supported in '__builtin_hlsl_is_scalarized_layout_compatible'}} + // expected-error@#vla {{static assertion failed due to requirement '__builtin_hlsl_is_scalarized_layout_compatible(int[4], int[X])'}} + // expected-warning@#vla {{variable length arrays in C++ are a Clang extension}} + _Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(int[4], int[X]), ""); // #vla +} + +// Case 3: Make this always fail for unions. +// HLSL doesn't really support unions, and the places where scalarized layouts +// are valid is probably going to be really confusing for unions, so we should +// just make sure unions are never scalarized compatible with anything other +// than themselves. 
+ +union Wah { + int OhNo; + float NotAgain; +}; + +struct OneInt { + int I; +}; + +struct OneFloat { + float F; +}; + +struct HasUnion { + int I; + Wah W; +}; + +struct HasUnionSame { + int I; + Wah W; +}; + +struct HasUnionDifferent { + Wah W; + int I; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Wah, Wah), "Identical types are always compatible"); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(Wah, OneInt), "Unions are not compatible with anything else"); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(Wah, OneFloat), "Unions are not compatible with anything else"); + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(HasUnion, HasUnionSame), ""); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(HasUnion, HasUnionDifferent), ""); diff --git a/compiler-rt/lib/rtsan/tests/CMakeLists.txt b/compiler-rt/lib/rtsan/tests/CMakeLists.txt index 3b783c90c26585..0320bbad592186 100644 --- a/compiler-rt/lib/rtsan/tests/CMakeLists.txt +++ b/compiler-rt/lib/rtsan/tests/CMakeLists.txt @@ -60,14 +60,13 @@ endif() foreach(arch ${RTSAN_TEST_ARCH}) set(RtsanTestObjects) - # TODO: Re-enable once -fsanitize=realtime exists in clang driver - #generate_compiler_rt_tests(RtsanTestObjects - # RtsanUnitTests "Rtsan-${arch}-Test" ${arch} - # COMPILE_DEPS ${RTSAN_UNITTEST_HEADERS} - # SOURCES ${RTSAN_INST_TEST_SOURCES} ${COMPILER_RT_GOOGLETEST_SOURCES} - # DEPS rtsan - # CFLAGS ${RTSAN_UNITTEST_CFLAGS} -fsanitize=realtime - # LINK_FLAGS ${RTSAN_UNITTEST_LINK_FLAGS} -fsanitize=realtime) + generate_compiler_rt_tests(RtsanTestObjects + RtsanUnitTests "Rtsan-${arch}-Test" ${arch} + COMPILE_DEPS ${RTSAN_UNITTEST_HEADERS} + SOURCES ${RTSAN_INST_TEST_SOURCES} ${COMPILER_RT_GOOGLETEST_SOURCES} + DEPS rtsan + CFLAGS ${RTSAN_UNITTEST_CFLAGS} -fsanitize=realtime + LINK_FLAGS ${RTSAN_UNITTEST_LINK_FLAGS} -fsanitize=realtime) set(RTSAN_TEST_RUNTIME RTRtsanTest.${arch}) if(APPLE) diff --git 
a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp index 97afb1eefb6401..6e7ab016a4c6b2 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp @@ -145,7 +145,7 @@ TEST(TestRtsan, CopyingALambdaWithLargeCaptureDiesWhenRealtime) { auto lambda = [lots_of_data]() mutable { // Stop everything getting optimised out lots_of_data[3] = 0.25f; - EXPECT_EQ(16, lots_of_data.size()); + EXPECT_EQ(16u, lots_of_data.size()); EXPECT_EQ(0.25f, lots_of_data[3]); }; auto Func = [&]() { InvokeStdFunction(lambda); }; @@ -156,11 +156,17 @@ TEST(TestRtsan, CopyingALambdaWithLargeCaptureDiesWhenRealtime) { TEST(TestRtsan, AccessingALargeAtomicVariableDiesWhenRealtime) { std::atomic small_atomic{0.0f}; ASSERT_TRUE(small_atomic.is_lock_free()); - RealtimeInvoke([&small_atomic]() { float x = small_atomic.load(); }); + RealtimeInvoke([&small_atomic]() { + float x = small_atomic.load(); + return x; + }); std::atomic> large_atomic; ASSERT_FALSE(large_atomic.is_lock_free()); - auto Func = [&]() { auto x = large_atomic.load(); }; + auto Func = [&]() { + std::array x = large_atomic.load(); + return x; + }; ExpectRealtimeDeath(Func); ExpectNonRealtimeSurvival(Func); } diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp index 8861104068c8e9..5b88cf64612942 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp @@ -184,19 +184,25 @@ TEST_F(RtsanFileTest, OpenatDiesWhenRealtime) { ExpectNonRealtimeSurvival(func); } -TEST_F(RtsanFileTest, OpenCreatesFileWithProperMode) { - const int mode = S_IRGRP | S_IROTH | S_IRUSR | S_IWUSR; - - const int fd = open(GetTemporaryFilePath(), O_CREAT | O_WRONLY, mode); - ASSERT_THAT(fd, Ne(-1)); - close(fd); - - struct stat st; - ASSERT_THAT(stat(GetTemporaryFilePath(), &st), Eq(0)); - - // 
Mask st_mode to get permission bits only - ASSERT_THAT(st.st_mode & 0777, Eq(mode)); -} +// FIXME: This fails on the build machines, but not locally! +// see https://github.com/llvm/llvm-project/pull/105732#issuecomment-2310286530 +// Value of: st.st_mode & 0777 +// Expected: is equal to 420 +// Actual: 384 +// TEST_F(RtsanFileTest, OpenCreatesFileWithProperMode) { +// const int mode = S_IRGRP | S_IROTH | S_IRUSR | S_IWUSR; +// +// const int fd = open(GetTemporaryFilePath(), O_CREAT | O_WRONLY, mode); +// ASSERT_THAT(fd, Ne(-1)); +// close(fd); +// +// struct stat st; +// ASSERT_THAT(stat(GetTemporaryFilePath(), &st), Eq(0)); +// +// // Mask st_mode to get permission bits only +// +// //ASSERT_THAT(st.st_mode & 0777, Eq(mode)); FAILED ASSERTION +// } TEST_F(RtsanFileTest, CreatDiesWhenRealtime) { auto func = [this]() { creat(GetTemporaryFilePath(), S_IWOTH | S_IROTH); }; @@ -321,7 +327,7 @@ TEST(TestRtsanInterceptors, PthreadCreateDiesWhenRealtime) { auto Func = []() { pthread_t thread{}; const pthread_attr_t attr{}; - struct thread_info *thread_info; + struct thread_info *thread_info{}; pthread_create(&thread, &attr, &FakeThreadEntryPoint, thread_info); }; ExpectRealtimeDeath(Func, "pthread_create"); diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index 27f8697db7838f..985e2392641ae2 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -72,6 +72,15 @@ namespace { struct CachedBlock { static constexpr u16 CacheIndexMax = UINT16_MAX; static constexpr u16 InvalidEntry = CacheIndexMax; + // * MaxReleasedCachePages default is currently 4 + // - We arrived at this value after noticing that mapping + // in larger memory regions performs better than releasing + // memory and forcing a cache hit. According to the data, + // it suggests that beyond 4 pages, the release execution time is + // longer than the map execution time. 
In this way, the default + // is dependent on the platform. + // TODO: set MaxReleasedCachePages back to 4U + static constexpr uptr MaxReleasedCachePages = 0U; uptr CommitBase = 0; uptr CommitSize = 0; @@ -90,8 +99,9 @@ struct CachedBlock { template class MapAllocatorNoCache { public: void init(UNUSED s32 ReleaseToOsInterval) {} - CachedBlock retrieve(UNUSED uptr Size, UNUSED uptr Alignment, - UNUSED uptr HeadersSize, UNUSED uptr &EntryHeaderPos) { + CachedBlock retrieve(UNUSED uptr MaxAllowedFragmentedBytes, UNUSED uptr Size, + UNUSED uptr Alignment, UNUSED uptr HeadersSize, + UNUSED uptr &EntryHeaderPos) { return {}; } void store(UNUSED Options Options, UNUSED uptr CommitBase, @@ -121,7 +131,7 @@ template class MapAllocatorNoCache { } }; -static const uptr MaxUnusedCachePages = 4U; +static const uptr MaxUnreleasedCachePages = 4U; template bool mapSecondary(const Options &Options, uptr CommitBase, uptr CommitSize, @@ -151,9 +161,11 @@ bool mapSecondary(const Options &Options, uptr CommitBase, uptr CommitSize, } } - const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize; - if (useMemoryTagging(Options) && CommitSize > MaxUnusedCacheBytes) { - const uptr UntaggedPos = Max(AllocPos, CommitBase + MaxUnusedCacheBytes); + const uptr MaxUnreleasedCacheBytes = MaxUnreleasedCachePages * PageSize; + if (useMemoryTagging(Options) && + CommitSize > MaxUnreleasedCacheBytes) { + const uptr UntaggedPos = + Max(AllocPos, CommitBase + MaxUnreleasedCacheBytes); return MemMap.remap(CommitBase, UntaggedPos - CommitBase, "scudo:secondary", MAP_MEMTAG | Flags) && MemMap.remap(UntaggedPos, CommitBase + CommitSize - UntaggedPos, @@ -334,13 +346,13 @@ class MapAllocatorCache { } } - CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize, - uptr &EntryHeaderPos) EXCLUDES(Mutex) { + CachedBlock retrieve(uptr MaxAllowedFragmentedPages, uptr Size, + uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos) + EXCLUDES(Mutex) { const uptr PageSize = getPageSizeCached(); // 10% 
of the requested size proved to be the optimal choice for // retrieving cached blocks after testing several options. constexpr u32 FragmentedBytesDivisor = 10; - bool Found = false; CachedBlock Entry; EntryHeaderPos = 0; { @@ -348,47 +360,100 @@ class MapAllocatorCache { CallsToRetrieve++; if (EntriesCount == 0) return {}; - u32 OptimalFitIndex = 0; + u16 RetrievedIndex = CachedBlock::InvalidEntry; uptr MinDiff = UINTPTR_MAX; - for (u32 I = LRUHead; I != CachedBlock::InvalidEntry; + + // Since allocation sizes don't always match cached memory chunk sizes + // we allow some memory to be unused (called fragmented bytes). The + // amount of unused bytes is exactly EntryHeaderPos - CommitBase. + // + // CommitBase CommitBase + CommitSize + // V V + // +---+------------+-----------------+---+ + // | | | | | + // +---+------------+-----------------+---+ + // ^ ^ ^ + // Guard EntryHeaderPos Guard-page-end + // page-begin + // + // [EntryHeaderPos, CommitBase + CommitSize) contains the user data as + // well as the header metadata. If EntryHeaderPos - CommitBase exceeds + // MaxAllowedFragmentedPages * PageSize, the cached memory chunk is + // not considered valid for retrieval. 
+ for (u16 I = LRUHead; I != CachedBlock::InvalidEntry; I = Entries[I].Next) { const uptr CommitBase = Entries[I].CommitBase; const uptr CommitSize = Entries[I].CommitSize; const uptr AllocPos = roundDown(CommitBase + CommitSize - Size, Alignment); const uptr HeaderPos = AllocPos - HeadersSize; + const uptr MaxAllowedFragmentedBytes = + MaxAllowedFragmentedPages * PageSize; if (HeaderPos > CommitBase + CommitSize) continue; + // TODO: Remove AllocPos > CommitBase + MaxAllowedFragmentedBytes + // and replace with Diff > MaxAllowedFragmentedBytes if (HeaderPos < CommitBase || - AllocPos > CommitBase + PageSize * MaxUnusedCachePages) { + AllocPos > CommitBase + MaxAllowedFragmentedBytes) { continue; } - Found = true; - const uptr Diff = HeaderPos - CommitBase; - // immediately use a cached block if it's size is close enough to the - // requested size. - const uptr MaxAllowedFragmentedBytes = - (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor; - if (Diff <= MaxAllowedFragmentedBytes) { - OptimalFitIndex = I; - EntryHeaderPos = HeaderPos; - break; - } - // keep track of the smallest cached block + + const uptr Diff = roundDown(HeaderPos, PageSize) - CommitBase; + + // Keep track of the smallest cached block // that is greater than (AllocSize + HeaderSize) - if (Diff > MinDiff) + if (Diff >= MinDiff) continue; - OptimalFitIndex = I; + MinDiff = Diff; + RetrievedIndex = I; EntryHeaderPos = HeaderPos; + + // Immediately use a cached block if its size is close enough to the + // requested size + const uptr OptimalFitThesholdBytes = + (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor; + if (Diff <= OptimalFitThesholdBytes) + break; } - if (Found) { - Entry = Entries[OptimalFitIndex]; - remove(OptimalFitIndex); + if (RetrievedIndex != CachedBlock::InvalidEntry) { + Entry = Entries[RetrievedIndex]; + remove(RetrievedIndex); SuccessfulRetrieves++; } } + // The difference between the retrieved memory chunk and the request + // size is at most 
MaxAllowedFragmentedPages + // + // / MaxAllowedFragmentedPages * PageSize \ + // +--------------------------+-------------+ + // | | | + // +--------------------------+-------------+ + // \ Bytes to be released / ^ + // | + // (may or may not be committed) + // + // The maximum number of bytes released to the OS is capped by + // MaxReleasedCachePages + // + // TODO : Consider making MaxReleasedCachePages configurable since + // the release to OS API can vary across systems. + if (Entry.Time != 0) { + const uptr FragmentedBytes = + roundDown(EntryHeaderPos, PageSize) - Entry.CommitBase; + const uptr MaxUnreleasedCacheBytes = MaxUnreleasedCachePages * PageSize; + if (FragmentedBytes > MaxUnreleasedCacheBytes) { + const uptr MaxReleasedCacheBytes = + CachedBlock::MaxReleasedCachePages * PageSize; + uptr BytesToRelease = + roundUp(Min(MaxReleasedCacheBytes, + FragmentedBytes - MaxUnreleasedCacheBytes), + PageSize); + Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, BytesToRelease); + } + } + return Entry; } @@ -659,8 +724,13 @@ MapAllocator::tryAllocateFromCache(const Options &Options, uptr Size, FillContentsMode FillContents) { CachedBlock Entry; uptr EntryHeaderPos; + uptr MaxAllowedFragmentedPages = MaxUnreleasedCachePages; + + if (UNLIKELY(useMemoryTagging(Options))) + MaxAllowedFragmentedPages += CachedBlock::MaxReleasedCachePages; - Entry = Cache.retrieve(Size, Alignment, getHeadersSize(), EntryHeaderPos); + Entry = Cache.retrieve(MaxAllowedFragmentedPages, Size, Alignment, + getHeadersSize(), EntryHeaderPos); if (!Entry.isValid()) return nullptr; diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index e85b6abdb36d22..3638f1c36ddd9b 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -281,8 +281,8 @@ struct MapAllocatorCacheTest : public Test { std::unique_ptr Cache = std::make_unique(); 
const scudo::uptr PageSize = scudo::getPageSizeCached(); - // The current test allocation size is set to the minimum size - // needed for the scudo allocator to fall back to the secondary allocator + // The current test allocation size is set to the maximum + // cache entry size static constexpr scudo::uptr TestAllocSize = CacheConfig::getDefaultMaxEntrySize(); @@ -327,7 +327,7 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) { for (scudo::uptr I = CacheConfig::getEntriesArraySize(); I > 0; I--) { scudo::uptr EntryHeaderPos; scudo::CachedBlock Entry = - Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos); + Cache->retrieve(0, TestAllocSize, PageSize, 0, EntryHeaderPos); EXPECT_EQ(Entry.MemMap.getBase(), MemMaps[I - 1].getBase()); } @@ -336,6 +336,30 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) { MemMap.unmap(); } +TEST_F(MapAllocatorCacheTest, PartialChunkHeuristicRetrievalTest) { + const scudo::uptr FragmentedPages = + 1 + scudo::CachedBlock::MaxReleasedCachePages; + scudo::uptr EntryHeaderPos; + scudo::CachedBlock Entry; + scudo::MemMapT MemMap = allocate(PageSize + FragmentedPages * PageSize); + Cache->store(Options, MemMap.getBase(), MemMap.getCapacity(), + MemMap.getBase(), MemMap); + + // FragmentedPages > MaxAllowedFragmentedPages so PageSize + // cannot be retrieved from the cache + Entry = Cache->retrieve(/*MaxAllowedFragmentedPages=*/0, PageSize, PageSize, + 0, EntryHeaderPos); + EXPECT_FALSE(Entry.isValid()); + + // FragmentedPages == MaxAllowedFragmentedPages so PageSize + // can be retrieved from the cache + Entry = + Cache->retrieve(FragmentedPages, PageSize, PageSize, 0, EntryHeaderPos); + EXPECT_TRUE(Entry.isValid()); + + MemMap.unmap(); +} + TEST_F(MapAllocatorCacheTest, MemoryLeakTest) { std::vector MemMaps; // Fill the cache above MaxEntriesCount to force an eviction @@ -351,7 +375,7 @@ TEST_F(MapAllocatorCacheTest, MemoryLeakTest) { for (scudo::uptr I = CacheConfig::getDefaultMaxEntriesCount(); I > 0; I--) { scudo::uptr EntryHeaderPos; 
RetrievedEntries.push_back( - Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos)); + Cache->retrieve(0, TestAllocSize, PageSize, 0, EntryHeaderPos)); EXPECT_EQ(MemMaps[I].getBase(), RetrievedEntries.back().MemMap.getBase()); } diff --git a/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp b/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp index 23f0a02ea4277b..a762aee48f7c63 100644 --- a/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp @@ -154,7 +154,7 @@ TEST_F(ScudoTimingTest, VerifyMax) { unsigned long long MaxNs = std::strtoull(&end[6], &end, 10); ASSERT_TRUE(end != nullptr); - EXPECT_GT(MaxNs, AvgNs); + EXPECT_GE(MaxNs, AvgNs); } TEST_F(ScudoTimingTest, VerifyMultipleTimerCalls) { diff --git a/compiler-rt/test/rtsan/CMakeLists.txt b/compiler-rt/test/rtsan/CMakeLists.txt index e1f9eb39408dc1..59fc5a29703fea 100644 --- a/compiler-rt/test/rtsan/CMakeLists.txt +++ b/compiler-rt/test/rtsan/CMakeLists.txt @@ -1,14 +1,3 @@ - - - - -###### -# TODO: Full lit tests coming in a future review when we introduce the codegen -###### - - - - set(RTSAN_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(RTSAN_TESTSUITES) diff --git a/compiler-rt/test/rtsan/basic.cpp b/compiler-rt/test/rtsan/basic.cpp new file mode 100644 index 00000000000000..ec7382cb0ecaff --- /dev/null +++ b/compiler-rt/test/rtsan/basic.cpp @@ -0,0 +1,21 @@ +// RUN: %clangxx -fsanitize=realtime %s -o %t +// RUN: not %run %t 2>&1 | FileCheck %s +// UNSUPPORTED: ios + +// Intent: Ensure that an intercepted call in a [[clang::nonblocking]] function +// is flagged as an error. Basic smoke test. 
+ +#include +#include + +void violation() [[clang::nonblocking]] { + void *ptr = malloc(2); + printf("ptr: %p\n", ptr); // ensure we don't optimize out the malloc +} + +int main() { + violation(); + return 0; + // CHECK: {{.*Real-time violation.*}} + // CHECK: {{.*malloc*}} +} diff --git a/compiler-rt/test/rtsan/inactive.cpp b/compiler-rt/test/rtsan/inactive.cpp new file mode 100644 index 00000000000000..69edc63a4cfd41 --- /dev/null +++ b/compiler-rt/test/rtsan/inactive.cpp @@ -0,0 +1,26 @@ +// RUN: %clangxx %s -o %t +// RUN: %run %t 2>&1 | FileCheck %s +// UNSUPPORTED: ios + +// Intent: Ensure [[clang::nonblocking]] has no impact if -fsanitize=realtime is not used + +#include +#include + +// In this test, we don't use the -fsanitize=realtime flag, so nothing +// should happen here +void violation() [[clang::nonblocking]] { + void *ptr = malloc(2); + printf("ptr: %p\n", ptr); // ensure we don't optimize out the malloc +} + +int main() { + printf("Starting run\n"); + violation(); + printf("No violations ended the program\n"); + return 0; + // CHECK: {{.*Starting run.*}} + // CHECK NOT: {{.*Real-time violation.*}} + // CHECK NOT: {{.*malloc*}} + // CHECK: {{.*No violations ended the program.*}} +} diff --git a/compiler-rt/test/sanitizer_common/lit.common.cfg.py b/compiler-rt/test/sanitizer_common/lit.common.cfg.py index 04af4816eb6e78..5406e8838f2fcf 100644 --- a/compiler-rt/test/sanitizer_common/lit.common.cfg.py +++ b/compiler-rt/test/sanitizer_common/lit.common.cfg.py @@ -18,6 +18,9 @@ tool_options = "HWASAN_OPTIONS" if not config.has_lld: config.unsupported = True +elif config.tool_name == "rtsan": + tool_cflags = ["-fsanitize=realtime"] + tool_options = "RTSAN_OPTIONS" elif config.tool_name == "tsan": tool_cflags = ["-fsanitize=thread"] tool_options = "TSAN_OPTIONS" diff --git a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h index 8cb6e92e41d97d..9a70b7fbfad2b6 100644 --- 
a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h +++ b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h @@ -153,20 +153,11 @@ struct AliasAnalysis { /// Return true, if Target or Pointer attribute is set. bool isTargetOrPointer() const; - /// Return true, if the memory source's `valueType` is a reference type - /// to an object of derived type that contains a component with POINTER - /// attribute. - bool isRecordWithPointerComponent() const; - bool isDummyArgument() const; bool isData() const; bool isBoxData() const; mlir::Type getType() const; - - /// Return true, if `ty` is a reference type to a boxed - /// POINTER object or a raw fir::PointerType. - static bool isPointerReference(mlir::Type ty); }; friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os, @@ -183,6 +174,15 @@ struct AliasAnalysis { /// will stop at [hl]fir.declare if it represents a dummy /// argument declaration (i.e. it has the dummy_scope operand). Source getSource(mlir::Value, bool getInstantiationPoint = false); + +private: + /// Return true, if `ty` is a reference type to an object of derived type + /// that contains a component with POINTER attribute. + static bool isRecordWithPointerComponent(mlir::Type ty); + + /// Return true, if `ty` is a reference type to a boxed + /// POINTER object or a raw fir::PointerType. 
+ static bool isPointerReference(mlir::Type ty); }; inline bool operator==(const AliasAnalysis::Source::SourceOrigin &lhs, diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index e95af629ef32f1..f643674f1d5d6b 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -161,10 +161,11 @@ def cuf_DataTransferOp : cuf_Op<"data_transfer", []> { let arguments = (ins Arg:$src, Arg:$dst, + Optional:$shape, cuf_DataTransferKindAttr:$transfer_kind); let assemblyFormat = [{ - $src `to` $dst attr-dict `:` type(operands) + $src `to` $dst (`,` $shape^ `:` type($shape) )? attr-dict `:` type($src) `,` type($dst) }]; let hasVerifier = 1; diff --git a/flang/include/flang/Runtime/numeric.h b/flang/include/flang/Runtime/numeric.h index e051e864316630..6e1979790e3c61 100644 --- a/flang/include/flang/Runtime/numeric.h +++ b/flang/include/flang/Runtime/numeric.h @@ -377,6 +377,8 @@ CppTypeFor RTDECL(SelectedCharKind)( // SELECTED_INT_KIND CppTypeFor RTDECL(SelectedIntKind)( const char *, int, void *, int); +CppTypeFor RTDECL(SelectedIntKindMasked)( + const char *, int, void *, int, int); // SELECTED_LOGICAL_KIND CppTypeFor RTDECL(SelectedLogicalKind)( @@ -385,6 +387,8 @@ CppTypeFor RTDECL(SelectedLogicalKind)( // SELECTED_REAL_KIND CppTypeFor RTDECL(SelectedRealKind)( const char *, int, void *, int, void *, int, void *, int); +CppTypeFor RTDECL(SelectedRealKindMasked)( + const char *, int, void *, int, void *, int, void *, int, int); // SPACING CppTypeFor RTDECL(Spacing4)( diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index cf0350735b5b94..b4db6689a94271 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -755,7 +755,7 @@ class Symbol { OmpDeclarativeAllocateDirective, OmpExecutableAllocateDirective, OmpDeclareSimd, OmpDeclareTarget, OmpThreadprivate, 
OmpDeclareReduction, OmpFlushed, OmpCriticalLock, OmpIfSpecified, OmpNone, OmpPreDetermined, - OmpImplicit); + OmpImplicit, OmpFromStmtFunction); using Flags = common::EnumSet; const Scope &owner() const { return *owner_; } diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index ccbb481f472d81..24cd6b22b89259 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -4272,18 +4272,19 @@ class FirConverter : public Fortran::lower::AbstractConverter { base = convertOp.getValue(); // Special case if the rhs is a constant. if (matchPattern(base.getDefiningOp(), mlir::m_Constant())) { - builder.create(loc, base, lhsVal, - transferKindAttr); + builder.create( + loc, base, lhsVal, /*shape=*/mlir::Value{}, transferKindAttr); } else { auto associate = hlfir::genAssociateExpr( loc, builder, rhs, rhs.getType(), ".cuf_host_tmp"); builder.create(loc, associate.getBase(), lhsVal, + /*shape=*/mlir::Value{}, transferKindAttr); builder.create(loc, associate); } } else { - builder.create(loc, rhsVal, lhsVal, - transferKindAttr); + builder.create( + loc, rhsVal, lhsVal, /*shape=*/mlir::Value{}, transferKindAttr); } return; } @@ -4293,6 +4294,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { auto transferKindAttr = cuf::DataTransferKindAttr::get( builder.getContext(), cuf::DataTransferKind::DeviceHost); builder.create(loc, rhsVal, lhsVal, + /*shape=*/mlir::Value{}, transferKindAttr); return; } @@ -4303,6 +4305,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { auto transferKindAttr = cuf::DataTransferKindAttr::get( builder.getContext(), cuf::DataTransferKind::DeviceDevice); builder.create(loc, rhsVal, lhsVal, + /*shape=*/mlir::Value{}, transferKindAttr); return; } @@ -4346,8 +4349,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { addSymbol(sym, hlfir::translateToExtendedValue(loc, builder, temp).first, /*forced=*/true); - builder.create(loc, addr, temp, - transferKindAttr); + builder.create( + 
loc, addr, temp, /*shape=*/mlir::Value{}, transferKindAttr); ++nbDeviceResidentObject; } } diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index e1a193edc004a7..1b2f926e21bed8 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -402,6 +402,15 @@ void DataSharingProcessor::collectSymbols( /*collectSymbols=*/true, /*collectHostAssociatedSymbols=*/true); + // Add implicitly referenced symbols from statement functions. + if (curScope) { + for (const auto &sym : curScope->GetSymbols()) { + if (sym->test(semantics::Symbol::Flag::OmpFromStmtFunction) && + sym->test(flag)) + allSymbols.insert(&*sym); + } + } + llvm::SetVector symbolsInNestedRegions; collectSymbolsInNestedRegions(eval, flag, symbolsInNestedRegions); diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp index 2084962fde729a..e88da5a8ebae19 100644 --- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp +++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp @@ -60,7 +60,15 @@ void AliasAnalysis::Source::print(llvm::raw_ostream &os) const { attributes.Dump(os, EnumToString); } -bool AliasAnalysis::Source::isPointerReference(mlir::Type ty) { +bool AliasAnalysis::isRecordWithPointerComponent(mlir::Type ty) { + auto eleTy = fir::dyn_cast_ptrEleTy(ty); + if (!eleTy) + return false; + // TO DO: Look for pointer components + return mlir::isa(eleTy); +} + +bool AliasAnalysis::isPointerReference(mlir::Type ty) { auto eleTy = fir::dyn_cast_ptrEleTy(ty); if (!eleTy) return false; @@ -86,15 +94,7 @@ bool AliasAnalysis::Source::isBoxData() const { origin.isData; } -bool AliasAnalysis::Source::isRecordWithPointerComponent() const { - auto eleTy = fir::dyn_cast_ptrEleTy(valueType); - if (!eleTy) - return false; - // TO DO: Look for pointer components - return mlir::isa(eleTy); -} - -AliasResult AliasAnalysis::alias(Value lhs, Value rhs) { 
+AliasResult AliasAnalysis::alias(mlir::Value lhs, mlir::Value rhs) { // TODO: alias() has to be aware of the function scopes. // After MLIR inlining, the current implementation may // not recognize non-aliasing entities. @@ -111,6 +111,7 @@ AliasResult AliasAnalysis::alias(Value lhs, Value rhs) { // it aliases with everything if (lhsSrc.kind >= SourceKind::Indirect || rhsSrc.kind >= SourceKind::Indirect) { + LLVM_DEBUG(llvm::dbgs() << " aliasing because of indirect access\n"); return AliasResult::MayAlias; } @@ -169,10 +170,12 @@ AliasResult AliasAnalysis::alias(Value lhs, Value rhs) { // Box for POINTER component inside an object of a derived type // may alias box of a POINTER object, as well as boxes for POINTER // components inside two objects of derived types may alias. - if ((src1->isRecordWithPointerComponent() && src2->isTargetOrPointer()) || - (src2->isRecordWithPointerComponent() && src1->isTargetOrPointer()) || - (src1->isRecordWithPointerComponent() && - src2->isRecordWithPointerComponent())) { + if ((isRecordWithPointerComponent(src1->valueType) && + src2->isTargetOrPointer()) || + (isRecordWithPointerComponent(src2->valueType) && + src1->isTargetOrPointer()) || + (isRecordWithPointerComponent(src1->valueType) && + isRecordWithPointerComponent(src2->valueType))) { LLVM_DEBUG(llvm::dbgs() << " aliasing because of pointer components\n"); return AliasResult::MayAlias; } @@ -310,7 +313,7 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, // TODO: Take followBoxData into account when setting the pointer // attribute - if (Source::isPointerReference(ty)) + if (isPointerReference(ty)) attributes.set(Attribute::Pointer); global = llvm::cast(op).getSymbol(); breakFromLoop = true; @@ -387,7 +390,7 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, if (fir::valueHasFirAttribute(v, fir::getTargetAttrName())) attributes.set(Attribute::Target); - if (Source::isPointerReference(ty)) + if (isPointerReference(ty)) 
attributes.set(Attribute::Pointer); } diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp index f7b36b208a7deb..3b4ad95cafe6b5 100644 --- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -99,6 +99,11 @@ llvm::LogicalResult cuf::AllocateOp::verify() { llvm::LogicalResult cuf::DataTransferOp::verify() { mlir::Type srcTy = getSrc().getType(); mlir::Type dstTy = getDst().getType(); + if (getShape()) { + if (!fir::isa_ref_type(srcTy) || !fir::isa_ref_type(dstTy)) + return emitOpError() + << "shape can only be specified on data transfer with references"; + } if ((fir::isa_ref_type(srcTy) && fir::isa_ref_type(dstTy)) || (fir::isa_box_type(srcTy) && fir::isa_box_type(dstTy)) || (fir::isa_ref_type(srcTy) && fir::isa_box_type(dstTy)) || diff --git a/flang/lib/Parser/io-parsers.cpp b/flang/lib/Parser/io-parsers.cpp index ca0dbedc8da427..25b09efd40c529 100644 --- a/flang/lib/Parser/io-parsers.cpp +++ b/flang/lib/Parser/io-parsers.cpp @@ -27,7 +27,8 @@ TYPE_PARSER(construct(variable / lookAhead(space / ",);\n"_ch)) || construct(fileUnitNumber) || construct(star)) // R1202 file-unit-number -> scalar-int-expr -TYPE_PARSER(construct(scalarIntExpr / !"="_tok)) +TYPE_PARSER(construct( + scalarIntExpr / (lookAhead(space >> ",)"_ch) || atEndOfStmt))) // R1204 open-stmt -> OPEN ( connect-spec-list ) TYPE_CONTEXT_PARSER("OPEN statement"_en_US, diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index c01d512b4653de..804ada7d11e020 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -207,11 +207,13 @@ void Prescanner::Statement() { toks.Put(id, GetProvenance(at_)); if (auto replaced{preprocessor_.MacroReplacement(toks, *this)}) { auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())}; - disableSourceContinuation_ = - newLineClass.kind != LineClassification::Kind::Source; if (newLineClass.kind == 
LineClassification::Kind::CompilerDirective) { directiveSentinel_ = newLineClass.sentinel; + disableSourceContinuation_ = false; + } else { + disableSourceContinuation_ = + newLineClass.kind != LineClassification::Kind::Source; } } } @@ -1114,39 +1116,33 @@ bool Prescanner::SkipCommentLine(bool afterAmpersand) { SkipToEndOfLine(); omitNewline_ = true; } - return false; - } - auto lineClass{ClassifyLine(nextLine_)}; - if (lineClass.kind == LineClassification::Kind::Comment) { - NextLine(); - return true; } else if (inPreprocessorDirective_) { - return false; - } else if (afterAmpersand && - (lineClass.kind == - LineClassification::Kind::ConditionalCompilationDirective || - lineClass.kind == LineClassification::Kind::DefinitionDirective || - lineClass.kind == LineClassification::Kind::PreprocessorDirective || - lineClass.kind == LineClassification::Kind::IncludeDirective || - lineClass.kind == LineClassification::Kind::IncludeLine)) { - SkipToEndOfLine(); - omitNewline_ = true; - skipLeadingAmpersand_ = true; - return false; - } else if (lineClass.kind == - LineClassification::Kind::ConditionalCompilationDirective || - lineClass.kind == LineClassification::Kind::PreprocessorDirective) { - // Allow conditional compilation directives (e.g., #ifdef) to affect - // continuation lines. - // Allow other preprocessor directives, too, except #include - // (when it does not follow '&'), #define, and #undef (because - // they cannot be allowed to affect preceding text on a - // continued line). 
- preprocessor_.Directive(TokenizePreprocessorDirective(), *this); - return true; } else { - return false; + auto lineClass{ClassifyLine(nextLine_)}; + if (lineClass.kind == LineClassification::Kind::Comment) { + NextLine(); + return true; + } else if (lineClass.kind == + LineClassification::Kind::ConditionalCompilationDirective || + lineClass.kind == LineClassification::Kind::PreprocessorDirective) { + // Allow conditional compilation directives (e.g., #ifdef) to affect + // continuation lines. + // Allow other preprocessor directives, too, except #include + // (when it does not follow '&'), #define, and #undef (because + // they cannot be allowed to affect preceding text on a + // continued line). + preprocessor_.Directive(TokenizePreprocessorDirective(), *this); + return true; + } else if (afterAmpersand && + (lineClass.kind == LineClassification::Kind::DefinitionDirective || + lineClass.kind == LineClassification::Kind::IncludeDirective || + lineClass.kind == LineClassification::Kind::IncludeLine)) { + SkipToEndOfLine(); + omitNewline_ = true; + skipLeadingAmpersand_ = true; + } } + return false; } const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 4708d51d3af4dd..c7ec8733655648 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -56,6 +56,10 @@ static void CheckImplicitInterfaceArg(evaluate::ActualArgument &arg, "%VAL argument must be a scalar numeric or logical expression"_err_en_US); } if (const auto *expr{arg.UnwrapExpr()}) { + if (const Symbol * base{GetFirstSymbol(*expr)}; + base && IsFunctionResult(*base)) { + context.NoteDefinedSymbol(*base); + } if (IsBOZLiteral(*expr)) { messages.Say("BOZ argument requires an explicit interface"_err_en_US); } else if (evaluate::IsNullPointer(*expr)) { @@ -79,10 +83,6 @@ static void CheckImplicitInterfaceArg(evaluate::ActualArgument &arg, messages.Say( "VOLATILE 
argument requires an explicit interface"_err_en_US); } - if (const Symbol & base{named->GetFirstSymbol()}; - IsFunctionResult(base)) { - context.NoteDefinedSymbol(base); - } } else if (auto argChars{characteristics::DummyArgument::FromActual( "actual argument", *expr, context.foldingContext(), /*forImplicitInterface=*/true)}) { diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index de3fa8794caedf..734c34276b13b9 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -256,6 +256,9 @@ static bool IsBlockData(const Symbol &symbol) { } void CheckHelper::Check(const Symbol &symbol) { + if (symbol.has()) { + return; + } if (symbol.name().size() > common::maxNameLen && &symbol == &symbol.GetUltimate()) { if (context_.ShouldWarn(common::LanguageFeature::LongNames)) { diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index cc9f1cc7ed2691..4aecb8b8e7b479 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -91,11 +91,12 @@ template class DirectiveAttributeVisitor { void SetContextAssociatedLoopLevel(std::int64_t level) { GetContext().associatedLoopLevel = level; } - Symbol &MakeAssocSymbol(const SourceName &name, Symbol &prev, Scope &scope) { + Symbol &MakeAssocSymbol( + const SourceName &name, const Symbol &prev, Scope &scope) { const auto pair{scope.try_emplace(name, Attrs{}, HostAssocDetails{prev})}; return *pair.first->second; } - Symbol &MakeAssocSymbol(const SourceName &name, Symbol &prev) { + Symbol &MakeAssocSymbol(const SourceName &name, const Symbol &prev) { return MakeAssocSymbol(name, prev, currScope()); } void AddDataSharingAttributeObject(SymbolRef object) { @@ -108,6 +109,7 @@ template class DirectiveAttributeVisitor { const parser::Name *GetLoopIndex(const parser::DoConstruct &); const parser::DoConstruct *GetDoConstructIf( const 
parser::ExecutionPartConstruct &); + Symbol *DeclareNewPrivateAccessEntity(const Symbol &, Symbol::Flag, Scope &); Symbol *DeclarePrivateAccessEntity( const parser::Name &, Symbol::Flag, Scope &); Symbol *DeclarePrivateAccessEntity(Symbol &, Symbol::Flag, Scope &); @@ -736,6 +738,9 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { std::optional); void IssueNonConformanceWarning( llvm::omp::Directive D, parser::CharBlock source); + + void CreateImplicitSymbols( + const Symbol *symbol, std::optional setFlag = std::nullopt); }; template @@ -771,6 +776,19 @@ const parser::DoConstruct *DirectiveAttributeVisitor::GetDoConstructIf( return parser::Unwrap(x); } +template +Symbol *DirectiveAttributeVisitor::DeclareNewPrivateAccessEntity( + const Symbol &object, Symbol::Flag flag, Scope &scope) { + assert(object.owner() != currScope()); + auto &symbol{MakeAssocSymbol(object.name(), object, scope)}; + symbol.set(flag); + if (flag == Symbol::Flag::OmpCopyIn) { + // The symbol in copyin clause must be threadprivate entity. + symbol.set(Symbol::Flag::OmpThreadprivate); + } + return &symbol; +} + template Symbol *DirectiveAttributeVisitor::DeclarePrivateAccessEntity( const parser::Name &name, Symbol::Flag flag, Scope &scope) { @@ -785,13 +803,7 @@ template Symbol *DirectiveAttributeVisitor::DeclarePrivateAccessEntity( Symbol &object, Symbol::Flag flag, Scope &scope) { if (object.owner() != currScope()) { - auto &symbol{MakeAssocSymbol(object.name(), object, scope)}; - symbol.set(flag); - if (flag == Symbol::Flag::OmpCopyIn) { - // The symbol in copyin clause must be threadprivate entity. 
- symbol.set(Symbol::Flag::OmpThreadprivate); - } - return &symbol; + return DeclareNewPrivateAccessEntity(object, flag, scope); } else { object.set(flag); return &object; @@ -2031,24 +2043,152 @@ void OmpAttributeVisitor::Post(const parser::OpenMPAllocatorsConstruct &x) { PopContext(); } +static bool IsPrivatizable(const Symbol *sym) { + auto *misc{sym->detailsIf()}; + return !IsProcedure(*sym) && !IsNamedConstant(*sym) && + !sym->owner().IsDerivedType() && + sym->owner().kind() != Scope::Kind::ImpliedDos && + !sym->detailsIf() && + !sym->detailsIf() && + (!misc || + (misc->kind() != MiscDetails::Kind::ComplexPartRe && + misc->kind() != MiscDetails::Kind::ComplexPartIm && + misc->kind() != MiscDetails::Kind::KindParamInquiry && + misc->kind() != MiscDetails::Kind::LenParamInquiry && + misc->kind() != MiscDetails::Kind::ConstructName)); +} + +void OmpAttributeVisitor::CreateImplicitSymbols( + const Symbol *symbol, std::optional setFlag) { + if (!IsPrivatizable(symbol)) { + return; + } + + // Implicitly determined DSAs + // OMP 5.2 5.1.1 - Variables Referenced in a Construct + Symbol *lastDeclSymbol = nullptr; + std::optional prevDSA; + for (int dirDepth{0}; dirDepth < (int)dirContext_.size(); ++dirDepth) { + DirContext &dirContext = dirContext_[dirDepth]; + std::optional dsa; + + for (auto symMap : dirContext.objectWithDSA) { + // if the `symbol` already has a data-sharing attribute + if (symMap.first->name() == symbol->name()) { + dsa = symMap.second; + break; + } + } + + // When handling each implicit rule for a given symbol, one of the + // following 3 actions may be taken: + // 1. Declare a new private symbol. + // 2. Create a new association symbol with no flags, that will represent + // a shared symbol in the current scope. Note that symbols without + // any private flags are considered as shared. + // 3. Use the last declared private symbol, by inserting a new symbol + // in the scope being processed, associated with it. 
+ // If no private symbol was declared previously, then no association + // is needed and the symbol from the enclosing scope will be + // inherited by the current one. + // + // Because of how symbols are collected in lowering, not inserting a new + // symbol in the last case could lead to the conclusion that a symbol + // from an enclosing construct was declared in the current construct, + // which would result in wrong privatization code being generated. + // Consider the following example: + // + // !$omp parallel default(private) ! p1 + // !$omp parallel default(private) shared(x) ! p2 + // x = 10 + // !$omp end parallel + // !$omp end parallel + // + // If a new x symbol was not inserted in the inner parallel construct + // (p2), it would use the x symbol definition from the enclosing scope. + // Then, when p2's default symbols were collected in lowering, the x + // symbol from the outer parallel construct (p1) would be collected, as + // it would have the private flag set. + // This would make x appear to be defined in p2, causing it to be + // privatized in p2 and its privatization in p1 to be skipped. + auto makePrivateSymbol = [&](Symbol::Flag flag) { + const Symbol *hostSymbol = + lastDeclSymbol ? lastDeclSymbol : &symbol->GetUltimate(); + lastDeclSymbol = DeclareNewPrivateAccessEntity( + *hostSymbol, flag, context_.FindScope(dirContext.directiveSource)); + if (setFlag) { + lastDeclSymbol->set(*setFlag); + } + return lastDeclSymbol; + }; + auto makeSharedSymbol = [&]() { + const Symbol *hostSymbol = + lastDeclSymbol ? 
lastDeclSymbol : &symbol->GetUltimate(); + MakeAssocSymbol(symbol->name(), *hostSymbol, + context_.FindScope(dirContext.directiveSource)); + }; + auto useLastDeclSymbol = [&]() { + if (lastDeclSymbol) { + makeSharedSymbol(); + } + }; + + bool taskGenDir = llvm::omp::taskGeneratingSet.test(dirContext.directive); + bool targetDir = llvm::omp::allTargetSet.test(dirContext.directive); + bool parallelDir = llvm::omp::allParallelSet.test(dirContext.directive); + bool teamsDir = llvm::omp::allTeamsSet.test(dirContext.directive); + + if (dsa.has_value()) { + if (dsa.value() == Symbol::Flag::OmpShared && + (parallelDir || taskGenDir || teamsDir)) + makeSharedSymbol(); + // Private symbols will have been declared already. + prevDSA = dsa; + continue; + } + + if (dirContext.defaultDSA == Symbol::Flag::OmpPrivate || + dirContext.defaultDSA == Symbol::Flag::OmpFirstPrivate || + dirContext.defaultDSA == Symbol::Flag::OmpShared) { + // 1) default + // Allowed only with parallel, teams and task generating constructs. 
+ assert(parallelDir || taskGenDir || teamsDir); + if (dirContext.defaultDSA != Symbol::Flag::OmpShared) + makePrivateSymbol(dirContext.defaultDSA); + else + makeSharedSymbol(); + dsa = dirContext.defaultDSA; + } else if (parallelDir) { + // 2) parallel -> shared + makeSharedSymbol(); + dsa = Symbol::Flag::OmpShared; + } else if (!taskGenDir && !targetDir) { + // 3) enclosing context + useLastDeclSymbol(); + dsa = prevDSA; + } else if (targetDir) { + // TODO 4) not mapped target variable -> firstprivate + dsa = prevDSA; + } else if (taskGenDir) { + // TODO 5) dummy arg in orphaned taskgen construct -> firstprivate + if (prevDSA == Symbol::Flag::OmpShared) { + // 6) shared in enclosing context -> shared + makeSharedSymbol(); + dsa = Symbol::Flag::OmpShared; + } else { + // 7) firstprivate + dsa = Symbol::Flag::OmpFirstPrivate; + makePrivateSymbol(*dsa)->set(Symbol::Flag::OmpImplicit); + } + } + prevDSA = dsa; + } +} + // For OpenMP constructs, check all the data-refs within the constructs // and adjust the symbol for each Name if necessary void OmpAttributeVisitor::Post(const parser::Name &name) { auto *symbol{name.symbol}; - auto IsPrivatizable = [](const Symbol *sym) { - auto *misc{sym->detailsIf()}; - return !IsProcedure(*sym) && !IsNamedConstant(*sym) && - !sym->owner().IsDerivedType() && - sym->owner().kind() != Scope::Kind::ImpliedDos && - !sym->detailsIf() && - !sym->detailsIf() && - (!misc || - (misc->kind() != MiscDetails::Kind::ComplexPartRe && - misc->kind() != MiscDetails::Kind::ComplexPartIm && - misc->kind() != MiscDetails::Kind::KindParamInquiry && - misc->kind() != MiscDetails::Kind::LenParamInquiry && - misc->kind() != MiscDetails::Kind::ConstructName)); - }; if (symbol && !dirContext_.empty() && GetContext().withinConstruct) { if (IsPrivatizable(symbol) && !IsObjectWithDSA(*symbol)) { @@ -2076,125 +2216,20 @@ void OmpAttributeVisitor::Post(const parser::Name &name) { if (found->test(semantics::Symbol::Flag::OmpThreadprivate)) return; } - if 
(!IsPrivatizable(symbol)) { - return; - } - - // Implicitly determined DSAs - // OMP 5.2 5.1.1 - Variables Referenced in a Construct - Symbol *lastDeclSymbol = nullptr; - std::optional prevDSA; - for (int dirDepth{0}; dirDepth < (int)dirContext_.size(); ++dirDepth) { - DirContext &dirContext = dirContext_[dirDepth]; - std::optional dsa; - for (auto symMap : dirContext.objectWithDSA) { - // if the `symbol` already has a data-sharing attribute - if (symMap.first->name() == name.symbol->name()) { - dsa = symMap.second; - break; - } - } - - // When handling each implicit rule for a given symbol, one of the - // following 3 actions may be taken: - // 1. Declare a new private symbol. - // 2. Create a new association symbol with no flags, that will represent - // a shared symbol in the current scope. Note that symbols without - // any private flags are considered as shared. - // 3. Use the last declared private symbol, by inserting a new symbol - // in the scope being processed, associated with it. - // If no private symbol was declared previously, then no association - // is needed and the symbol from the enclosing scope will be - // inherited by the current one. - // - // Because of how symbols are collected in lowering, not inserting a new - // symbol in the last case could lead to the conclusion that a symbol - // from an enclosing construct was declared in the current construct, - // which would result in wrong privatization code being generated. - // Consider the following example: - // - // !$omp parallel default(private) ! p1 - // !$omp parallel default(private) shared(x) ! p2 - // x = 10 - // !$omp end parallel - // !$omp end parallel - // - // If a new x symbol was not inserted in the inner parallel construct - // (p2), it would use the x symbol definition from the enclosing scope. 
- // Then, when p2's default symbols were collected in lowering, the x - // symbol from the outer parallel construct (p1) would be collected, as - // it would have the private flag set. - // This would make x appear to be defined in p2, causing it to be - // privatized in p2 and its privatization in p1 to be skipped. - auto makePrivateSymbol = [&](Symbol::Flag flag) { - Symbol *hostSymbol = - lastDeclSymbol ? lastDeclSymbol : &symbol->GetUltimate(); - lastDeclSymbol = DeclarePrivateAccessEntity( - *hostSymbol, flag, context_.FindScope(dirContext.directiveSource)); - return lastDeclSymbol; - }; - auto makeSharedSymbol = [&]() { - Symbol *hostSymbol = - lastDeclSymbol ? lastDeclSymbol : &symbol->GetUltimate(); - MakeAssocSymbol(symbol->name(), *hostSymbol, - context_.FindScope(dirContext.directiveSource)); - }; - auto useLastDeclSymbol = [&]() { - if (lastDeclSymbol) - MakeAssocSymbol(symbol->name(), *lastDeclSymbol, - context_.FindScope(dirContext.directiveSource)); - }; - - bool taskGenDir = llvm::omp::taskGeneratingSet.test(dirContext.directive); - bool targetDir = llvm::omp::allTargetSet.test(dirContext.directive); - bool parallelDir = llvm::omp::allParallelSet.test(dirContext.directive); - bool teamsDir = llvm::omp::allTeamsSet.test(dirContext.directive); - - if (dsa.has_value()) { - if (dsa.value() == Symbol::Flag::OmpShared && - (parallelDir || taskGenDir || teamsDir)) - makeSharedSymbol(); - // Private symbols will have been declared already. - prevDSA = dsa; - continue; - } - - if (dirContext.defaultDSA == Symbol::Flag::OmpPrivate || - dirContext.defaultDSA == Symbol::Flag::OmpFirstPrivate || - dirContext.defaultDSA == Symbol::Flag::OmpShared) { - // 1) default - // Allowed only with parallel, teams and task generating constructs. 
- assert(parallelDir || taskGenDir || teamsDir); - if (dirContext.defaultDSA != Symbol::Flag::OmpShared) - makePrivateSymbol(dirContext.defaultDSA); - else - makeSharedSymbol(); - dsa = dirContext.defaultDSA; - } else if (parallelDir) { - // 2) parallel -> shared - makeSharedSymbol(); - dsa = Symbol::Flag::OmpShared; - } else if (!taskGenDir && !targetDir) { - // 3) enclosing context - useLastDeclSymbol(); - dsa = prevDSA; - } else if (targetDir) { - // TODO 4) not mapped target variable -> firstprivate - dsa = prevDSA; - } else if (taskGenDir) { - // TODO 5) dummy arg in orphaned taskgen construct -> firstprivate - if (prevDSA == Symbol::Flag::OmpShared) { - // 6) shared in enclosing context -> shared - makeSharedSymbol(); - dsa = Symbol::Flag::OmpShared; - } else { - // 7) firstprivate - dsa = Symbol::Flag::OmpFirstPrivate; - makePrivateSymbol(*dsa)->set(Symbol::Flag::OmpImplicit); + if (auto *stmtFunction{symbol->detailsIf()}; + stmtFunction && stmtFunction->stmtFunction()) { + // Each non-dummy argument from a statement function must be handled too, + // as if it was explicitly referenced. 
+ semantics::UnorderedSymbolSet symbols{ + CollectSymbols(stmtFunction->stmtFunction().value())}; + for (const auto &sym : symbols) { + if (!IsStmtFunctionDummy(sym) && !IsObjectWithDSA(*sym)) { + CreateImplicitSymbols(&*sym, Symbol::Flag::OmpFromStmtFunction); } } - prevDSA = dsa; + } else { + CreateImplicitSymbols(symbol); } } // within OpenMP construct } diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index c0478fd4390076..ec8f854f64d103 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1797,6 +1797,9 @@ void AttrsVisitor::SetBindNameOn(Symbol &symbol) { } auto last{label->find_last_not_of(" ")}; label = label->substr(first, last - first + 1); + } else if (symbol.GetIsExplicitBindName()) { + // don't try to override explicit binding name with default + return; } else if (ClassifyProcedure(symbol) == ProcedureDefinitionClass::Internal) { // BIND(C) does not give an implicit binding label to internal procedures. return; diff --git a/flang/runtime/external-unit.cpp b/flang/runtime/external-unit.cpp index 8009151a8a370c..d17a92622f8448 100644 --- a/flang/runtime/external-unit.cpp +++ b/flang/runtime/external-unit.cpp @@ -65,9 +65,13 @@ ExternalFileUnit *ExternalFileUnit::LookUpOrCreateAnonymous(int unit, bool exists{false}; ExternalFileUnit *result{GetUnitMap().LookUpOrCreate(unit, handler, exists)}; if (result && !exists) { + common::optional action; + if (dir == Direction::Output) { + action = Action::ReadWrite; + } if (!result->OpenAnonymousUnit( dir == Direction::Input ? 
OpenStatus::Unknown : OpenStatus::Replace, - Action::ReadWrite, Position::Rewind, Convert::Unknown, handler)) { + action, Position::Rewind, Convert::Unknown, handler)) { // fort.N isn't a writable file if (ExternalFileUnit * closed{LookUpForClose(result->unitNumber())}) { closed->DestroyClosed(); diff --git a/flang/runtime/numeric.cpp b/flang/runtime/numeric.cpp index 40bacf07157a27..b5e0851a16cd1e 100644 --- a/flang/runtime/numeric.cpp +++ b/flang/runtime/numeric.cpp @@ -95,20 +95,22 @@ template inline RT_API_ATTRS T Scale(T x, std::int64_t p) { } // SELECTED_INT_KIND (16.9.169) -template -inline RT_API_ATTRS CppTypeFor SelectedIntKind(T x) { - if (x <= 2) { +template +inline RT_API_ATTRS CppTypeFor SelectedIntKind( + X x, M mask) { +#if !defined __SIZEOF_INT128__ || defined FLANG_RUNTIME_NO_INTEGER_16 + mask &= ~(1 << 16); +#endif + if (x <= 2 && (mask & (1 << 1))) { return 1; - } else if (x <= 4) { + } else if (x <= 4 && (mask & (1 << 2))) { return 2; - } else if (x <= 9) { + } else if (x <= 9 && (mask & (1 << 4))) { return 4; - } else if (x <= 18) { + } else if (x <= 18 && (mask & (1 << 8))) { return 8; -#if defined __SIZEOF_INT128__ && !defined FLANG_RUNTIME_NO_INTEGER_16 - } else if (x <= 38) { + } else if (x <= 38 && (mask & (1 << 16))) { return 16; -#endif } return -1; } @@ -130,60 +132,52 @@ inline RT_API_ATTRS CppTypeFor SelectedLogicalKind( } // SELECTED_REAL_KIND (16.9.170) -template +template inline RT_API_ATTRS CppTypeFor SelectedRealKind( - P p, R r, D d) { + P p, R r, D d, M mask) { if (d != 2) { return -5; } - -#ifndef FLANG_RUNTIME_NO_REAL_2 - constexpr bool hasReal2{true}; -#else - constexpr bool hasReal2{false}; +#ifdef FLANG_RUNTIME_NO_REAL_2 + mask &= ~(1 << 2); #endif -#ifndef FLANG_RUNTIME_NO_REAL_3 - constexpr bool hasReal3{true}; -#else - constexpr bool hasReal3{false}; +#ifdef FLANG_RUNTIME_NO_REAL_3 + mask &= ~(1 << 3); #endif -#if defined LDBL_MANT_DIG == 64 && !defined FLANG_RUNTIME_NO_REAL_10 - constexpr bool hasReal10{true}; -#else - 
constexpr bool hasReal10{false}; +#if LDBL_MANT_DIG < 64 || defined FLANG_RUNTIME_NO_REAL_10 + mask &= ~(1 << 10); #endif -#if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && \ - !defined FLANG_RUNTIME_NO_REAL_16 - constexpr bool hasReal16{true}; -#else - constexpr bool hasReal16{false}; +#if LDBL_MANT_DIG < 64 || defined FLANG_RUNTIME_NO_REAL_16 + mask &= ~(1 << 16); #endif int error{0}; int kind{0}; - if (hasReal2 && p <= 3) { + if (p <= 3 && (mask & (1 << 2))) { kind = 2; - } else if (p <= 6) { + } else if (p <= 6 && (mask & (1 << 4))) { kind = 4; - } else if (p <= 15) { + } else if (p <= 15 && (mask & (1 << 8))) { kind = 8; - } else if (hasReal10 && p <= 18) { + } else if (p <= 18 && (mask & (1 << 10))) { kind = 10; - } else if (hasReal16 && p <= 33) { + } else if (p <= 33 && (mask & (1 << 16))) { kind = 16; } else { error -= 1; } - if (r <= 4) { - kind = kind < 2 ? (hasReal2 ? 2 : 4) : kind; - } else if (r <= 37) { - kind = kind < 3 ? (hasReal3 && p != 3 ? 3 : 4) : kind; - } else if (r <= 307) { + if (r <= 4 && (mask & (1 << 2))) { + kind = kind < 2 ? 2 : kind; + } else if (r <= 37 && p != 3 && (mask & (1 << 3))) { + kind = kind < 3 ? 3 : kind; + } else if (r <= 37 && (mask & (1 << 4))) { + kind = kind < 4 ? 4 : kind; + } else if (r <= 307 && (mask & (1 << 8))) { kind = kind < 8 ? 8 : kind; - } else if (hasReal10 && r <= 4931) { + } else if (r <= 4931 && (mask & (1 << 10))) { kind = kind < 10 ? 10 : kind; - } else if (hasReal16 && r <= 4931) { + } else if (r <= 4931 && (mask & (1 << 16))) { kind = kind < 16 ? 
16 : kind; } else { error -= 2; @@ -790,6 +784,12 @@ CppTypeFor RTDEF(SelectedCharKind)( // SELECTED_INT_KIND CppTypeFor RTDEF(SelectedIntKind)( const char *source, int line, void *x, int xKind) { + return RTNAME(SelectedIntKindMasked)(source, line, x, xKind, + (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8) | (1 << 16)); +} + +CppTypeFor RTDEF(SelectedIntKindMasked)( + const char *source, int line, void *x, int xKind, int mask) { #ifdef __SIZEOF_INT128__ CppTypeFor r = GetIntArgValue>( @@ -798,7 +798,7 @@ CppTypeFor RTDEF(SelectedIntKind)( std::int64_t r = GetIntArgValue( source, line, x, xKind, /*defaultValue*/ 0, /*resKind*/ 8); #endif - return SelectedIntKind(r); + return SelectedIntKind(r, mask); } // SELECTED_LOGICAL_KIND @@ -819,6 +819,14 @@ CppTypeFor RTDEF(SelectedLogicalKind)( CppTypeFor RTDEF(SelectedRealKind)(const char *source, int line, void *precision, int pKind, void *range, int rKind, void *radix, int dKind) { + return RTNAME(SelectedRealKindMasked)(source, line, precision, pKind, range, + rKind, radix, dKind, + (1 << 2) | (1 << 3) | (1 << 4) | (1 << 8) | (1 << 10) | (1 << 16)); +} + +CppTypeFor RTDEF(SelectedRealKindMasked)( + const char *source, int line, void *precision, int pKind, void *range, + int rKind, void *radix, int dKind, int mask) { #ifdef __SIZEOF_INT128__ CppTypeFor p = GetIntArgValue>( @@ -837,7 +845,7 @@ CppTypeFor RTDEF(SelectedRealKind)(const char *source, std::int64_t d = GetIntArgValue( source, line, radix, dKind, /*defaultValue*/ 2, /*resKind*/ 8); #endif - return SelectedRealKind(p, r, d); + return SelectedRealKind(p, r, d, mask); } CppTypeFor RTDEF(Spacing4)( diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-9.fir b/flang/test/Analysis/AliasAnalysis/ptr-component.fir similarity index 100% rename from flang/test/Analysis/AliasAnalysis/alias-analysis-9.fir rename to flang/test/Analysis/AliasAnalysis/ptr-component.fir diff --git a/flang/test/Fir/cuf-invalid.fir b/flang/test/Fir/cuf-invalid.fir index 
06e08d14b2435c..e9aeaa281e2a85 100644 --- a/flang/test/Fir/cuf-invalid.fir +++ b/flang/test/Fir/cuf-invalid.fir @@ -94,3 +94,34 @@ func.func @_QPsub1() { cuf.free %0 : !fir.ref {data_attr = #cuf.cuda} return } + +// ----- + +func.func @_QPsub1(%arg0: !fir.ref> {cuf.data_attr = #cuf.cuda, fir.bindc_name = "adev"}, %arg1: !fir.ref> {fir.bindc_name = "ahost"}, %arg2: !fir.ref {fir.bindc_name = "n"}, %arg3: !fir.ref {fir.bindc_name = "m"}) { + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFsub1En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg3 dummy_scope %0 {uniq_name = "_QFsub1Em"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3 = fir.load %1#0 : !fir.ref + %4 = fir.load %2#0 : !fir.ref + %5 = arith.muli %3, %4 : i32 + %6 = fir.convert %5 : (i32) -> i64 + %7 = fir.convert %6 : (i64) -> index + %c0 = arith.constant 0 : index + %8 = arith.cmpi sgt, %7, %c0 : index + %9 = arith.select %8, %7, %c0 : index + %10 = fir.shape %9 : (index) -> !fir.shape<1> + %11:2 = hlfir.declare %arg0(%10) dummy_scope %0 {data_attr = #cuf.cuda, uniq_name = "_QFsub1Eadev"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) + %12 = fir.load %1#0 : !fir.ref + %13 = fir.load %2#0 : !fir.ref + %14 = arith.muli %12, %13 : i32 + %15 = fir.convert %14 : (i32) -> i64 + %16 = fir.convert %15 : (i64) -> index + %c0_0 = arith.constant 0 : index + %17 = arith.cmpi sgt, %16, %c0_0 : index + %18 = arith.select %17, %16, %c0_0 : index + %19 = fir.shape %18 : (index) -> !fir.shape<1> + %20:2 = hlfir.declare %arg1(%19) dummy_scope %0 {uniq_name = "_QFsub1Eahost"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) + // expected-error@+1{{'cuf.data_transfer' op shape can only be specified on data transfer with references}} + cuf.data_transfer %20#0 to %11#0, %19 : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer} : !fir.box>, !fir.box> + return +} diff --git 
a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 index 833976ff284a86..5f09371bbaba2e 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 @@ -57,6 +57,5 @@ end program compilation_to_obj ! LLVM: @[[GLOB_VAR:[^[:space:]]+]]t = internal global ! LLVM: define internal void @_QQmain..omp_par -! LLVM: %[[LOCAL_VAR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 -! LLVM-NEXT: %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8 -! LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %[[LOCAL_VAR]], align 8 +! LLVM: %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8 +! LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %{{.*}}, align 8 diff --git a/flang/test/Lower/OpenMP/statement-function.f90 b/flang/test/Lower/OpenMP/statement-function.f90 new file mode 100644 index 00000000000000..6cdbcb6e141c7e --- /dev/null +++ b/flang/test/Lower/OpenMP/statement-function.f90 @@ -0,0 +1,43 @@ +! Test privatization within OpenMP constructs containing statement functions. +! 
RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + +!CHECK-LABEL: func @_QPtest_implicit_use +!CHECK: %[[IEXP:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_implicit_useEiexp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[IIMP:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_implicit_useEiimp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: omp.parallel private({{.*firstprivate.*}} %[[IEXP]]#0 -> %[[PRIV_IEXP:.*]] : !fir.ref, +!CHECK-SAME: {{.*firstprivate.*}} %[[IIMP]]#0 -> %[[PRIV_IIMP:.*]] : !fir.ref) +!CHECK: %{{.*}}:2 = hlfir.declare %[[PRIV_IEXP]] {uniq_name = "_QFtest_implicit_useEiexp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %{{.*}}:2 = hlfir.declare %[[PRIV_IIMP]] {uniq_name = "_QFtest_implicit_useEiimp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +subroutine test_implicit_use() + implicit none + integer :: iexp, iimp + integer, external :: ifun + integer :: sf + + sf(iexp)=ifun(iimp)+iexp + !$omp parallel default(firstprivate) + iexp = sf(iexp) + !$omp end parallel +end subroutine + +!CHECK-LABEL: func @_QPtest_implicit_use2 +!CHECK: %[[IEXP:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_implicit_use2Eiexp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[IIMP:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_implicit_use2Eiimp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: omp.task +!CHECK: %[[PRIV_IEXP:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_implicit_use2Eiexp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[TEMP0:.*]] = fir.load %[[IEXP]]#0 : !fir.ref +!CHECK: hlfir.assign %[[TEMP0]] to %[[PRIV_IEXP]]#0 temporary_lhs : i32, !fir.ref +!CHECK: %[[PRIV_IIMP:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_implicit_use2Eiimp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[TEMP1:.*]] = fir.load %[[IIMP]]#0 : !fir.ref +!CHECK: hlfir.assign %[[TEMP1]] to %[[PRIV_IIMP]]#0 temporary_lhs : i32, !fir.ref +subroutine test_implicit_use2() + implicit none + integer :: iexp, iimp + integer, external :: 
ifun + integer :: sf + + sf(iexp)=ifun(iimp) + !$omp task + iexp = sf(iexp) + !$omp end task +end subroutine diff --git a/flang/test/Parser/recovery05.f90 b/flang/test/Parser/recovery05.f90 new file mode 100644 index 00000000000000..9c8c3689b27bd5 --- /dev/null +++ b/flang/test/Parser/recovery05.f90 @@ -0,0 +1,5 @@ +! RUN: not %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s +continue +! CHECK: error: expected end of statement +flush iostat=1 +end diff --git a/flang/test/Preprocessing/line-in-contin.F90 b/flang/test/Preprocessing/line-in-contin.F90 index 138e579bffaa28..28efbd02d3ae89 100644 --- a/flang/test/Preprocessing/line-in-contin.F90 +++ b/flang/test/Preprocessing/line-in-contin.F90 @@ -1,8 +1,10 @@ -! RUN: %flang_fc1 -E %s 2>&1 | FileCheck %s -! CHECK: call foo( 0.) -! CHECK: call foo( 1.) -! CHECK: call foo( 2.) -! CHECK: call foo( 3.) +! RUN: %flang_fc1 -fopenmp -E %s 2>&1 | FileCheck %s +! CHECK: call foo(0.) +! CHECK: call foo(1.) +! CHECK: call foo(2.) +! CHECK: call foo(3.) +! CHECK: !$omp parallel do default(none) private(j) +! CHECK: !$omp end parallel do call foo( & # 100 "bar.h" & 0.) @@ -17,4 +19,16 @@ # 103 "bar.h" & 3. & ) +!$omp parallel do & +#ifdef undef +!$omp garbage & +#else +!$omp default(none) & +#endif +!$omp private(j) + do j=1,100 + end do +!$omp end & +# 104 "bar.h" +!$omp parallel do end diff --git a/flang/test/Semantics/data08.f90 b/flang/test/Semantics/data08.f90 index 7e12a71d117728..84cd6d1c125dbb 100644 --- a/flang/test/Semantics/data08.f90 +++ b/flang/test/Semantics/data08.f90 @@ -1,5 +1,5 @@ ! RUN: %flang_fc1 -fdebug-dump-symbols -pedantic %s 2>&1 | FileCheck %s \ -! RUN: --check-prefixes=%if system-aix %{"CHECK","BE"%} \ +! RUN: --check-prefixes=%if target={{.*-aix.*|sparc.*}} %{"CHECK","BE"%} \ ! RUN: %else %{"CHECK","LE"%} ! 
CHECK: DATA statement value initializes 'jx' of type 'INTEGER(4)' with CHARACTER diff --git a/flang/test/Semantics/declarations03.f90 b/flang/test/Semantics/declarations03.f90 index 65b07e7d5c6567..8e6f0a4aaf6bd6 100644 --- a/flang/test/Semantics/declarations03.f90 +++ b/flang/test/Semantics/declarations03.f90 @@ -50,6 +50,9 @@ module m !ERROR: BIND_C attribute was already specified on 's5' integer, bind(c, name="ss2") :: s5 + integer, bind(c, name="s6explicit") :: s6 + dimension s6(10) ! caused spurious error + end subroutine common1() diff --git a/flang/test/Semantics/resolve82.f90 b/flang/test/Semantics/resolve82.f90 index 88339742efdb36..989ce1d837c705 100644 --- a/flang/test/Semantics/resolve82.f90 +++ b/flang/test/Semantics/resolve82.f90 @@ -34,6 +34,7 @@ end function procFunc real y common /blk/ y protected y + logical,protected,external,pointer :: z contains @@ -60,3 +61,8 @@ subroutine testProcDecl(arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11) end subroutine testProcDecl end module m + +subroutine subb() + !Ensure no spurious error from a benign UseError + use m, testProcDecl=>z +end diff --git a/flang/test/Semantics/undef-result01.f90 b/flang/test/Semantics/undef-result01.f90 index bf6af11a8d7b92..08e7fe1e448998 100644 --- a/flang/test/Semantics/undef-result01.f90 +++ b/flang/test/Semantics/undef-result01.f90 @@ -148,3 +148,8 @@ function defdByAssociate() s = 1. 
end associate end + +function defdByElementArgToImplicit() result(r) + real r(1) + call define(r(1)) +end diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index d22bd1153598eb..60aa7f5ccb319a 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -211,10 +211,12 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.fileno libc.src.stdio.fprintf libc.src.stdio.fscanf + libc.src.stdio.vfscanf libc.src.stdio.printf libc.src.stdio.remove libc.src.stdio.rename libc.src.stdio.scanf + libc.src.stdio.vscanf libc.src.stdio.snprintf libc.src.stdio.sprintf libc.src.stdio.asprintf diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 1a647737ec455a..9a2746dcb86f87 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -210,10 +210,12 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.fileno libc.src.stdio.fprintf libc.src.stdio.fscanf + libc.src.stdio.vfscanf libc.src.stdio.printf libc.src.stdio.remove libc.src.stdio.rename libc.src.stdio.scanf + libc.src.stdio.vscanf libc.src.stdio.snprintf libc.src.stdio.sprintf libc.src.stdio.asprintf diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index bac1e3cfa85da7..141dc70463d64a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -210,10 +210,12 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.fileno libc.src.stdio.fprintf libc.src.stdio.fscanf + libc.src.stdio.vfscanf libc.src.stdio.printf libc.src.stdio.remove libc.src.stdio.rename libc.src.stdio.scanf + libc.src.stdio.vscanf libc.src.stdio.snprintf libc.src.stdio.sprintf libc.src.stdio.asprintf diff --git a/libc/newhdrgen/yaml/stdio.yaml b/libc/newhdrgen/yaml/stdio.yaml index 43438699b58409..fd116bbc00895d 100644 --- a/libc/newhdrgen/yaml/stdio.yaml +++ b/libc/newhdrgen/yaml/stdio.yaml @@ -178,6 +178,14 @@ 
functions: - type: FILE *__restrict - type: const char *__restrict - type: '...' + - name: vfscanf + standards: + - stdc + return_type: int + arguments: + - type: FILE *__restrict + - type: const char *__restrict + - type: va_list - name: fseek standards: - stdc @@ -284,6 +292,13 @@ functions: arguments: - type: const char *__restrict - type: '...' + - name: vscanf + standards: + - stdc + return_type: int + arguments: + - type: const char *__restrict + - type: va_list - name: setbuf standards: - stdc diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 6d8be9f8e4016d..026cc72b458a77 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -1042,6 +1042,12 @@ def StdC : StandardSpec<"stdc"> { [ArgSpec, ArgSpec] >, + FunctionSpec< + "vscanf", + RetValSpec, + [ArgSpec, + ArgSpec] + >, FunctionSpec< "fscanf", RetValSpec, @@ -1049,6 +1055,13 @@ def StdC : StandardSpec<"stdc"> { ArgSpec, ArgSpec] >, + FunctionSpec< + "vfscanf", + RetValSpec, + [ArgSpec, + ArgSpec, + ArgSpec] + >, FunctionSpec< "sprintf", RetValSpec, diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 372b8fc8192455..b9bc904471df9a 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -143,6 +143,16 @@ add_entrypoint_object( ${scanf_deps} ) +add_entrypoint_object( + vfscanf + SRCS + vfscanf.cpp + HDRS + vfscanf.h + DEPENDS + ${scanf_deps} +) + add_entrypoint_object( scanf SRCS @@ -153,6 +163,16 @@ add_entrypoint_object( ${scanf_deps} ) +add_entrypoint_object( + vscanf + SRCS + vscanf.cpp + HDRS + vscanf.h + DEPENDS + ${scanf_deps} +) + add_entrypoint_object( sprintf SRCS diff --git a/libc/src/stdio/vfscanf.cpp b/libc/src/stdio/vfscanf.cpp new file mode 100644 index 00000000000000..220576522d0fdb --- /dev/null +++ b/libc/src/stdio/vfscanf.cpp @@ -0,0 +1,34 @@ +//===-- Implementation of vfscanf -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/vfscanf.h" + +#include "src/__support/File/file.h" +#include "src/__support/arg_list.h" +#include "src/__support/macros/config.h" +#include "src/stdio/scanf_core/vfscanf_internal.h" + +#include "hdr/types/FILE.h" +#include + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, vfscanf, + (::FILE *__restrict stream, const char *__restrict format, + va_list vlist)) { + internal::ArgList args(vlist); // This holder class allows for easier copying + // and pointer semantics, as well as handling + // destruction automatically. + va_end(vlist); + int ret_val = scanf_core::vfscanf_internal(stream, format, args); + // This is done to avoid including stdio.h in the internals. On most systems + // EOF is -1, so this will be transformed into just "return ret_val". + return (ret_val == -1) ? EOF : ret_val; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/vfscanf.h b/libc/src/stdio/vfscanf.h new file mode 100644 index 00000000000000..1a0a12d9eb4cd3 --- /dev/null +++ b/libc/src/stdio/vfscanf.h @@ -0,0 +1,24 @@ +//===-- Implementation header of vfscanf ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_VFSCANF_H +#define LLVM_LIBC_SRC_STDIO_VFSCANF_H + +#include "hdr/types/FILE.h" +#include "src/__support/macros/config.h" + +#include + +namespace LIBC_NAMESPACE_DECL { + +int vfscanf(::FILE *__restrict stream, const char *__restrict format, + va_list vlist); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STDIO_VFSCANF_H diff --git a/libc/src/stdio/vscanf.cpp b/libc/src/stdio/vscanf.cpp new file mode 100644 index 00000000000000..64f5cc1d6962a1 --- /dev/null +++ b/libc/src/stdio/vscanf.cpp @@ -0,0 +1,40 @@ +//===-- Implementation of vscanf --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/vscanf.h" + +#include "src/__support/File/file.h" +#include "src/__support/arg_list.h" +#include "src/__support/macros/config.h" +#include "src/stdio/scanf_core/vfscanf_internal.h" + +#include "hdr/types/FILE.h" +#include + +#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE +#define SCANF_STDIN LIBC_NAMESPACE::stdin +#else // LIBC_COPT_STDIO_USE_SYSTEM_FILE +#define SCANF_STDIN ::stdin +#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, vscanf, + (const char *__restrict format, va_list vlist)) { + internal::ArgList args(vlist); // This holder class allows for easier copying + // and pointer semantics, as well as handling + // destruction automatically. + va_end(vlist); + int ret_val = scanf_core::vfscanf_internal( + reinterpret_cast<::FILE *>(SCANF_STDIN), format, args); + // This is done to avoid including stdio.h in the internals. 
On most systems + // EOF is -1, so this will be transformed into just "return ret_val". + return (ret_val == -1) ? EOF : ret_val; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/vscanf.h b/libc/src/stdio/vscanf.h new file mode 100644 index 00000000000000..5c59b91128ea32 --- /dev/null +++ b/libc/src/stdio/vscanf.h @@ -0,0 +1,23 @@ +//===-- Implementation header of vscanf -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_VSCANF_H +#define LLVM_LIBC_SRC_STDIO_VSCANF_H + +#include "hdr/types/FILE.h" +#include "src/__support/macros/config.h" + +#include + +namespace LIBC_NAMESPACE_DECL { + +int vscanf(const char *__restrict format, va_list vlist); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STDIO_VSCANF_H diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index 8b05b928a02695..d7b88186b5704a 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -286,6 +286,20 @@ add_libc_test( ${use_system_file} ) +add_libc_test( + vfscanf_test + SUITE + libc_stdio_unittests + SRCS + vfscanf_test.cpp + DEPENDS + libc.src.stdio.vfscanf + ${fscanf_test_deps} + libc.src.__support.CPP.string_view + COMPILE_OPTIONS + ${use_system_file} +) + if(LIBC_CONF_SCANF_DISABLE_FLOAT) list(APPEND sscanf_test_copts "-DLIBC_COPT_SCANF_DISABLE_FLOAT") endif() diff --git a/libc/test/src/stdio/vfscanf_test.cpp b/libc/test/src/stdio/vfscanf_test.cpp new file mode 100644 index 00000000000000..7a9cbf7f123880 --- /dev/null +++ b/libc/test/src/stdio/vfscanf_test.cpp @@ -0,0 +1,98 @@ +//===-- Unittests for vfscanf ---------------------------------------------===// +// +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/string_view.h" + +#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE +#include "src/stdio/fclose.h" +#include "src/stdio/ferror.h" +#include "src/stdio/fopen.h" +#include "src/stdio/fwrite.h" +#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE + +#include "src/stdio/vfscanf.h" + +#include "test/UnitTest/Test.h" + +#include + +namespace scanf_test { +#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE +using LIBC_NAMESPACE::fclose; +using LIBC_NAMESPACE::ferror; +using LIBC_NAMESPACE::fopen; +using LIBC_NAMESPACE::fwrite; +#else // defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE) +using ::fclose; +using ::ferror; +using ::fopen; +using ::fwrite; +#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE +} // namespace scanf_test + +static int call_vfscanf(::FILE *stream, const char *__restrict format, ...) 
{ + va_list vlist; + va_start(vlist, format); + int ret = LIBC_NAMESPACE::vfscanf(stream, format, vlist); + va_end(vlist); + return ret; +} + +TEST(LlvmLibcFScanfTest, WriteToFile) { + const char *FILENAME = "fscanf_output.test"; + auto FILE_PATH = libc_make_test_file_path(FILENAME); + ::FILE *file = scanf_test::fopen(FILE_PATH, "w"); + ASSERT_FALSE(file == nullptr); + + int read; + + constexpr char simple[] = "A simple string with no conversions.\n"; + + ASSERT_EQ(sizeof(simple) - 1, + scanf_test::fwrite(simple, 1, sizeof(simple) - 1, file)); + + constexpr char numbers[] = "1234567890\n"; + + ASSERT_EQ(sizeof(numbers) - 1, + scanf_test::fwrite(numbers, 1, sizeof(numbers) - 1, file)); + + constexpr char numbers_and_more[] = "1234 and more\n"; + + ASSERT_EQ(sizeof(numbers_and_more) - 1, + scanf_test::fwrite(numbers_and_more, 1, + sizeof(numbers_and_more) - 1, file)); + + read = call_vfscanf(file, "Reading from a write-only file should fail."); + EXPECT_LT(read, 0); + + ASSERT_EQ(0, scanf_test::fclose(file)); + + file = scanf_test::fopen(FILE_PATH, "r"); + ASSERT_FALSE(file == nullptr); + + char data[50]; + read = call_vfscanf(file, "%[A-Za-z .\n]", data); + ASSERT_EQ(read, 1); + ASSERT_STREQ(simple, data); + + read = call_vfscanf(file, "%s", data); + ASSERT_EQ(read, 1); + ASSERT_EQ(LIBC_NAMESPACE::cpp::string_view(numbers, 10), + LIBC_NAMESPACE::cpp::string_view(data)); + + // The format string starts with a space to handle the fact that the %s leaves + // a trailing \n and %c doesn't strip leading whitespace. 
+ read = call_vfscanf(file, " %50c", data); + ASSERT_EQ(read, 1); + ASSERT_EQ( + LIBC_NAMESPACE::cpp::string_view(numbers_and_more), + LIBC_NAMESPACE::cpp::string_view(data, sizeof(numbers_and_more) - 1)); + + ASSERT_EQ(scanf_test::ferror(file), 0); + ASSERT_EQ(scanf_test::fclose(file), 0); +} diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index a1506e115fe70f..f6d3142c1e2d3e 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -442,7 +442,7 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_inplace_vector`` *unimplemented* ---------------------------------------------------------- ----------------- - ``__cpp_lib_is_virtual_base_of`` *unimplemented* + ``__cpp_lib_is_virtual_base_of`` ``202406L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_is_within_lifetime`` *unimplemented* ---------------------------------------------------------- ----------------- diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index fe9f4c1973cdb4..bc28f380945bc3 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -38,7 +38,7 @@ What's New in Libc++ 20.0.0? Implemented Papers ------------------ -- TODO +- P2985R0: A type trait for detecting virtual base classes (`Github `__) Improvements and New Features diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv index dd62bcc2555ffc..d95cb11f483c00 100644 --- a/libcxx/docs/Status/Cxx2cPapers.csv +++ b/libcxx/docs/Status/Cxx2cPapers.csv @@ -68,7 +68,7 @@ "`P2389R2 `__","``dextents`` Index Type Parameter","2024-06 (St. Louis)","|Complete|","19.0","" "`P3168R2 `__","Give ``std::optional`` Range Support","2024-06 (St. Louis)","","","|ranges|" "`P3217R0 `__","Adjoints to 'Enabling list-initialization for algorithms': find_last","2024-06 (St. 
Louis)","","","" -"`P2985R0 `__","A type trait for detecting virtual base classes","2024-06 (St. Louis)","","","" +"`P2985R0 `__","A type trait for detecting virtual base classes","2024-06 (St. Louis)","|Complete|","20.0","" "`P0843R14 `__","``inplace_vector``","2024-06 (St. Louis)","","","" "`P3235R3 `__","``std::print`` more types faster with less memory","2024-06 (St. Louis)","","","|format| |DR|" "`P2968R2 `__","Make ``std::ignore`` a first-class object","2024-06 (St. Louis)","|Complete|","19.0","" diff --git a/libcxx/include/__type_traits/is_base_of.h b/libcxx/include/__type_traits/is_base_of.h index 090abeeb54dccb..488b63719eb600 100644 --- a/libcxx/include/__type_traits/is_base_of.h +++ b/libcxx/include/__type_traits/is_base_of.h @@ -26,6 +26,18 @@ template inline constexpr bool is_base_of_v = __is_base_of(_Bp, _Dp); #endif +#if _LIBCPP_STD_VER >= 26 +# if __has_builtin(__builtin_is_virtual_base_of) + +template +struct _LIBCPP_TEMPLATE_VIS is_virtual_base_of : public bool_constant<__builtin_is_virtual_base_of(_Base, _Derived)> {}; + +template +inline constexpr bool is_virtual_base_of_v = __builtin_is_virtual_base_of(_Base, _Derived); + +# endif +#endif + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___TYPE_TRAITS_IS_BASE_OF_H diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index b14d2cb6c78036..b8e3d05588f96e 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -620,8 +620,7 @@ inline void __forward_list_base<_Tp, _Alloc>::swap(__forward_list_base& __x) _NOEXCEPT_(!__node_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<__node_allocator>) #endif { - std::__swap_allocator( - __alloc(), __x.__alloc(), integral_constant()); + std::__swap_allocator(__alloc(), __x.__alloc()); using std::swap; swap(__before_begin()->__next_, __x.__before_begin()->__next_); } diff --git a/libcxx/include/string b/libcxx/include/string index 6e93a6230cc2c0..cdc1afedbdf52f 100644 --- a/libcxx/include/string +++ 
b/libcxx/include/string @@ -1213,7 +1213,7 @@ public: } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 operator __self_view() const _NOEXCEPT { - return __self_view(data(), size()); + return __self_view(typename __self_view::__assume_valid(), data(), size()); } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_STRING_INTERNAL_MEMORY_ACCESS basic_string& @@ -1822,7 +1822,7 @@ public: #if _LIBCPP_STD_VER >= 20 constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(__self_view __sv) const noexcept { - return __self_view(data(), size()).starts_with(__sv); + return __self_view(typename __self_view::__assume_valid(), data(), size()).starts_with(__sv); } constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(value_type __c) const noexcept { @@ -1834,7 +1834,7 @@ public: } constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(__self_view __sv) const noexcept { - return __self_view(data(), size()).ends_with(__sv); + return __self_view(typename __self_view::__assume_valid(), data(), size()).ends_with(__sv); } constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(value_type __c) const noexcept { @@ -1848,15 +1848,15 @@ public: #if _LIBCPP_STD_VER >= 23 constexpr _LIBCPP_HIDE_FROM_ABI bool contains(__self_view __sv) const noexcept { - return __self_view(data(), size()).contains(__sv); + return __self_view(typename __self_view::__assume_valid(), data(), size()).contains(__sv); } constexpr _LIBCPP_HIDE_FROM_ABI bool contains(value_type __c) const noexcept { - return __self_view(data(), size()).contains(__c); + return __self_view(typename __self_view::__assume_valid(), data(), size()).contains(__c); } constexpr _LIBCPP_HIDE_FROM_ABI bool contains(const value_type* __s) const { - return __self_view(data(), size()).contains(__s); + return __self_view(typename __self_view::__assume_valid(), data(), size()).contains(__s); } #endif diff --git a/libcxx/include/string_view b/libcxx/include/string_view index 2a03ee99e9ab52..cf97e3a9be314d 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -211,6 
+211,7 @@ namespace std { #include <__functional/hash.h> #include <__functional/unary_function.h> #include <__fwd/ostream.h> +#include <__fwd/string.h> #include <__fwd/string_view.h> #include <__iterator/bounded_iter.h> #include <__iterator/concepts.h> @@ -689,6 +690,9 @@ private: const value_type* __data_; size_type __size_; + + template + friend class basic_string; }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(basic_string_view); diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index 7f231cd09df510..5937d4fdc9e1a7 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -144,6 +144,7 @@ namespace std // Relationships between types: template struct is_same; template struct is_base_of; + template struct is_virtual_base_of; // C++26 template struct is_convertible; template struct is_nothrow_convertible; // C++20 @@ -391,6 +392,8 @@ namespace std = is_same::value; // C++17 template inline constexpr bool is_base_of_v = is_base_of::value; // C++17 + template inline constexpr bool is_virtual_base_of_v + = is_virtual_base_of::value; // C++26 template inline constexpr bool is_convertible_v = is_convertible::value; // C++17 template inline constexpr bool is_invocable_v diff --git a/libcxx/include/vector b/libcxx/include/vector index 3aa23d8fc1e243..a858f458f44308 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -1821,8 +1821,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::swap(vector& __x) std::swap(this->__begin_, __x.__begin_); std::swap(this->__end_, __x.__end_); std::swap(this->__end_cap(), __x.__end_cap()); - std::__swap_allocator( - this->__alloc(), __x.__alloc(), integral_constant()); + std::__swap_allocator(this->__alloc(), __x.__alloc()); } template @@ -2820,8 +2819,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector::swap(vector& __x) std::swap(this->__begin_, __x.__begin_); std::swap(this->__size_, __x.__size_); std::swap(this->__cap(), __x.__cap()); - std::__swap_allocator( - this->__alloc(), 
__x.__alloc(), integral_constant()); + std::__swap_allocator(this->__alloc(), __x.__alloc()); } template diff --git a/libcxx/include/version b/libcxx/include/version index fe64343eafbc9c..a19be2d294afd3 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -531,7 +531,9 @@ __cpp_lib_void_t 201411L // # define __cpp_lib_generate_random 202403L // # define __cpp_lib_hazard_pointer 202306L // # define __cpp_lib_inplace_vector 202406L -// # define __cpp_lib_is_virtual_base_of 202406L +# if __has_builtin(__builtin_is_virtual_base_of) +# define __cpp_lib_is_virtual_base_of 202406L +# endif // # define __cpp_lib_is_within_lifetime 202306L // # define __cpp_lib_linalg 202311L # undef __cpp_lib_mdspan diff --git a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp index 2dc7f5c7654193..db17221e515d3a 100644 --- a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp +++ b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include "test_macros.h" diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp index bb69ca7368aafa..1cbf2699a95bcc 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp @@ -857,16 +857,16 @@ # error "__cpp_lib_is_swappable should have the value 201603L in c++26" # endif -# if !defined(_LIBCPP_VERSION) +# if __has_builtin(__builtin_is_virtual_base_of) # ifndef __cpp_lib_is_virtual_base_of # error "__cpp_lib_is_virtual_base_of should be defined in c++26" # endif # if __cpp_lib_is_virtual_base_of != 202406L # error 
"__cpp_lib_is_virtual_base_of should have the value 202406L in c++26" # endif -# else // _LIBCPP_VERSION +# else # ifdef __cpp_lib_is_virtual_base_of -# error "__cpp_lib_is_virtual_base_of should not be defined because it is unimplemented in libc++!" +# error "__cpp_lib_is_virtual_base_of should not be defined when the requirement '__has_builtin(__builtin_is_virtual_base_of)' is not met!" # endif # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index b8bad696f1bae0..bd2959d55dc20d 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -7172,16 +7172,16 @@ # error "__cpp_lib_is_swappable should have the value 201603L in c++26" # endif -# if !defined(_LIBCPP_VERSION) +# if __has_builtin(__builtin_is_virtual_base_of) # ifndef __cpp_lib_is_virtual_base_of # error "__cpp_lib_is_virtual_base_of should be defined in c++26" # endif # if __cpp_lib_is_virtual_base_of != 202406L # error "__cpp_lib_is_virtual_base_of should have the value 202406L in c++26" # endif -# else // _LIBCPP_VERSION +# else # ifdef __cpp_lib_is_virtual_base_of -# error "__cpp_lib_is_virtual_base_of should not be defined because it is unimplemented in libc++!" +# error "__cpp_lib_is_virtual_base_of should not be defined when the requirement '__has_builtin(__builtin_is_virtual_base_of)' is not met!" 
# endif # endif diff --git a/libcxx/test/std/utilities/meta/meta.rel/is_virtual_base_of.pass.cpp b/libcxx/test/std/utilities/meta/meta.rel/is_virtual_base_of.pass.cpp new file mode 100644 index 00000000000000..6b34d56e2c6f45 --- /dev/null +++ b/libcxx/test/std/utilities/meta/meta.rel/is_virtual_base_of.pass.cpp @@ -0,0 +1,166 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 + +// These compilers don't support __builtin_is_virtual_base_of yet. +// UNSUPPORTED: clang-17, clang-18, clang-19, gcc-14, apple-clang-16, apple-clang-17 + +// + +// std::is_virtual_base_of + +#include +#include + +template +void test() { + // Test the type of the variables + { + static_assert(std::is_same_v::value)>); + static_assert(std::is_same_v)>); + } + + // Test their value + { + static_assert(std::is_virtual_base_of::value == expected); + static_assert(std::is_virtual_base_of::value == expected); + static_assert(std::is_virtual_base_of::value == expected); + static_assert(std::is_virtual_base_of::value == expected); + + static_assert(std::is_virtual_base_of_v == expected); + static_assert(std::is_virtual_base_of_v == expected); + static_assert(std::is_virtual_base_of_v == expected); + static_assert(std::is_virtual_base_of_v == expected); + } + + // Check the relationship with is_base_of. If it's not a base of, it can't be a virtual base of. + { static_assert(!std::is_base_of_v ? 
!std::is_virtual_base_of_v : true); } + + // Make sure they can be referenced at runtime + { + bool const& a = std::is_virtual_base_of::value; + bool const& b = std::is_virtual_base_of_v; + assert(a == expected); + assert(b == expected); + } +} + +struct Incomplete; +struct Unrelated {}; +union IncompleteUnion; +union Union { + int i; + float f; +}; + +class Base {}; +class Derived : Base {}; +class Derived2 : Base {}; +class Derived2a : Derived {}; +class Derived2b : Derived {}; +class Derived3Virtual : virtual Derived2a, virtual Derived2b {}; + +struct DerivedTransitiveViaNonVirtual : Derived3Virtual {}; +struct DerivedTransitiveViaVirtual : virtual Derived3Virtual {}; + +template +struct CrazyDerived : T {}; +template +struct CrazyDerivedVirtual : virtual T {}; + +struct DerivedPrivate : private virtual Base {}; +struct DerivedProtected : protected virtual Base {}; +struct DerivedPrivatePrivate : private DerivedPrivate {}; +struct DerivedPrivateProtected : private DerivedProtected {}; +struct DerivedProtectedPrivate : protected DerivedProtected {}; +struct DerivedProtectedProtected : protected DerivedProtected {}; +struct DerivedTransitivePrivate : private Derived, private Derived2 {}; + +int main(int, char**) { + // Test with non-virtual inheritance + { + test(); + test(); + test(); + test(); + test(); + test(); + + // Derived must be a complete type if Base and Derived are non-union class types + // test(); + } + + // Test with virtual inheritance + { + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test>(); + test, Base>(); + test>(); + test, Base>(); + } + + // Test unrelated types + { + test(); + test(); + test(); + test(); + test(); + test(); + } + + // Test scalar types + { + test(); + test(); + test(); + test(); + + test(); + test(); + test(); + + test(); + test(); + test(); + } + + // Test unions + { + test(); + test(); + test(); + test(); + test(); + test(); + 
test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + } + + return 0; +} diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp index 578efb90f7f1ab..9a5efe7b5fe32f 100644 --- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp @@ -228,8 +228,8 @@ int main(int, char**) // But the rvalue to lvalue reference binding isn't allowed according to // [over.match.ref] despite Clang accepting it. test_is_constructible>(); -#ifndef TEST_COMPILER_GCC - test_is_constructible>(); +#ifndef TEST_COMPILER_CLANG + test_is_not_constructible>(); #endif static_assert(std::is_constructible>::value, ""); diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index b041b08f02aac5..f402d4de2275e5 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -784,7 +784,8 @@ def add_version_header(tc): "c++26": 202406 # P2985R0 A type trait for detecting virtual base classes }, "headers": ["type_traits"], - "unimplemented": True, + "test_suite_guard": "__has_builtin(__builtin_is_virtual_base_of)", + "libcxx_guard": "__has_builtin(__builtin_is_virtual_base_of)", }, { "name": "__cpp_lib_is_within_lifetime", diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 1dfff0a90f4aee..a5f155bc05bc9e 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -494,20 +494,8 @@ void SymbolTable::resolveRemainingUndefines() { StringRef name = undef->getName(); // A weak alias may have been resolved, so check for that. - if (Defined *d = undef->getWeakAlias()) { - // We want to replace Sym with D. 
However, we can't just blindly - // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an - // internal symbol, and internal symbols are stored as "unparented" - // Symbols. For that reason we need to check which type of symbol we - // are dealing with and copy the correct number of bytes. - if (isa(d)) - memcpy(sym, d, sizeof(DefinedRegular)); - else if (isa(d)) - memcpy(sym, d, sizeof(DefinedAbsolute)); - else - memcpy(sym, d, sizeof(SymbolUnion)); + if (undef->resolveWeakAlias()) continue; - } // If we can resolve a symbol by removing __imp_ prefix, do that. // This odd rule is for compatibility with MSVC linker. diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp index ff8ad1e619116f..b098abb80d6f1e 100644 --- a/lld/COFF/Symbols.cpp +++ b/lld/COFF/Symbols.cpp @@ -136,6 +136,29 @@ Defined *Undefined::getWeakAlias() { return nullptr; } +bool Undefined::resolveWeakAlias() { + Defined *d = getWeakAlias(); + if (!d) + return false; + + // We want to replace Sym with D. However, we can't just blindly + // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an + // internal symbol, and internal symbols are stored as "unparented" + // Symbols. For that reason we need to check which type of symbol we + // are dealing with and copy the correct number of bytes. + StringRef name = getName(); + if (isa(d)) + memcpy(this, d, sizeof(DefinedRegular)); + else if (isa(d)) + memcpy(this, d, sizeof(DefinedAbsolute)); + else + memcpy(this, d, sizeof(SymbolUnion)); + + nameData = name.data(); + nameSize = name.size(); + return true; +} + MemoryBufferRef LazyArchive::getMemberBuffer() { Archive::Child c = CHECK(sym.getMember(), "could not get the member for symbol " + diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 56b137d56873aa..c427a062dc82b2 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -341,6 +341,9 @@ class Undefined : public Symbol { // symbol by searching the chain of fallback symbols. 
Returns the symbol if // successful, otherwise returns null. Defined *getWeakAlias(); + + // If this symbol is external weak, replace this object with aliased symbol. + bool resolveWeakAlias(); }; // Windows-specific classes. diff --git a/lld/test/COFF/symtab.test b/lld/test/COFF/symtab.test index 45e8ed39737a46..6ef2b4d47503c7 100644 --- a/lld/test/COFF/symtab.test +++ b/lld/test/COFF/symtab.test @@ -86,6 +86,15 @@ # CHECK-NEXT: StorageClass: External (0x2) # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: weak_main +# CHECK-NEXT: Value: 0 +# CHECK-NEXT: Section: .text (1) +# CHECK-NEXT: BaseType: Null (0x0) +# CHECK-NEXT: ComplexType: Null (0x0) +# CHECK-NEXT: StorageClass: External (0x2) +# CHECK-NEXT: AuxSymbolCount: 0 +# CHECK-NEXT: } # CHECK-NEXT: ] # NO: Symbols [ @@ -237,4 +246,13 @@ symbols: SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_LABEL + - Name: weak_main + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_WEAK_EXTERNAL + WeakExternal: + TagIndex: 10 + Characteristics: IMAGE_WEAK_EXTERN_SEARCH_ALIAS ... 
diff --git a/lldb/include/lldb/Host/Socket.h b/lldb/include/lldb/Host/Socket.h index 573c881f727d8f..304a91bdf6741b 100644 --- a/lldb/include/lldb/Host/Socket.h +++ b/lldb/include/lldb/Host/Socket.h @@ -19,6 +19,7 @@ #include "lldb/Utility/Status.h" #ifdef _WIN32 +#include "lldb/Host/Pipe.h" #include "lldb/Host/windows/windows.h" #include #include @@ -32,12 +33,35 @@ namespace lldb_private { #if defined(_WIN32) typedef SOCKET NativeSocket; +typedef lldb::pipe_t shared_fd_t; #else typedef int NativeSocket; +typedef NativeSocket shared_fd_t; #endif +class Socket; class TCPSocket; class UDPSocket; +class SharedSocket { +public: + static const shared_fd_t kInvalidFD; + + SharedSocket(const Socket *socket, Status &error); + + shared_fd_t GetSendableFD() { return m_fd; } + + Status CompleteSending(lldb::pid_t child_pid); + + static Status GetNativeSocket(shared_fd_t fd, NativeSocket &socket); + +private: +#ifdef _WIN32 + Pipe m_socket_pipe; + NativeSocket m_socket; +#endif + shared_fd_t m_fd; +}; + class Socket : public IOObject { public: enum SocketProtocol { diff --git a/lldb/include/lldb/Utility/Status.h b/lldb/include/lldb/Utility/Status.h index fa5768141fa45d..a80ebe89e562dd 100644 --- a/lldb/include/lldb/Utility/Status.h +++ b/lldb/include/lldb/Utility/Status.h @@ -181,11 +181,12 @@ class Status { bool Success() const; protected: - /// Member variables - ValueType m_code = 0; ///< Status code as an integer value. - lldb::ErrorType m_type = - lldb::eErrorTypeInvalid; ///< The type of the above error code. - mutable std::string m_string; ///< A string representation of the error code. + /// Status code as an integer value. + ValueType m_code = 0; + /// The type of the above error code. + lldb::ErrorType m_type = lldb::eErrorTypeInvalid; + /// A string representation of the error code. 
+ mutable std::string m_string; private: explicit Status(const llvm::formatv_object_base &payload) { SetErrorToGenericError(); diff --git a/lldb/source/Host/common/Socket.cpp b/lldb/source/Host/common/Socket.cpp index 7364a12280cfdd..aabd562b0557c6 100644 --- a/lldb/source/Host/common/Socket.cpp +++ b/lldb/source/Host/common/Socket.cpp @@ -56,10 +56,12 @@ using namespace lldb_private; typedef const char *set_socket_option_arg_type; typedef char *get_socket_option_arg_type; const NativeSocket Socket::kInvalidSocketValue = INVALID_SOCKET; +const shared_fd_t SharedSocket::kInvalidFD = LLDB_INVALID_PIPE; #else // #if defined(_WIN32) typedef const void *set_socket_option_arg_type; typedef void *get_socket_option_arg_type; const NativeSocket Socket::kInvalidSocketValue = -1; +const shared_fd_t SharedSocket::kInvalidFD = Socket::kInvalidSocketValue; #endif // #if defined(_WIN32) static bool IsInterrupted() { @@ -70,6 +72,80 @@ static bool IsInterrupted() { #endif } +SharedSocket::SharedSocket(const Socket *socket, Status &error) { +#ifdef _WIN32 + m_socket = socket->GetNativeSocket(); + m_fd = kInvalidFD; + + // Create a pipe to transfer WSAPROTOCOL_INFO to the child process. + error = m_socket_pipe.CreateNew(true); + if (error.Fail()) + return; + + m_fd = m_socket_pipe.GetReadPipe(); +#else + m_fd = socket->GetNativeSocket(); + error = Status(); +#endif +} + +Status SharedSocket::CompleteSending(lldb::pid_t child_pid) { +#ifdef _WIN32 + // Transfer WSAPROTOCOL_INFO to the child process. 
+ m_socket_pipe.CloseReadFileDescriptor(); + + WSAPROTOCOL_INFO protocol_info; + if (::WSADuplicateSocket(m_socket, child_pid, &protocol_info) == + SOCKET_ERROR) { + int last_error = ::WSAGetLastError(); + return Status("WSADuplicateSocket() failed, error: %d", last_error); + } + + size_t num_bytes; + Status error = + m_socket_pipe.WriteWithTimeout(&protocol_info, sizeof(protocol_info), + std::chrono::seconds(10), num_bytes); + if (error.Fail()) + return error; + if (num_bytes != sizeof(protocol_info)) + return Status("WriteWithTimeout(WSAPROTOCOL_INFO) failed: %d bytes", + num_bytes); +#endif + return Status(); +} + +Status SharedSocket::GetNativeSocket(shared_fd_t fd, NativeSocket &socket) { +#ifdef _WIN32 + socket = Socket::kInvalidSocketValue; + // Read WSAPROTOCOL_INFO from the parent process and create NativeSocket. + WSAPROTOCOL_INFO protocol_info; + { + Pipe socket_pipe(fd, LLDB_INVALID_PIPE); + size_t num_bytes; + Status error = + socket_pipe.ReadWithTimeout(&protocol_info, sizeof(protocol_info), + std::chrono::seconds(10), num_bytes); + if (error.Fail()) + return error; + if (num_bytes != sizeof(protocol_info)) { + return Status( + "socket_pipe.ReadWithTimeout(WSAPROTOCOL_INFO) failed: % d bytes", + num_bytes); + } + } + socket = ::WSASocket(FROM_PROTOCOL_INFO, FROM_PROTOCOL_INFO, + FROM_PROTOCOL_INFO, &protocol_info, 0, 0); + if (socket == INVALID_SOCKET) { + return Status("WSASocket(FROM_PROTOCOL_INFO) failed: error %d", + ::WSAGetLastError()); + } + return Status(); +#else + socket = fd; + return Status(); +#endif +} + struct SocketScheme { const char *m_scheme; const Socket::SocketProtocol m_protocol; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp index 3bb303c0e1931f..6de851081598fd 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp @@ -52,8 +52,8 @@ static bool 
DefaultComputeClangResourceDirectory(FileSpec &lldb_shlib_spec, Log *log = GetLog(LLDBLog::Host); std::string raw_path = lldb_shlib_spec.GetPath(); llvm::StringRef parent_dir = llvm::sys::path::parent_path(raw_path); - const std::string clang_resource_path = - clang::driver::Driver::GetResourcesPath("bin/lldb", CLANG_RESOURCE_DIR); + static const std::string clang_resource_path = + clang::driver::Driver::GetResourcesPath("bin/lldb"); static const llvm::StringRef kResourceDirSuffixes[] = { // LLVM.org's build of LLDB uses the clang resource directory placed diff --git a/lldb/tools/lldb-server/lldb-platform.cpp b/lldb/tools/lldb-server/lldb-platform.cpp index 82a3a0d6b4e51c..75f51132aa9cc6 100644 --- a/lldb/tools/lldb-server/lldb-platform.cpp +++ b/lldb/tools/lldb-server/lldb-platform.cpp @@ -47,108 +47,6 @@ using namespace llvm; // option descriptors for getopt_long_only() -#ifdef _WIN32 -typedef pipe_t shared_fd_t; -const shared_fd_t kInvalidSharedFD = LLDB_INVALID_PIPE; -#else -typedef NativeSocket shared_fd_t; -const shared_fd_t kInvalidSharedFD = Socket::kInvalidSocketValue; -#endif - -class SharedSocket { -public: - SharedSocket(Connection *conn, Status &error) { - m_fd = kInvalidSharedFD; - - const Socket *socket = - static_cast(conn->GetReadObject().get()); - if (socket == nullptr) { - error = Status("invalid conn socket"); - return; - } - -#ifdef _WIN32 - m_socket = socket->GetNativeSocket(); - - // Create a pipe to transfer WSAPROTOCOL_INFO to the child process. - error = m_socket_pipe.CreateNew(true); - if (error.Fail()) - return; - - m_fd = m_socket_pipe.GetReadPipe(); -#else - m_fd = socket->GetNativeSocket(); - error = Status(); -#endif - } - - shared_fd_t GetSendableFD() { return m_fd; } - - Status CompleteSending(lldb::pid_t child_pid) { -#ifdef _WIN32 - // Transfer WSAPROTOCOL_INFO to the child process. 
- m_socket_pipe.CloseReadFileDescriptor(); - - WSAPROTOCOL_INFO protocol_info; - if (::WSADuplicateSocket(m_socket, child_pid, &protocol_info) == - SOCKET_ERROR) { - int last_error = ::WSAGetLastError(); - return Status("WSADuplicateSocket() failed, error: %d", last_error); - } - - size_t num_bytes; - Status error = - m_socket_pipe.WriteWithTimeout(&protocol_info, sizeof(protocol_info), - std::chrono::seconds(10), num_bytes); - if (error.Fail()) - return error; - if (num_bytes != sizeof(protocol_info)) - return Status("WriteWithTimeout(WSAPROTOCOL_INFO) failed: %d bytes", - num_bytes); -#endif - return Status(); - } - - static Status GetNativeSocket(shared_fd_t fd, NativeSocket &socket) { -#ifdef _WIN32 - socket = Socket::kInvalidSocketValue; - // Read WSAPROTOCOL_INFO from the parent process and create NativeSocket. - WSAPROTOCOL_INFO protocol_info; - { - Pipe socket_pipe(fd, LLDB_INVALID_PIPE); - size_t num_bytes; - Status error = - socket_pipe.ReadWithTimeout(&protocol_info, sizeof(protocol_info), - std::chrono::seconds(10), num_bytes); - if (error.Fail()) - return error; - if (num_bytes != sizeof(protocol_info)) { - return Status( - "socket_pipe.ReadWithTimeout(WSAPROTOCOL_INFO) failed: % d bytes", - num_bytes); - } - } - socket = ::WSASocket(FROM_PROTOCOL_INFO, FROM_PROTOCOL_INFO, - FROM_PROTOCOL_INFO, &protocol_info, 0, 0); - if (socket == INVALID_SOCKET) { - return Status("WSASocket(FROM_PROTOCOL_INFO) failed: error %d", - ::WSAGetLastError()); - } - return Status(); -#else - socket = fd; - return Status(); -#endif - } - -private: -#ifdef _WIN32 - Pipe m_socket_pipe; - NativeSocket m_socket; -#endif - shared_fd_t m_fd; -}; - static int g_debug = 0; static int g_verbose = 0; static int g_server = 0; @@ -259,13 +157,13 @@ static void spawn_process_reaped(lldb::pid_t pid, int signal, int status) { gdbserver_portmap.FreePortForProcess(pid); } -static Status spawn_process(const char *progname, Connection *conn, +static Status spawn_process(const char *progname, 
const Socket *conn_socket, uint16_t gdb_port, uint16_t port_offset, const lldb_private::Args &args, const std::string &log_file, const StringRef log_channels) { Status error; - SharedSocket shared_socket(conn, error); + SharedSocket shared_socket(conn_socket, error); if (error.Fail()) return error; @@ -363,7 +261,7 @@ int main_platform(int argc, char *argv[]) { StringRef log_channels; // e.g. "lldb process threads:gdb-remote default:linux all" - shared_fd_t fd = kInvalidSharedFD; + shared_fd_t fd = SharedSocket::kInvalidFD; int min_gdbserver_port = 0; int max_gdbserver_port = 0; @@ -480,7 +378,7 @@ int main_platform(int argc, char *argv[]) { } // Print usage and exit if no listening port is specified. - if (listen_host_port.empty() && fd == kInvalidSharedFD) + if (listen_host_port.empty() && fd == SharedSocket::kInvalidFD) show_usage = true; if (show_usage || option_error) { @@ -494,7 +392,7 @@ int main_platform(int argc, char *argv[]) { lldb_private::Args inferior_arguments; inferior_arguments.SetArguments(argc, const_cast(argv)); - if (fd != kInvalidSharedFD) { + if (fd != SharedSocket::kInvalidFD) { // Child process will handle the connection and exit. 
Log *log = GetLog(LLDBLog::Platform); if (!listen_host_port.empty()) { @@ -510,13 +408,14 @@ int main_platform(int argc, char *argv[]) { return socket_error; } - Connection *conn = - new ConnectionFileDescriptor(new TCPSocket(socket, true, false)); GDBRemoteCommunicationServerPlatform platform(Socket::ProtocolTcp, "tcp"); if (port_offset > 0) platform.SetPortOffset(port_offset); platform.SetPortMap(std::move(gdbserver_portmap)); - platform.SetConnection(std::unique_ptr(conn)); + platform.SetConnection( + std::unique_ptr(new ConnectionFileDescriptor( + new TCPSocket(socket, /*should_close=*/true, + /*child_processes_inherit=*/false)))); client_handle(platform, inferior_arguments); return 0; } @@ -578,8 +477,11 @@ int main_platform(int argc, char *argv[]) { fprintf(stderr, "no available gdbserver port for connection - dropping...\n"); } else { - error = spawn_process(progname, conn, *available_port, port_offset, - inferior_arguments, log_file, log_channels); + const Socket *conn_socket = + static_cast(conn->GetReadObject().get()); + error = + spawn_process(progname, conn_socket, *available_port, port_offset, + inferior_arguments, log_file, log_channels); if (error.Fail()) { { diff --git a/lldb/unittests/Expression/ClangParserTest.cpp b/lldb/unittests/Expression/ClangParserTest.cpp index 6f682f6c97fdb5..fab4487c737195 100644 --- a/lldb/unittests/Expression/ClangParserTest.cpp +++ b/lldb/unittests/Expression/ClangParserTest.cpp @@ -42,8 +42,8 @@ TEST_F(ClangHostTest, ComputeClangResourceDirectory) { #else std::string path_to_liblldb = "C:\\foo\\bar\\lib\\"; #endif - std::string path_to_clang_dir = clang::driver::Driver::GetResourcesPath( - path_to_liblldb + "liblldb", CLANG_RESOURCE_DIR); + std::string path_to_clang_dir = + clang::driver::Driver::GetResourcesPath(path_to_liblldb + "liblldb"); llvm::SmallString<256> path_to_clang_lib_dir_real; llvm::sys::fs::real_path(path_to_clang_dir, path_to_clang_lib_dir_real); diff --git 
a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h index 932a2a94ab1f1a..d5a63c8dd627a0 100644 --- a/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -81,16 +81,16 @@ class CCValAssign { } public: - static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, + static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom = false) { CCValAssign Ret(HTP, ValNo, ValVT, LocVT, IsCustom); - Ret.Data = Register(RegNo); + Ret.Data = Register(Reg); return Ret; } - static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, + static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP) { - return getReg(ValNo, ValVT, RegNo, LocVT, HTP, /*IsCustom=*/true); + return getReg(ValNo, ValVT, Reg, LocVT, HTP, /*IsCustom=*/true); } static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, @@ -112,7 +112,7 @@ class CCValAssign { return Ret; } - void convertToReg(unsigned RegNo) { Data = Register(RegNo); } + void convertToReg(MCRegister Reg) { Data = Register(Reg); } void convertToMem(int64_t Offset) { Data = Offset; } @@ -346,7 +346,7 @@ class CCState { /// AllocateReg - Attempt to allocate one of the specified registers. If none /// are available, return zero. Otherwise, return the first one available, /// marking it and any aliases as allocated. - MCPhysReg AllocateReg(ArrayRef Regs) { + MCRegister AllocateReg(ArrayRef Regs) { unsigned FirstUnalloc = getFirstUnallocated(Regs); if (FirstUnalloc == Regs.size()) return MCRegister(); // Didn't find the reg. 
diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h index b73850bb757ec3..89d1b5edf3fa63 100644 --- a/llvm/include/llvm/CodeGen/LiveVariables.h +++ b/llvm/include/llvm/CodeGen/LiveVariables.h @@ -253,8 +253,8 @@ class LiveVariables { return false; bool Removed = false; - for (MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { + for (MachineOperand &MO : MI.all_defs()) { + if (MO.getReg() == Reg) { MO.setIsDead(false); Removed = true; break; diff --git a/llvm/include/llvm/MC/MCInst.h b/llvm/include/llvm/MC/MCInst.h index 578b7328970b76..b3d615b4392f55 100644 --- a/llvm/include/llvm/MC/MCInst.h +++ b/llvm/include/llvm/MC/MCInst.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/bit.h" +#include "llvm/MC/MCRegister.h" #include "llvm/Support/SMLoc.h" #include #include @@ -66,15 +67,15 @@ class MCOperand { bool isInst() const { return Kind == kInst; } /// Returns the register number. - unsigned getReg() const { + MCRegister getReg() const { assert(isReg() && "This is not a register operand!"); return RegVal; } /// Set the register number. - void setReg(unsigned Reg) { + void setReg(MCRegister Reg) { assert(isReg() && "This is not a register operand!"); - RegVal = Reg; + RegVal = Reg.id(); } int64_t getImm() const { @@ -131,10 +132,10 @@ class MCOperand { InstVal = Val; } - static MCOperand createReg(unsigned Reg) { + static MCOperand createReg(MCRegister Reg) { MCOperand Op; Op.Kind = kRegister; - Op.RegVal = Reg; + Op.RegVal = Reg.id(); return Op; } diff --git a/llvm/include/llvm/MC/MCInstBuilder.h b/llvm/include/llvm/MC/MCInstBuilder.h index d06ed4c6c840a9..de45ffb4b2dc7c 100644 --- a/llvm/include/llvm/MC/MCInstBuilder.h +++ b/llvm/include/llvm/MC/MCInstBuilder.h @@ -34,7 +34,7 @@ class MCInstBuilder { } /// Add a new register operand. 
- MCInstBuilder &addReg(unsigned Reg) { + MCInstBuilder &addReg(MCRegister Reg) { Inst.addOperand(MCOperand::createReg(Reg)); return *this; } diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index 13ad1c38f3b3b0..32905c1e9a424a 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -54,20 +54,21 @@ #ifndef LLVM_MC_MCPSEUDOPROBE_H #define LLVM_MC_MCPSEUDOPROBE_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" #include "llvm/IR/PseudoProbe.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorOr.h" -#include -#include +#include #include #include #include #include #include -#include #include namespace llvm { @@ -86,7 +87,7 @@ enum class MCPseudoProbeFlag { struct MCPseudoProbeFuncDesc { uint64_t FuncGUID = 0; uint64_t FuncHash = 0; - std::string FuncName; + StringRef FuncName; MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name) : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){}; @@ -100,17 +101,24 @@ class MCDecodedPseudoProbe; using InlineSite = std::tuple; using MCPseudoProbeInlineStack = SmallVector; // GUID to PseudoProbeFuncDesc map -using GUIDProbeFunctionMap = - std::unordered_map; -// Address to pseudo probes map. 
-using AddressProbesMap = std::map>; +class GUIDProbeFunctionMap : public std::vector { +public: + auto find(uint64_t GUID) const { + auto CompareDesc = [](const MCPseudoProbeFuncDesc &Desc, uint64_t GUID) { + return Desc.FuncGUID < GUID; + }; + auto It = llvm::lower_bound(*this, GUID, CompareDesc); + if (It->FuncGUID != GUID) + return end(); + return It; + } +}; class MCDecodedPseudoProbeInlineTree; class MCPseudoProbeBase { protected: - uint64_t Guid; - uint64_t Index; + uint32_t Index; uint32_t Discriminator; uint8_t Attributes; uint8_t Type; @@ -120,14 +128,12 @@ class MCPseudoProbeBase { const static uint32_t PseudoProbeFirstId = 1; public: - MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T, uint32_t D) - : Guid(G), Index(I), Discriminator(D), Attributes(At), Type(T) {} + MCPseudoProbeBase(uint64_t I, uint64_t At, uint8_t T, uint32_t D) + : Index(I), Discriminator(D), Attributes(At), Type(T) {} bool isEntry() const { return Index == PseudoProbeFirstId; } - uint64_t getGuid() const { return Guid; } - - uint64_t getIndex() const { return Index; } + uint32_t getIndex() const { return Index; } uint32_t getDiscriminator() const { return Discriminator; } @@ -157,18 +163,20 @@ class MCPseudoProbeBase { /// uses an address from a temporary label created at the current address in the /// current section. 
class MCPseudoProbe : public MCPseudoProbeBase { + uint64_t Guid; MCSymbol *Label; public: MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attributes, uint32_t Discriminator) - : MCPseudoProbeBase(Guid, Index, Attributes, Type, Discriminator), + : MCPseudoProbeBase(Index, Attributes, Type, Discriminator), Guid(Guid), Label(Label) { assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8"); assert(Attributes <= 0xFF && "Probe attributes too big to encode, exceeding 2^16"); } + uint64_t getGuid() const { return Guid; }; MCSymbol *getLabel() const { return Label; } void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const; }; @@ -181,11 +189,11 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase { MCDecodedPseudoProbeInlineTree *InlineTree; public: - MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K, - uint8_t At, uint32_t D, - MCDecodedPseudoProbeInlineTree *Tree) - : MCPseudoProbeBase(G, I, At, static_cast(K), D), Address(Ad), + MCDecodedPseudoProbe(uint64_t Ad, uint32_t I, PseudoProbeType K, uint8_t At, + uint32_t D, MCDecodedPseudoProbeInlineTree *Tree) + : MCPseudoProbeBase(I, At, static_cast(K), D), Address(Ad), InlineTree(Tree){}; + uint64_t getGuid() const; uint64_t getAddress() const { return Address; } @@ -211,21 +219,39 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase { bool ShowName) const; }; -template -class MCPseudoProbeInlineTreeBase { - struct InlineSiteHash { - uint64_t operator()(const InlineSite &Site) const { - return std::get<0>(Site) ^ std::get<1>(Site); - } - }; +// Address to pseudo probes map. +class AddressProbesMap + : public std::vector> { + auto getIt(uint64_t Addr) const { + auto CompareProbe = [](const MCDecodedPseudoProbe &Probe, uint64_t Addr) { + return Probe.getAddress() < Addr; + }; + return llvm::lower_bound(*this, Addr, CompareProbe); + } +public: + // Returns range of probes within [\p From, \p To) address range. 
+ auto find(uint64_t From, uint64_t To) const { + return llvm::make_range(getIt(From), getIt(To)); + } + // Returns range of probes with given \p Address. + auto find(uint64_t Address) const { + auto FromIt = getIt(Address); + if (FromIt == end() || FromIt->get().getAddress() != Address) + return llvm::make_range(end(), end()); + auto ToIt = getIt(Address + 1); + return llvm::make_range(FromIt, ToIt); + } +}; + +template +class MCPseudoProbeInlineTreeBase { protected: // Track children (e.g. inlinees) of current context - using InlinedProbeTreeMap = std::unordered_map< - InlineSite, std::unique_ptr, InlineSiteHash>; InlinedProbeTreeMap Children; // Set of probes that come with the function. - std::vector Probes; + ProbesType Probes; MCPseudoProbeInlineTreeBase() { static_assert(std::is_base_of::value, @@ -240,12 +266,10 @@ class MCPseudoProbeInlineTreeBase { bool isRoot() const { return Guid == 0; } InlinedProbeTreeMap &getChildren() { return Children; } const InlinedProbeTreeMap &getChildren() const { return Children; } - std::vector &getProbes() { return Probes; } - const std::vector &getProbes() const { return Probes; } - void addProbes(ProbeType Probe) { Probes.push_back(Probe); } + const ProbesType &getProbes() const { return Probes; } // Caller node of the inline site - MCPseudoProbeInlineTreeBase *Parent = - nullptr; + MCPseudoProbeInlineTreeBase *Parent = nullptr; DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) { auto Ret = Children.emplace( Site, std::make_unique(Site)); @@ -259,9 +283,17 @@ class MCPseudoProbeInlineTreeBase { // instance is created as the root of a tree. // A real instance of this class is created for each function, either a // not inlined function that has code in .text section or an inlined function. 
+struct InlineSiteHash { + uint64_t operator()(const InlineSite &Site) const { + return std::get<0>(Site) ^ std::get<1>(Site); + } +}; class MCPseudoProbeInlineTree - : public MCPseudoProbeInlineTreeBase { + : public MCPseudoProbeInlineTreeBase< + std::vector, MCPseudoProbeInlineTree, + std::unordered_map, + InlineSiteHash>> { public: MCPseudoProbeInlineTree() = default; MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; } @@ -277,16 +309,31 @@ class MCPseudoProbeInlineTree // inline tree node for the decoded pseudo probe class MCDecodedPseudoProbeInlineTree - : public MCPseudoProbeInlineTreeBase { -public: - InlineSite ISite; + : public MCPseudoProbeInlineTreeBase< + MCDecodedPseudoProbe *, MCDecodedPseudoProbeInlineTree, + MutableArrayRef> { + uint32_t NumProbes = 0; + uint32_t ProbeId = 0; +public: MCDecodedPseudoProbeInlineTree() = default; - MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){}; + MCDecodedPseudoProbeInlineTree(const InlineSite &Site, + MCDecodedPseudoProbeInlineTree *Parent) + : ProbeId(std::get<1>(Site)) { + this->Guid = std::get<0>(Site); + this->Parent = Parent; + } // Return false if it's a dummy inline site bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); } + InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); } + void setProbes(MutableArrayRef ProbesRef) { + Probes = ProbesRef.data(); + NumProbes = ProbesRef.size(); + } + auto getProbes() const { + return MutableArrayRef(Probes, NumProbes); + } }; /// Instances of this class represent the pseudo probes inserted into a compile @@ -336,9 +383,25 @@ class MCPseudoProbeTable { }; class MCPseudoProbeDecoder { + // Decoded pseudo probes vector. + std::vector PseudoProbeVec; + // Injected pseudo probes, identified by the containing inline tree node. 
+ // Need to keep injected probes separately for two reasons: + // 1) Probes cannot be added to the PseudoProbeVec: appending may cause + // reallocation so that pointers to its elements will become invalid. + // 2) Probes belonging to function record must be contiguous in PseudoProbeVec + // as owning InlineTree references them with an ArrayRef to save space. + std::unordered_map> + InjectedProbeMap; + // Decoded inline records vector. + std::vector InlineTreeVec; + // GUID to PseudoProbeFuncDesc map. GUIDProbeFunctionMap GUID2FuncDescMap; + BumpPtrAllocator FuncNameAllocator; + // Address to probes map. AddressProbesMap Address2ProbesMap; @@ -370,16 +433,18 @@ class MCPseudoProbeDecoder { // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map. bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size); + // Decode pseudo_probe section to count the number of probes and inlined + // function records for each function record. + template + bool countRecords(bool &Discard, uint32_t &ProbeCount, uint32_t &InlinedCount, + const Uint64Set &GuidFilter); + // Decode pseudo_probe section to build address to probes map for specifed // functions only. 
bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size, const Uint64Set &GuildFilter, const Uint64Map &FuncStartAddrs); - bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, - uint64_t &LastAddr, const Uint64Set &GuildFilter, - const Uint64Map &FuncStartAddrs); - // Print pseudo_probe_desc section info void printGUID2FuncDescMap(raw_ostream &OS); @@ -422,6 +487,34 @@ class MCPseudoProbeDecoder { const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const { return DummyInlineRoot; } + + void addInjectedProbe(const MCDecodedPseudoProbe &Probe, uint64_t Address) { + const MCDecodedPseudoProbeInlineTree *Parent = Probe.getInlineTreeNode(); + InjectedProbeMap[Parent].emplace_back(Probe).setAddress(Address); + } + + size_t + getNumInjectedProbes(const MCDecodedPseudoProbeInlineTree *Parent) const { + auto It = InjectedProbeMap.find(Parent); + if (It == InjectedProbeMap.end()) + return 0; + return It->second.size(); + } + + auto getInjectedProbes(MCDecodedPseudoProbeInlineTree *Parent) { + auto It = InjectedProbeMap.find(Parent); + assert(It != InjectedProbeMap.end()); + return iterator_range(It->second); + } + +private: + // Recursively parse an inlining tree encoded in pseudo_probe section. Returns + // whether the the top-level node should be skipped. 
+ template + bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, + uint64_t &LastAddr, const Uint64Set &GuildFilter, + const Uint64Map &FuncStartAddrs, + const uint32_t CurChildIndex); }; } // end namespace llvm diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index b8a28669cdd074..32e23ddfcafeed 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -111,6 +111,7 @@ class ConstantInt; class Context; class Function; class Instruction; +class FenceInst; class SelectInst; class ExtractElementInst; class InsertElementInst; @@ -249,6 +250,7 @@ class Value { friend class Context; // For getting `Val`. friend class User; // For getting `Val`. friend class Use; // For getting `Val`. + friend class FenceInst; // For getting `Val`. friend class SelectInst; // For getting `Val`. friend class ExtractElementInst; // For getting `Val`. friend class InsertElementInst; // For getting `Val`. @@ -678,6 +680,7 @@ class Instruction : public sandboxir::User { /// A SandboxIR Instruction may map to multiple LLVM IR Instruction. This /// returns its topmost LLVM IR instruction. llvm::Instruction *getTopmostLLVMInstruction() const; + friend class FenceInst; // For getTopmostLLVMInstruction(). friend class SelectInst; // For getTopmostLLVMInstruction(). friend class ExtractElementInst; // For getTopmostLLVMInstruction(). friend class InsertElementInst; // For getTopmostLLVMInstruction(). 
@@ -882,6 +885,33 @@ template class SingleLLVMInstructionImpl : public Instruction { #endif }; +class FenceInst : public SingleLLVMInstructionImpl { + FenceInst(llvm::FenceInst *FI, Context &Ctx) + : SingleLLVMInstructionImpl(ClassID::Fence, Opcode::Fence, FI, Ctx) {} + friend Context; // For constructor; + +public: + static FenceInst *create(AtomicOrdering Ordering, BBIterator WhereIt, + BasicBlock *WhereBB, Context &Ctx, + SyncScope::ID SSID = SyncScope::System); + /// Returns the ordering constraint of this fence instruction. + AtomicOrdering getOrdering() const { + return cast(Val)->getOrdering(); + } + /// Sets the ordering constraint of this fence instruction. May only be + /// Acquire, Release, AcquireRelease, or SequentiallyConsistent. + void setOrdering(AtomicOrdering Ordering); + /// Returns the synchronization scope ID of this fence instruction. + SyncScope::ID getSyncScopeID() const { + return cast(Val)->getSyncScopeID(); + } + /// Sets the synchronization scope ID of this fence instruction. + void setSyncScopeID(SyncScope::ID SSID); + static bool classof(const Value *From) { + return From->getSubclassID() == ClassID::Fence; + } +}; + class SelectInst : public SingleLLVMInstructionImpl { /// Use Context::createSelectInst(). Don't call the /// constructor directly. 
@@ -2854,6 +2884,8 @@ class Context { IRBuilder LLVMIRBuilder; auto &getLLVMIRBuilder() { return LLVMIRBuilder; } + FenceInst *createFenceInst(llvm::FenceInst *SI); + friend FenceInst; // For createFenceInst() SelectInst *createSelectInst(llvm::SelectInst *SI); friend SelectInst; // For createSelectInst() InsertElementInst *createInsertElementInst(llvm::InsertElementInst *IEI); diff --git a/llvm/include/llvm/SandboxIR/SandboxIRValues.def b/llvm/include/llvm/SandboxIR/SandboxIRValues.def index 14cb2d72ad3af6..7bac00fb2918a4 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIRValues.def +++ b/llvm/include/llvm/SandboxIR/SandboxIRValues.def @@ -37,6 +37,7 @@ DEF_USER(ConstantInt, ConstantInt) DEF_INSTR(Opaque, OP(Opaque), OpaqueInst) DEF_INSTR(ExtractElement, OP(ExtractElement), ExtractElementInst) DEF_INSTR(InsertElement, OP(InsertElement), InsertElementInst) +DEF_INSTR(Fence, OP(Fence), FenceInst) DEF_INSTR(ShuffleVector, OP(ShuffleVector), ShuffleVectorInst) DEF_INSTR(Select, OP(Select), SelectInst) DEF_INSTR(Br, OP(Br), BranchInst) diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h index 93d831c26938bb..b5b969220df85b 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h @@ -139,6 +139,10 @@ class FunctionImporter { maybeAddDeclaration(FromModule, GUID); } + // Return the list of source modules sorted in the ascending alphabetical + // order. 
+ SmallVector getSourceModules() const; + const ImportMapTyImpl &getImportMap() const { return ImportMap; } private: diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index ed2e7f58ca853c..c2ea88a107c32a 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -84,6 +84,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { // combining and will be updated to reflect any changes. LoopInfo *LI; + ReversePostOrderTraversal &RPOT; + bool MadeIRChange = false; /// Edges that are known to never be taken. @@ -98,12 +100,13 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, - ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI) + ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI, + ReversePostOrderTraversal &RPOT) : TTI(TTI), Builder(Builder), Worklist(Worklist), MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), SQ(DL, &TLI, &DT, &AC, nullptr, /*UseInstrInfo*/ true, /*CanUseUndef*/ true, &DC), - ORE(ORE), BFI(BFI), BPI(BPI), PSI(PSI), LI(LI) {} + ORE(ORE), BFI(BFI), BPI(BPI), PSI(PSI), LI(LI), RPOT(RPOT) {} virtual ~InstCombiner() = default; diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 68eb00a50fe030..826347e79f7195 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -187,7 +187,8 @@ class CodeExtractorAnalysisCache { /// sets, before extraction occurs. These modifications won't have any /// significant impact on the cost however. 
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, - const ValueSet &Allocas) const; + const ValueSet &Allocas, + bool CollectGlobalInputs = false) const; /// Check if life time marker nodes can be hoisted/sunk into the outline /// region. diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index bccd9b04cd2c5c..e40248197c7c7c 100644 --- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -402,8 +402,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( // Scan the register defs for this instruction and update // live-ranges. - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) continue; + for (const MachineOperand &MO : MI.all_defs()) { Register Reg = MO.getReg(); if (Reg == 0) continue; // Ignore KILLs and passthru registers for liveness... diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 7344075fb604f1..aac6c85233a68f 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -2149,19 +2149,15 @@ bool MachineInstr::addRegisterDead(Register Reg, } void MachineInstr::clearRegisterDeads(Register Reg) { - for (MachineOperand &MO : operands()) { - if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) - continue; - MO.setIsDead(false); - } + for (MachineOperand &MO : all_defs()) + if (MO.getReg() == Reg) + MO.setIsDead(false); } void MachineInstr::setRegisterDefReadUndef(Register Reg, bool IsUndef) { - for (MachineOperand &MO : operands()) { - if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) - continue; - MO.setIsUndef(IsUndef); - } + for (MachineOperand &MO : all_defs()) + if (MO.getReg() == Reg && MO.getSubReg() != 0) + MO.setIsUndef(IsUndef); } void MachineInstr::addRegisterDefined(Register Reg, @@ -2171,9 +2167,8 @@ void MachineInstr::addRegisterDefined(Register Reg, if (MO) return; } else { - for (const MachineOperand 
&MO : operands()) { - if (MO.isReg() && MO.getReg() == Reg && MO.isDef() && - MO.getSubReg() == 0) + for (const MachineOperand &MO : all_defs()) { + if (MO.getReg() == Reg && MO.getSubReg() == 0) return; } } diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 78201d9bfb79a9..99c82bc3a2660a 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -2667,8 +2667,8 @@ void ModuloScheduleExpanderMVE::calcNumUnroll() { void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI, ValueMapTy &VRMap, bool LastDef) { - for (MachineOperand &MO : NewMI->operands()) { - if (!MO.isReg() || !MO.getReg().isVirtual() || !MO.isDef()) + for (MachineOperand &MO : NewMI->all_defs()) { + if (!MO.getReg().isVirtual()) continue; Register Reg = MO.getReg(); const TargetRegisterClass *RC = MRI.getRegClass(Reg); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index b2a94db0ef03e3..2a64e8f15da65e 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -1475,9 +1475,8 @@ void RegAllocFastImpl::findAndSortDefOperandIndexes(const MachineInstr &MI) { // we assign these. SmallVector RegClassDefCounts(TRI->getNumRegClasses(), 0); - for (const MachineOperand &MO : MI.operands()) - if (MO.isReg() && MO.isDef()) - addRegClassDefCounts(RegClassDefCounts, MO.getReg()); + for (const MachineOperand &MO : MI.all_defs()) + addRegClassDefCounts(RegClassDefCounts, MO.getReg()); llvm::sort(DefOperandIndexes, [&](unsigned I0, unsigned I1) { const MachineOperand &MO0 = MI.getOperand(I0); @@ -1627,9 +1626,7 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) { // Assign virtual register defs. 
while (ReArrangedImplicitOps) { ReArrangedImplicitOps = false; - for (MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) - continue; + for (MachineOperand &MO : MI.all_defs()) { Register Reg = MO.getReg(); if (Reg.isVirtual()) { ReArrangedImplicitOps = @@ -1645,10 +1642,7 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) { // Free registers occupied by defs. // Iterate operands in reverse order, so we see the implicit super register // defs first (we added them earlier in case of ). - for (MachineOperand &MO : reverse(MI.operands())) { - if (!MO.isReg() || !MO.isDef()) - continue; - + for (MachineOperand &MO : reverse(MI.all_defs())) { Register Reg = MO.getReg(); // subreg defs don't free the full register. We left the subreg number diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index f6c53f3051c2f0..97f8346df0e8fe 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -3230,8 +3230,8 @@ void JoinVals::pruneValues(JoinVals &Other, // Also remove dead flags since the joined live range will // continue past this instruction. 
for (MachineOperand &MO : - Indexes->getInstructionFromIndex(Def)->operands()) { - if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { + Indexes->getInstructionFromIndex(Def)->all_defs()) { + if (MO.getReg() == Reg) { if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef) MO.setIsUndef(false); MO.setIsDead(false); diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 88883fd0ad9462..416264e5368734 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1569,7 +1569,16 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( BasicBlock *CommonExit = nullptr; SetVector Inputs, Outputs, SinkingCands, HoistingCands; Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); - Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); + + Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands, + /*CollectGlobalInputs=*/true); + + Inputs.remove_if([&](Value *I) { + if (auto *GV = dyn_cast_if_present(I)) + return GV->getValueType() == OpenMPIRBuilder::Ident; + + return false; + }); LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n"); diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index 3f6f605149b479..90d7588407068a 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" @@ -48,6 +49,8 @@ static const MCExpr *buildSymbolDiff(MCObjectStreamer *MCOS, const MCSymbol *A, return AddrDelta; } +uint64_t MCDecodedPseudoProbe::getGuid() const { return InlineTree->Guid; } + void MCPseudoProbe::emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const { bool IsSentinel = isSentinelProbe(getAttributes()); @@ -271,7 +274,7 @@ static 
StringRef getProbeFNameForGUID(const GUIDProbeFunctionMap &GUID2FuncMAP, auto It = GUID2FuncMAP.find(GUID); assert(It != GUID2FuncMAP.end() && "Probe function must exist for a valid GUID"); - return It->second.FuncName; + return It->FuncName; } void MCPseudoProbeFuncDesc::print(raw_ostream &OS) { @@ -288,8 +291,8 @@ void MCDecodedPseudoProbe::getInlineContext( // Note that it won't include the probe's belonging function(leaf location) while (Cur->hasInlineSite()) { StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Cur->Parent->Guid); - ContextStack.emplace_back( - MCPseudoProbeFrameLocation(FuncName, std::get<1>(Cur->ISite))); + ContextStack.emplace_back(MCPseudoProbeFrameLocation( + FuncName, std::get<1>(Cur->getInlineSite()))); Cur = static_cast(Cur->Parent); } // Make the ContextStack in caller-callee order @@ -317,10 +320,10 @@ void MCDecodedPseudoProbe::print(raw_ostream &OS, bool ShowName) const { OS << "FUNC: "; if (ShowName) { - StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Guid); + StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, getGuid()); OS << FuncName.str() << " "; } else { - OS << Guid << " "; + OS << getGuid() << " "; } OS << "Index: " << Index << " "; if (Discriminator) @@ -387,59 +390,68 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start, Data = Start; End = Data + Size; + uint32_t FuncDescCount = 0; while (Data < End) { - auto ErrorOrGUID = readUnencodedNumber(); - if (!ErrorOrGUID) + // GUID + if (!readUnencodedNumber()) return false; - - auto ErrorOrHash = readUnencodedNumber(); - if (!ErrorOrHash) + // Hash + if (!readUnencodedNumber()) return false; auto ErrorOrNameSize = readUnsignedNumber(); if (!ErrorOrNameSize) return false; - uint32_t NameSize = std::move(*ErrorOrNameSize); - - auto ErrorOrName = readString(NameSize); - if (!ErrorOrName) + // Function name + if (!readString(*ErrorOrNameSize)) return false; + ++FuncDescCount; + } + assert(Data == End && "Have unprocessed data in 
pseudo_probe_desc section"); + GUID2FuncDescMap.reserve(FuncDescCount); - uint64_t GUID = std::move(*ErrorOrGUID); - uint64_t Hash = std::move(*ErrorOrHash); - StringRef Name = std::move(*ErrorOrName); + Data = Start; + End = Data + Size; + while (Data < End) { + uint64_t GUID = + cantFail(errorOrToExpected(readUnencodedNumber())); + uint64_t Hash = + cantFail(errorOrToExpected(readUnencodedNumber())); + uint32_t NameSize = + cantFail(errorOrToExpected(readUnsignedNumber())); + StringRef Name = cantFail(errorOrToExpected(readString(NameSize))); // Initialize PseudoProbeFuncDesc and populate it into GUID2FuncDescMap - GUID2FuncDescMap.emplace(GUID, MCPseudoProbeFuncDesc(GUID, Hash, Name)); + GUID2FuncDescMap.emplace_back(GUID, Hash, Name.copy(FuncNameAllocator)); } assert(Data == End && "Have unprocessed data in pseudo_probe_desc section"); + assert(GUID2FuncDescMap.size() == FuncDescCount && + "Mismatching function description count pre- and post-parsing"); + llvm::sort(GUID2FuncDescMap, [](const auto &LHS, const auto &RHS) { + return LHS.FuncGUID < RHS.FuncGUID; + }); return true; } +template bool MCPseudoProbeDecoder::buildAddress2ProbeMap( MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr, - const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) { + const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs, + const uint32_t CurChildIndex) { // The pseudo_probe section encodes an inline forest and each tree has a // format defined in MCPseudoProbe.h uint32_t Index = 0; - bool IsTopLevelFunc = Cur == &DummyInlineRoot; if (IsTopLevelFunc) { // Use a sequential id for top level inliner. 
- Index = Cur->getChildren().size(); + Index = CurChildIndex; } else { // Read inline site for inlinees - auto ErrorOrIndex = readUnsignedNumber(); - if (!ErrorOrIndex) - return false; - Index = std::move(*ErrorOrIndex); + Index = cantFail(errorOrToExpected(readUnsignedNumber())); } // Read guid - auto ErrorOrCurGuid = readUnencodedNumber(); - if (!ErrorOrCurGuid) - return false; - uint64_t Guid = std::move(*ErrorOrCurGuid); + uint64_t Guid = cantFail(errorOrToExpected(readUnencodedNumber())); // Decide if top-level node should be disgarded. if (IsTopLevelFunc && !GuidFilter.empty() && !GuidFilter.count(Guid)) @@ -448,8 +460,9 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( // If the incoming node is null, all its children nodes should be disgarded. if (Cur) { // Switch/add to a new tree node(inlinee) - Cur = Cur->getOrAddNode(std::make_tuple(Guid, Index)); - Cur->Guid = Guid; + Cur->getChildren()[CurChildIndex] = + MCDecodedPseudoProbeInlineTree(InlineSite(Guid, Index), Cur); + Cur = &Cur->getChildren()[CurChildIndex]; if (IsTopLevelFunc && !EncodingIsAddrBased) { if (auto V = FuncStartAddrs.lookup(Guid)) LastAddr = V; @@ -457,41 +470,28 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( } // Read number of probes in the current node. 
- auto ErrorOrNodeCount = readUnsignedNumber(); - if (!ErrorOrNodeCount) - return false; - uint32_t NodeCount = std::move(*ErrorOrNodeCount); + uint32_t NodeCount = + cantFail(errorOrToExpected(readUnsignedNumber())); + uint32_t CurrentProbeCount = 0; // Read number of direct inlinees - auto ErrorOrCurChildrenToProcess = readUnsignedNumber(); - if (!ErrorOrCurChildrenToProcess) - return false; + uint32_t ChildrenToProcess = + cantFail(errorOrToExpected(readUnsignedNumber())); // Read all probes in this node for (std::size_t I = 0; I < NodeCount; I++) { // Read index - auto ErrorOrIndex = readUnsignedNumber(); - if (!ErrorOrIndex) - return false; - uint32_t Index = std::move(*ErrorOrIndex); + uint32_t Index = + cantFail(errorOrToExpected(readUnsignedNumber())); // Read type | flag. - auto ErrorOrValue = readUnencodedNumber(); - if (!ErrorOrValue) - return false; - uint8_t Value = std::move(*ErrorOrValue); + uint8_t Value = cantFail(errorOrToExpected(readUnencodedNumber())); uint8_t Kind = Value & 0xf; uint8_t Attr = (Value & 0x70) >> 4; // Read address uint64_t Addr = 0; if (Value & 0x80) { - auto ErrorOrOffset = readSignedNumber(); - if (!ErrorOrOffset) - return false; - int64_t Offset = std::move(*ErrorOrOffset); + int64_t Offset = cantFail(errorOrToExpected(readSignedNumber())); Addr = LastAddr + Offset; } else { - auto ErrorOrAddr = readUnencodedNumber(); - if (!ErrorOrAddr) - return false; - Addr = std::move(*ErrorOrAddr); + Addr = cantFail(errorOrToExpected(readUnencodedNumber())); if (isSentinelProbe(Attr)) { // For sentinel probe, the addr field actually stores the GUID of the // split function. Convert it to the real address. 
@@ -508,85 +508,189 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( uint32_t Discriminator = 0; if (hasDiscriminator(Attr)) { - auto ErrorOrDiscriminator = readUnsignedNumber(); - if (!ErrorOrDiscriminator) - return false; - Discriminator = std::move(*ErrorOrDiscriminator); + Discriminator = + cantFail(errorOrToExpected(readUnsignedNumber())); } if (Cur && !isSentinelProbe(Attr)) { - // Populate Address2ProbesMap - auto &Probes = Address2ProbesMap[Addr]; - Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr, - Discriminator, Cur); - Cur->addProbes(&Probes.back()); + PseudoProbeVec.emplace_back(Addr, Index, PseudoProbeType(Kind), Attr, + Discriminator, Cur); + ++CurrentProbeCount; } LastAddr = Addr; } - uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess); + if (Cur) { + Cur->setProbes( + MutableArrayRef(PseudoProbeVec).take_back(CurrentProbeCount)); + InlineTreeVec.resize(InlineTreeVec.size() + ChildrenToProcess); + Cur->getChildren() = + MutableArrayRef(InlineTreeVec).take_back(ChildrenToProcess); + } for (uint32_t I = 0; I < ChildrenToProcess; I++) { - buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs); + buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs, I); + } + return Cur; +} + +template +bool MCPseudoProbeDecoder::countRecords(bool &Discard, uint32_t &ProbeCount, + uint32_t &InlinedCount, + const Uint64Set &GuidFilter) { + if (!IsTopLevelFunc) + // Read inline site for inlinees + if (!readUnsignedNumber()) + return false; + + // Read guid + auto ErrorOrCurGuid = readUnencodedNumber(); + if (!ErrorOrCurGuid) + return false; + uint64_t Guid = std::move(*ErrorOrCurGuid); + + // Decide if top-level node should be disgarded. + if (IsTopLevelFunc) { + Discard = !GuidFilter.empty() && !GuidFilter.count(Guid); + if (!Discard) + // Allocate an entry for top-level function record. + ++InlinedCount; + } + + // Read number of probes in the current node. 
+ auto ErrorOrNodeCount = readUnsignedNumber(); + if (!ErrorOrNodeCount) + return false; + uint32_t NodeCount = std::move(*ErrorOrNodeCount); + uint32_t CurrentProbeCount = 0; + + // Read number of direct inlinees + auto ErrorOrCurChildrenToProcess = readUnsignedNumber(); + if (!ErrorOrCurChildrenToProcess) + return false; + uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess); + + // Read all probes in this node + for (std::size_t I = 0; I < NodeCount; I++) { + // Read index + if (!readUnsignedNumber()) + return false; + + // Read type | flag. + auto ErrorOrValue = readUnencodedNumber(); + if (!ErrorOrValue) + return false; + uint8_t Value = std::move(*ErrorOrValue); + + uint8_t Attr = (Value & 0x70) >> 4; + if (Value & 0x80) { + // Offset + if (!readSignedNumber()) + return false; + } else { + // Addr + if (!readUnencodedNumber()) + return false; + } + + if (hasDiscriminator(Attr)) + // Discriminator + if (!readUnsignedNumber()) + return false; + + if (!Discard && !isSentinelProbe(Attr)) + ++CurrentProbeCount; + } + + if (!Discard) { + ProbeCount += CurrentProbeCount; + InlinedCount += ChildrenToProcess; } + for (uint32_t I = 0; I < ChildrenToProcess; I++) + if (!countRecords(Discard, ProbeCount, InlinedCount, GuidFilter)) + return false; return true; } bool MCPseudoProbeDecoder::buildAddress2ProbeMap( const uint8_t *Start, std::size_t Size, const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) { + // For function records in the order of their appearance in the encoded data + // (DFS), count the number of contained probes and inlined function records. 
+ uint32_t ProbeCount = 0; + uint32_t InlinedCount = 0; + uint32_t TopLevelFuncs = 0; + Data = Start; + End = Data + Size; + bool Discard = false; + while (Data < End) { + if (!countRecords(Discard, ProbeCount, InlinedCount, GuidFilter)) + return false; + TopLevelFuncs += !Discard; + } + assert(Data == End && "Have unprocessed data in pseudo_probe section"); + PseudoProbeVec.reserve(ProbeCount); + InlineTreeVec.reserve(InlinedCount); + + // Allocate top-level function records as children of DummyInlineRoot. + InlineTreeVec.resize(TopLevelFuncs); + DummyInlineRoot.getChildren() = MutableArrayRef(InlineTreeVec); + Data = Start; End = Data + Size; uint64_t LastAddr = 0; + uint32_t CurChildIndex = 0; while (Data < End) - buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuidFilter, - FuncStartAddrs); + CurChildIndex += buildAddress2ProbeMap( + &DummyInlineRoot, LastAddr, GuidFilter, FuncStartAddrs, CurChildIndex); assert(Data == End && "Have unprocessed data in pseudo_probe section"); + assert(PseudoProbeVec.size() == ProbeCount && + "Mismatching probe count pre- and post-parsing"); + assert(InlineTreeVec.size() == InlinedCount && + "Mismatching function records count pre- and post-parsing"); + + std::vector> SortedA2P(ProbeCount); + for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec)) + SortedA2P[I] = {Probe.getAddress(), I}; + llvm::sort(SortedA2P); + Address2ProbesMap.reserve(ProbeCount); + for (const uint32_t I : llvm::make_second_range(SortedA2P)) + Address2ProbesMap.emplace_back(PseudoProbeVec[I]); + SortedA2P.clear(); return true; } void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) { OS << "Pseudo Probe Desc:\n"; - // Make the output deterministic - std::map OrderedMap(GUID2FuncDescMap.begin(), - GUID2FuncDescMap.end()); - for (auto &I : OrderedMap) { - I.second.print(OS); - } + for (auto &I : GUID2FuncDescMap) + I.print(OS); } void MCPseudoProbeDecoder::printProbeForAddress(raw_ostream &OS, uint64_t Address) { - auto It = 
Address2ProbesMap.find(Address); - if (It != Address2ProbesMap.end()) { - for (const MCDecodedPseudoProbe &Probe : It->second) { - OS << " [Probe]:\t"; - Probe.print(OS, GUID2FuncDescMap, true); - } + for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) { + OS << " [Probe]:\t"; + Probe.print(OS, GUID2FuncDescMap, true); } } void MCPseudoProbeDecoder::printProbesForAllAddresses(raw_ostream &OS) { - auto Entries = make_first_range(Address2ProbesMap); - SmallVector Addresses(Entries.begin(), Entries.end()); - llvm::sort(Addresses); - for (auto K : Addresses) { - OS << "Address:\t"; - OS << K; - OS << "\n"; - printProbeForAddress(OS, K); + uint64_t PrevAddress = INT64_MAX; + for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) { + uint64_t Address = Probe.getAddress(); + if (Address != PrevAddress) { + PrevAddress = Address; + OS << "Address:\t" << Address << '\n'; + } + OS << " [Probe]:\t"; + Probe.print(OS, GUID2FuncDescMap, true); } } const MCDecodedPseudoProbe * MCPseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const { - auto It = Address2ProbesMap.find(Address); - if (It == Address2ProbesMap.end()) - return nullptr; - const auto &Probes = It->second; - const MCDecodedPseudoProbe *CallProbe = nullptr; - for (const MCDecodedPseudoProbe &Probe : Probes) { + for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) { if (Probe.isCall()) { // Disabling the assert and returning first call probe seen so far. // Subsequent call probes, if any, are ignored. 
Due to the the way @@ -611,7 +715,7 @@ const MCPseudoProbeFuncDesc * MCPseudoProbeDecoder::getFuncDescForGUID(uint64_t GUID) const { auto It = GUID2FuncDescMap.find(GUID); assert(It != GUID2FuncDescMap.end() && "Function descriptor doesn't exist"); - return &It->second; + return &*It; } void MCPseudoProbeDecoder::getInlineContextForProbe( diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp index 32b20d758ee70b..c4d88856abdfb9 100644 --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -799,7 +799,7 @@ InstrBuilder::createInstruction(const MCInst &MCI, unsigned WriteIndex = 0; Idx = 0U; for (const WriteDescriptor &WD : D.Writes) { - RegID = WD.isImplicitWrite() ? WD.RegisterID + RegID = WD.isImplicitWrite() ? MCRegister(WD.RegisterID) : MCI.getOperand(WD.OpIndex).getReg(); // Check if this is a optional definition that references NoReg or a write // to a constant register. diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index f92e9d38125139..559fb4d10fff52 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -575,6 +575,33 @@ void Instruction::dumpOS(raw_ostream &OS) const { } #endif // NDEBUG +FenceInst *FenceInst::create(AtomicOrdering Ordering, BBIterator WhereIt, + BasicBlock *WhereBB, Context &Ctx, + SyncScope::ID SSID) { + auto &Builder = Ctx.getLLVMIRBuilder(); + if (WhereIt != WhereBB->end()) + Builder.SetInsertPoint((*WhereIt).getTopmostLLVMInstruction()); + else + Builder.SetInsertPoint(cast(WhereBB->Val)); + llvm::FenceInst *LLVMI = Builder.CreateFence(Ordering, SSID); + return Ctx.createFenceInst(LLVMI); +} + +void FenceInst::setOrdering(AtomicOrdering Ordering) { + Ctx.getTracker() + .emplaceIfTracking< + GenericSetter<&FenceInst::getOrdering, &FenceInst::setOrdering>>( + this); + cast(Val)->setOrdering(Ordering); +} + +void FenceInst::setSyncScopeID(SyncScope::ID SSID) { + Ctx.getTracker() + .emplaceIfTracking>(this); + 
cast(Val)->setSyncScopeID(SSID); +} + Value *SelectInst::createCommon(Value *Cond, Value *True, Value *False, const Twine &Name, IRBuilder<> &Builder, Context &Ctx) { @@ -2157,6 +2184,11 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { assert(isa(LLVMV) && "Expected Instruction"); switch (cast(LLVMV)->getOpcode()) { + case llvm::Instruction::Fence: { + auto *LLVMFence = cast(LLVMV); + It->second = std::unique_ptr(new FenceInst(LLVMFence, *this)); + return It->second.get(); + } case llvm::Instruction::Select: { auto *LLVMSel = cast(LLVMV); It->second = std::unique_ptr(new SelectInst(LLVMSel, *this)); @@ -2349,6 +2381,11 @@ BasicBlock *Context::createBasicBlock(llvm::BasicBlock *LLVMBB) { return BB; } +FenceInst *Context::createFenceInst(llvm::FenceInst *SI) { + auto NewPtr = std::unique_ptr(new FenceInst(SI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + SelectInst *Context::createSelectInst(llvm::SelectInst *SI) { auto NewPtr = std::unique_ptr(new SelectInst(SI, *this)); return cast(registerValue(std::move(NewPtr))); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index c38c2dc0f5f618..cf9300ceb40cb7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -745,8 +745,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::copysign: return NElts * getFullRateInstrCost(); case Intrinsic::canonicalize: { - InstRate = - SLT == MVT::f64 ? 
get64BitInstrCost(CostKind) : getFullRateInstrCost(); + assert(SLT != MVT::f64); + InstRate = getFullRateInstrCost(); break; } case Intrinsic::uadd_sat: diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 1a10206eea2374..3914f36338fa50 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -3815,7 +3815,7 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints( const MCOperand &Opr = Inst.getOperand(OperandIdx); return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI)) ? Opr.getReg() - : MCRegister::NoRegister; + : MCRegister(); }; // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. @@ -4753,7 +4753,7 @@ static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, if (!Op.isReg()) return -1; - unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); + MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); auto Reg = Sub ? Sub : Op.getReg(); const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); return AGPR32.contains(Reg) ? 1 : 0; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 1a0dc7098347ac..b1da9da19c69b1 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -382,7 +382,7 @@ static bool IsAGPROperand(const MCInst &Inst, int OpIdx, if (!Op.isReg()) return false; - unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); + MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); auto Reg = Sub ? 
Sub : Op.getReg(); return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255; } diff --git a/llvm/lib/Target/ARM/ARMCallingConv.cpp b/llvm/lib/Target/ARM/ARMCallingConv.cpp index 4878c73138940d..2ab66da4b4d2d9 100644 --- a/llvm/lib/Target/ARM/ARMCallingConv.cpp +++ b/llvm/lib/Target/ARM/ARMCallingConv.cpp @@ -24,7 +24,7 @@ static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; // Try to get the first register. - if (unsigned Reg = State.AllocateReg(RegList)) + if (MCRegister Reg = State.AllocateReg(RegList)) State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); else { // For the 2nd half of a v2f64, do not fail. @@ -38,7 +38,7 @@ static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, } // Try to get the second register. - if (unsigned Reg = State.AllocateReg(RegList)) + if (MCRegister Reg = State.AllocateReg(RegList)) State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); else State.addLoc(CCValAssign::getCustomMem( @@ -67,8 +67,8 @@ static bool f64AssignAAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 }; static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; - unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList); - if (Reg == 0) { + MCRegister Reg = State.AllocateReg(HiRegList, ShadowRegList); + if (!Reg) { // If we had R3 unallocated only, now we still must to waste it. 
Reg = State.AllocateReg(GPRArgRegs); @@ -89,7 +89,7 @@ static bool f64AssignAAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, if (HiRegList[i] == Reg) break; - unsigned T = State.AllocateReg(LoRegList[i]); + MCRegister T = State.AllocateReg(LoRegList[i]); (void)T; assert(T == LoRegList[i] && "Could not allocate register"); @@ -116,8 +116,8 @@ static bool f64RetAssign(unsigned ValNo, MVT ValVT, MVT LocVT, static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 }; static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 }; - unsigned Reg = State.AllocateReg(HiRegList, LoRegList); - if (Reg == 0) + MCRegister Reg = State.AllocateReg(HiRegList, LoRegList); + if (!Reg) return false; // we didn't handle it unsigned i; @@ -287,7 +287,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT, static bool CustomAssignInRegList(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, CCState &State, ArrayRef RegList) { - unsigned Reg = State.AllocateReg(RegList); + MCRegister Reg = State.AllocateReg(RegList); if (Reg) { State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return true; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 4ab0433069ae66..853f54943eebf1 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2915,7 +2915,7 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, // Byval (as with any stack) slots are always at least 4 byte aligned. 
Alignment = std::max(Alignment, Align(4)); - unsigned Reg = State->AllocateReg(GPRArgRegs); + MCRegister Reg = State->AllocateReg(GPRArgRegs); if (!Reg) return; diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index e49169cff8aa86..2daa4f825c3b25 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -38,6 +38,7 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::log: case Intrinsic::log10: case Intrinsic::pow: + case Intrinsic::dx_all: case Intrinsic::dx_any: case Intrinsic::dx_clamp: case Intrinsic::dx_uclamp: @@ -54,8 +55,7 @@ static bool isIntrinsicExpansion(Function &F) { static Value *expandAbs(CallInst *Orig) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); Constant *Zero = Ty->isVectorTy() @@ -148,8 +148,7 @@ static Value *expandIntegerDotIntrinsic(CallInst *Orig, static Value *expandExpIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); Constant *Log2eConst = @@ -166,13 +165,21 @@ static Value *expandExpIntrinsic(CallInst *Orig) { return Exp2Call; } -static Value *expandAnyIntrinsic(CallInst *Orig) { +static Value *expandAnyOrAllIntrinsic(CallInst *Orig, + Intrinsic::ID intrinsicId) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); + auto ApplyOp = [&Builder](Intrinsic::ID IntrinsicId, Value *Result, + Value *Elt) { + if (IntrinsicId == Intrinsic::dx_any) + return Builder.CreateOr(Result, Elt); + assert(IntrinsicId == Intrinsic::dx_all); + return 
Builder.CreateAnd(Result, Elt); + }; + Value *Result = nullptr; if (!Ty->isVectorTy()) { Result = EltTy->isFloatingPointTy() @@ -193,7 +200,7 @@ static Value *expandAnyIntrinsic(CallInst *Orig) { Result = Builder.CreateExtractElement(Cond, (uint64_t)0); for (unsigned I = 1; I < XVec->getNumElements(); I++) { Value *Elt = Builder.CreateExtractElement(Cond, I); - Result = Builder.CreateOr(Result, Elt); + Result = ApplyOp(intrinsicId, Result, Elt); } } return Result; @@ -201,8 +208,7 @@ static Value *expandAnyIntrinsic(CallInst *Orig) { static Value *expandLengthIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); @@ -230,8 +236,7 @@ static Value *expandLerpIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); Value *Y = Orig->getOperand(1); Value *S = Orig->getOperand(2); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); auto *V = Builder.CreateFSub(Y, X); V = Builder.CreateFMul(S, V); return Builder.CreateFAdd(X, V, "dx.lerp"); @@ -240,8 +245,7 @@ static Value *expandLerpIntrinsic(CallInst *Orig) { static Value *expandLogIntrinsic(CallInst *Orig, float LogConstVal = numbers::ln2f) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); Constant *Ln2Const = @@ -266,8 +270,7 @@ static Value *expandNormalizeIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); Type *Ty = Orig->getType(); Type *EltTy = Ty->getScalarType(); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); auto *XVec = dyn_cast(Ty); if (!XVec) { @@ -305,8 +308,7 @@ static Value *expandPowIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); Value *Y = Orig->getOperand(1); Type 
*Ty = X->getType(); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); auto *Log2Call = Builder.CreateIntrinsic(Ty, Intrinsic::log2, {X}, nullptr, "elt.log2"); @@ -350,8 +352,7 @@ static Value *expandClampIntrinsic(CallInst *Orig, Value *Min = Orig->getOperand(1); Value *Max = Orig->getOperand(2); Type *Ty = X->getType(); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); auto *MaxCall = Builder.CreateIntrinsic( Ty, getMaxForClamp(Ty, ClampIntrinsic), {X, Min}, nullptr, "dx.max"); return Builder.CreateIntrinsic(Ty, getMinForClamp(Ty, ClampIntrinsic), @@ -360,7 +361,8 @@ static Value *expandClampIntrinsic(CallInst *Orig, static bool expandIntrinsic(Function &F, CallInst *Orig) { Value *Result = nullptr; - switch (F.getIntrinsicID()) { + Intrinsic::ID IntrinsicId = F.getIntrinsicID(); + switch (IntrinsicId) { case Intrinsic::abs: Result = expandAbs(Orig); break; @@ -376,12 +378,13 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::pow: Result = expandPowIntrinsic(Orig); break; + case Intrinsic::dx_all: case Intrinsic::dx_any: - Result = expandAnyIntrinsic(Orig); + Result = expandAnyOrAllIntrinsic(Orig, IntrinsicId); break; case Intrinsic::dx_uclamp: case Intrinsic::dx_clamp: - Result = expandClampIntrinsic(Orig, F.getIntrinsicID()); + Result = expandClampIntrinsic(Orig, IntrinsicId); break; case Intrinsic::dx_lerp: Result = expandLerpIntrinsic(Orig); @@ -397,7 +400,7 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { break; case Intrinsic::dx_sdot: case Intrinsic::dx_udot: - Result = expandIntegerDotIntrinsic(Orig, F.getIntrinsicID()); + Result = expandIntegerDotIntrinsic(Orig, IntrinsicId); break; } diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp index f52e188f877920..c2ae4a0734b6a7 100644 --- 
a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -1314,8 +1314,8 @@ void LoongArchAsmParser::emitFuncCall36(MCInst &Inst, SMLoc IDLoc, // expands to: // pcaddu18i $rj, %call36(sym) // jirl $r0, $rj, 0 - unsigned ScratchReg = - IsTailCall ? Inst.getOperand(0).getReg() : (unsigned)LoongArch::R1; + MCRegister ScratchReg = + IsTailCall ? Inst.getOperand(0).getReg() : MCRegister(LoongArch::R1); const MCExpr *Sym = IsTailCall ? Inst.getOperand(1).getExpr() : Inst.getOperand(0).getExpr(); const LoongArchMCExpr *LE = LoongArchMCExpr::create( @@ -1326,7 +1326,7 @@ void LoongArchAsmParser::emitFuncCall36(MCInst &Inst, SMLoc IDLoc, getSTI()); Out.emitInstruction( MCInstBuilder(LoongArch::JIRL) - .addReg(IsTailCall ? (unsigned)LoongArch::R0 : ScratchReg) + .addReg(IsTailCall ? MCRegister(LoongArch::R0) : ScratchReg) .addReg(ScratchReg) .addImm(0), getSTI()); diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 50c6c263e966b5..95c1b150722f64 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -5012,7 +5012,7 @@ static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26, LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30, LoongArch::R31}; - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (MCRegister Reg = State.AllocateReg(GPRList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -5023,7 +5023,7 @@ static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, // fs0,fs1,fs2,fs3 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, LoongArch::F26, LoongArch::F27}; - if (unsigned Reg = State.AllocateReg(FPR32List)) { + if (MCRegister Reg = State.AllocateReg(FPR32List)) { 
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -5034,7 +5034,7 @@ static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, // fs4,fs5,fs6,fs7 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, LoongArch::F30_64, LoongArch::F31_64}; - if (unsigned Reg = State.AllocateReg(FPR64List)) { + if (MCRegister Reg = State.AllocateReg(FPR64List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index ba7b6c85bd81a9..1c7a14464d7bb0 100644 --- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -527,7 +527,7 @@ static void AnalyzeArguments(CCState &State, if (!UsedStack && Parts == 2 && RegsLeft == 1) { // Special case for 32-bit register split, see EABI section 3.3.3 - unsigned Reg = State.AllocateReg(RegList); + MCRegister Reg = State.AllocateReg(RegList); State.addLoc(CCValAssign::getReg(ValNo++, ArgVT, Reg, LocVT, LocInfo)); RegsLeft -= 1; @@ -535,7 +535,7 @@ static void AnalyzeArguments(CCState &State, CC_MSP430_AssignStack(ValNo++, ArgVT, LocVT, LocInfo, ArgFlags, State); } else if (Parts <= RegsLeft) { for (unsigned j = 0; j < Parts; j++) { - unsigned Reg = State.AllocateReg(RegList); + MCRegister Reg = State.AllocateReg(RegList); State.addLoc(CCValAssign::getReg(ValNo++, ArgVT, Reg, LocVT, LocInfo)); RegsLeft--; } diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 076e0a20cb97e9..c50c2063ee8edf 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -5782,9 +5782,9 @@ bool MipsAsmParser::expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, sel = 3; break; } - unsigned Op0 = IsMFTR ? Inst.getOperand(0).getReg() : rd; - unsigned Op1 = - IsMFTR ? 
rd + MCRegister Op0 = IsMFTR ? Inst.getOperand(0).getReg() : MCRegister(rd); + MCRegister Op1 = + IsMFTR ? MCRegister(rd) : (Inst.getOpcode() != Mips::MTTDSP ? Inst.getOperand(1).getReg() : Inst.getOperand(0).getReg()); diff --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp index b2ba0f8fe74dc9..311b73710fb7a1 100644 --- a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -1631,28 +1631,26 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) { void MipsConstantIslands::prescanForConstants() { for (MachineBasicBlock &B : *MF) { - for (MachineBasicBlock::instr_iterator I = B.instr_begin(), - EB = B.instr_end(); - I != EB; ++I) { - switch(I->getDesc().getOpcode()) { + for (MachineInstr &MI : B) { + switch (MI.getDesc().getOpcode()) { case Mips::LwConstant32: { PrescannedForConstants = true; - LLVM_DEBUG(dbgs() << "constant island constant " << *I << "\n"); - LLVM_DEBUG(dbgs() << "num operands " << I->getNumOperands() << "\n"); - MachineOperand &Literal = I->getOperand(1); + LLVM_DEBUG(dbgs() << "constant island constant " << MI << "\n"); + LLVM_DEBUG(dbgs() << "num operands " << MI.getNumOperands() << "\n"); + MachineOperand &Literal = MI.getOperand(1); if (Literal.isImm()) { int64_t V = Literal.getImm(); LLVM_DEBUG(dbgs() << "literal " << V << "\n"); Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, V); unsigned index = MCP->getConstantPoolIndex(C, Align(4)); - I->getOperand(2).ChangeToImmediate(index); - LLVM_DEBUG(dbgs() << "constant island constant " << *I << "\n"); - I->setDesc(TII->get(Mips::LwRxPcTcp16)); - I->removeOperand(1); - I->removeOperand(1); - I->addOperand(MachineOperand::CreateCPI(index, 0)); - I->addOperand(MachineOperand::CreateImm(4)); + MI.getOperand(2).ChangeToImmediate(index); + LLVM_DEBUG(dbgs() << "constant island constant " << MI << "\n"); + 
MI.setDesc(TII->get(Mips::LwRxPcTcp16)); + MI.removeOperand(1); + MI.removeOperand(1); + MI.addOperand(MachineOperand::CreateCPI(index, 0)); + MI.addOperand(MachineOperand::CreateImm(4)); } break; } diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 0f2047fcac640e..31b86b32008903 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -2991,7 +2991,7 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, } else { Reg = State.AllocateReg(F64Regs); // Shadow int registers - unsigned Reg2 = State.AllocateReg(IntRegs); + MCRegister Reg2 = State.AllocateReg(IntRegs); if (Reg2 == Mips::A1 || Reg2 == Mips::A3) State.AllocateReg(IntRegs); State.AllocateReg(IntRegs); diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp index 188fc96bc7c2a3..d5077ab2796519 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp @@ -151,7 +151,7 @@ static bool CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT, static const MCPhysReg LoRegList[] = { PPC::R4, PPC::R6, PPC::R8, PPC::R10 }; // Try to get the first register. - unsigned Reg = State.AllocateReg(HiRegList); + MCRegister Reg = State.AllocateReg(HiRegList); if (!Reg) return false; @@ -160,7 +160,7 @@ static bool CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT, if (HiRegList[i] == Reg) break; - unsigned T = State.AllocateReg(LoRegList[i]); + MCRegister T = State.AllocateReg(LoRegList[i]); (void)T; assert(T == LoRegList[i] && "Could not allocate register"); @@ -180,7 +180,7 @@ static bool CC_PPC32_SPE_RetF64(unsigned &ValNo, MVT &ValVT, static const MCPhysReg LoRegList[] = { PPC::R4 }; // Try to get the first register. 
- unsigned Reg = State.AllocateReg(HiRegList, LoRegList); + MCRegister Reg = State.AllocateReg(HiRegList, LoRegList); if (!Reg) return false; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 459a96eca1ff20..efabfa0b511a6e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6904,7 +6904,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, while (NextReg != GPRs.size() && !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) { // Shadow allocate next registers since its aligment is not strict enough. - unsigned Reg = State.AllocateReg(GPRs); + MCRegister Reg = State.AllocateReg(GPRs); // Allocate the stack space shadowed by said register. State.AllocateStack(PtrSize, PtrAlign); assert(Reg && "Alocating register unexpectedly failed."); @@ -6915,7 +6915,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, const unsigned StackSize = alignTo(ByValSize, ObjAlign); unsigned Offset = State.AllocateStack(StackSize, ObjAlign); for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) { - if (unsigned Reg = State.AllocateReg(GPRs)) + if (MCRegister Reg = State.AllocateReg(GPRs)) State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo)); else { State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE, @@ -6942,7 +6942,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits()) LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt : CCValAssign::LocInfo::ZExt; - if (unsigned Reg = State.AllocateReg(GPRs)) + if (MCRegister Reg = State.AllocateReg(GPRs)) State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo)); else State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo)); @@ -6957,13 +6957,13 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, // This includes f64 in 64-bit mode for ABI compatibility. 
const unsigned Offset = State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4)); - unsigned FReg = State.AllocateReg(FPR); + MCRegister FReg = State.AllocateReg(FPR); if (FReg) State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo)); // Reserve and initialize GPRs or initialize the PSA as required. for (unsigned I = 0; I < StoreSize; I += PtrSize) { - if (unsigned Reg = State.AllocateReg(GPRs)) { + if (MCRegister Reg = State.AllocateReg(GPRs)) { assert(FReg && "An FPR should be available when a GPR is reserved."); if (State.isVarArg()) { // Successfully reserved GPRs are only initialized for vararg calls. @@ -7003,7 +7003,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, if (!State.isVarArg()) { // If there are vector registers remaining we don't consume any stack // space. - if (unsigned VReg = State.AllocateReg(VR)) { + if (MCRegister VReg = State.AllocateReg(VR)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo)); return false; } @@ -7021,7 +7021,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, while (NextRegIndex != GPRs.size() && !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) { // Shadow allocate register and its stack shadow. - unsigned Reg = State.AllocateReg(GPRs); + MCRegister Reg = State.AllocateReg(GPRs); State.AllocateStack(PtrSize, PtrAlign); assert(Reg && "Allocating register unexpectedly failed."); (void)Reg; @@ -7033,7 +7033,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, // through ellipses) and shadow GPRs (unlike arguments to non-vaarg // functions) if (State.isFixed(ValNo)) { - if (unsigned VReg = State.AllocateReg(VR)) { + if (MCRegister VReg = State.AllocateReg(VR)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo)); // Shadow allocate GPRs and stack space even though we pass in a VR. 
for (unsigned I = 0; I != VecSize; I += PtrSize) @@ -7062,8 +7062,8 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, State.addLoc( CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - const unsigned FirstReg = State.AllocateReg(PPC::R9); - const unsigned SecondReg = State.AllocateReg(PPC::R10); + const MCRegister FirstReg = State.AllocateReg(PPC::R9); + const MCRegister SecondReg = State.AllocateReg(PPC::R10); assert(FirstReg && SecondReg && "Allocating R9 or R10 unexpectedly failed."); State.addLoc( @@ -7080,7 +7080,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, State.addLoc( CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo)); for (unsigned I = 0; I != VecSize; I += PtrSize) { - const unsigned Reg = State.AllocateReg(GPRs); + const MCRegister Reg = State.AllocateReg(GPRs); assert(Reg && "Failed to allocated register for vararg vector argument"); State.addLoc( CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo)); diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 93677433c04405..476dde2be39e57 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -396,6 +396,15 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, OS << MCO.getImm(); else if (Offset.isGlobal() || Offset.isBlockAddress() || Offset.isMCSymbol()) OS << *MCO.getExpr(); + + if (Offset.isMCSymbol()) + MMI->getContext().registerInlineAsmLabel(Offset.getMCSymbol()); + if (Offset.isBlockAddress()) { + const BlockAddress *BA = Offset.getBlockAddress(); + MCSymbol *Sym = GetBlockAddressSymbol(BA); + MMI->getContext().registerInlineAsmLabel(Sym); + } + OS << "(" << RISCVInstPrinter::getRegisterName(AddrReg.getReg()) << ")"; return false; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 670dee2edb1dfb..4e86bee6a55b54 100644 --- 
a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -18625,7 +18625,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, // Static chain parameter must not be passed in normal argument registers, // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain if (ArgFlags.isNest()) { - if (unsigned Reg = State.AllocateReg(RISCV::X7)) { + if (MCRegister Reg = State.AllocateReg(RISCV::X7)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19098,7 +19098,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher) { if (LocVT == MVT::i32 || LocVT == MVT::i64) { - if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { + if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19113,7 +19113,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; - if (unsigned Reg = State.AllocateReg(FPR16List)) { + if (MCRegister Reg = State.AllocateReg(FPR16List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19125,7 +19125,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; - if (unsigned Reg = State.AllocateReg(FPR32List)) { + if (MCRegister Reg = State.AllocateReg(FPR32List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19137,7 +19137,7 @@ 
bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; - if (unsigned Reg = State.AllocateReg(FPR64List)) { + if (MCRegister Reg = State.AllocateReg(FPR64List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19149,7 +19149,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || (LocVT == MVT::f64 && Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())) { - if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { + if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19184,7 +19184,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo)); } else { // Try and pass the address via a "fast" GPR. 
- if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) { + if (MCRegister GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) { LocInfo = CCValAssign::Indirect; LocVT = TLI.getSubtarget().getXLenVT(); State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo)); @@ -19222,7 +19222,7 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, if (LocVT == MVT::i32 || LocVT == MVT::i64) { // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (MCRegister Reg = State.AllocateReg(GPRList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19237,7 +19237,7 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, RISCV::F18_F, RISCV::F19_F, RISCV::F20_F, RISCV::F21_F}; - if (unsigned Reg = State.AllocateReg(FPR32List)) { + if (MCRegister Reg = State.AllocateReg(FPR32List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19249,7 +19249,7 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, RISCV::F24_D, RISCV::F25_D, RISCV::F26_D, RISCV::F27_D}; - if (unsigned Reg = State.AllocateReg(FPR64List)) { + if (MCRegister Reg = State.AllocateReg(FPR64List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19258,7 +19258,7 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())) { - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (MCRegister Reg = State.AllocateReg(GPRList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp 
b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index 822ab492c710b4..34e5d9224f7150 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -131,6 +131,9 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const { case RISCV::VMV_V_V: SrcIdx = 2; break; + case RISCV::VMERGE_VVM: + SrcIdx = 3; // TODO: We can also handle the false operand. + break; } MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc())); diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp index 0ea51bec29b816..154cb1399880bc 100644 --- a/llvm/lib/Target/X86/X86CallingConv.cpp +++ b/llvm/lib/Target/X86/X86CallingConv.cpp @@ -51,7 +51,7 @@ static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, for (unsigned I = 0; I < RequiredGprsUponSplit; I++) { // Marking the register as located. - unsigned Reg = State.AllocateReg(AvailableRegs[I]); + MCRegister Reg = State.AllocateReg(AvailableRegs[I]); // Since we previously made sure that 2 registers are available // we expect that a real register number will be returned. @@ -102,7 +102,7 @@ static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT, for (auto Reg : RegList) { // If the register is not marked as allocated - assign to it. if (!State.isAllocated(Reg)) { - unsigned AssigedReg = State.AllocateReg(Reg); + MCRegister AssigedReg = State.AllocateReg(Reg); assert(AssigedReg == Reg && "Expecting a valid register allocation"); State.addLoc( CCValAssign::getReg(ValNo, ValVT, AssigedReg, LocVT, LocInfo)); @@ -158,7 +158,7 @@ static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT, (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs()); // Assign XMM register - (shadow for HVA and non-shadow for non HVA). 
- if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) { + if (MCRegister Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) { // In Vectorcall Calling convention, additional shadow stack can be // created on top of the basic 32 bytes of win64. // It can happen if the fifth or sixth argument is vector type or HVA. @@ -209,7 +209,7 @@ static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT, return true; // If this is an HVA - Stop the search. // Assign XMM register. - if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) { + if (MCRegister Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return true; } @@ -259,7 +259,7 @@ static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT, // If there are no pending members, we are not in the middle of a split, // so do the usual inreg stuff. if (PendingMembers.empty()) { - if (unsigned Reg = State.AllocateReg(RegList)) { + if (MCRegister Reg = State.AllocateReg(RegList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return true; } diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index c7675c2f501761..0d2ce26a942e03 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -70,11 +70,12 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand); - // No sign extend instructions for i1 + // No sign extend instructions for i1 and sign extend load i8 for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); } 
setOperationAction(ISD::ConstantPool, PtrVT, Custom); diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index a06fe9555471fd..3a49863d91431a 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -352,6 +352,13 @@ void FunctionImporter::ImportMapTy::maybeAddDeclaration( ImportMap[FromModule].try_emplace(GUID, GlobalValueSummary::Declaration); } +SmallVector +FunctionImporter::ImportMapTy::getSourceModules() const { + SmallVector Modules(make_first_range(ImportMap)); + llvm::sort(Modules); + return Modules; +} + /// Import globals referenced by a function or other globals that are being /// imported, if importing such global is possible. class GlobalsImporter final { @@ -1710,11 +1717,6 @@ Expected FunctionImporter::importFunctions( unsigned ImportedCount = 0, ImportedGVCount = 0; IRMover Mover(DestModule); - // Do the actual import of functions now, one Module at a time - std::set ModuleNameOrderedList; - for (const auto &FunctionsToImportPerModule : ImportList.getImportMap()) { - ModuleNameOrderedList.insert(FunctionsToImportPerModule.first); - } auto getImportType = [&](const FunctionsToImportTy &GUIDToImportType, GlobalValue::GUID GUID) @@ -1725,7 +1727,8 @@ Expected FunctionImporter::importFunctions( return Iter->second; }; - for (const auto &Name : ModuleNameOrderedList) { + // Do the actual import of functions now, one Module at a time + for (const auto &Name : ImportList.getSourceModules()) { // Get the module for the import const auto &FunctionsToImportPerModule = ImportList.getImportMap().find(Name); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index a0e846c3b5a566..b3957b760b4a29 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -66,15 +66,15 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final 
TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, - ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI) + ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI, + ReversePostOrderTraversal &RPOT) : InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE, - BFI, BPI, PSI, DL, LI) {} + BFI, BPI, PSI, DL, LI, RPOT) {} virtual ~InstCombinerImpl() = default; /// Perform early cleanup and prepare the InstCombine worklist. - bool prepareWorklist(Function &F, - ReversePostOrderTraversal &RPOT); + bool prepareWorklist(Function &F); /// Run the combiner over the entire worklist until it is empty. /// diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index c3f79fe4f901ad..8a96d1d0fb4c90 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -5234,8 +5234,7 @@ class AliasScopeTracker { /// them to the worklist (this significantly speeds up instcombine on code where /// many instructions are dead or constant). Additionally, if we find a branch /// whose condition is a known constant, we only visit the reachable successors. 
-bool InstCombinerImpl::prepareWorklist( - Function &F, ReversePostOrderTraversal &RPOT) { +bool InstCombinerImpl::prepareWorklist(Function &F) { bool MadeIRChange = false; SmallPtrSet LiveBlocks; SmallVector InstrsForInstructionWorklist; @@ -5417,9 +5416,9 @@ static bool combineInstructionsOverFunction( << F.getName() << "\n"); InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, - ORE, BFI, BPI, PSI, DL, LI); + ORE, BFI, BPI, PSI, DL, LI, RPOT); IC.MaxArraySizeForCombine = MaxArraySize; - bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT); + bool MadeChangeInThisIteration = IC.prepareWorklist(F); MadeChangeInThisIteration |= IC.run(); if (!MadeChangeInThisIteration) break; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 39cf94daab7d3b..a417854b5f3303 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1657,11 +1657,17 @@ void PGOUseFunc::setBranchWeights() { continue; // We have a non-zero Branch BB. - unsigned Size = BBCountInfo.OutEdges.size(); - SmallVector EdgeCounts(Size, 0); + + // SuccessorCount can be greater than OutEdgesCount, because + // removed edges don't appear in OutEdges. 
+ unsigned OutEdgesCount = BBCountInfo.OutEdges.size(); + unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors(); + assert(OutEdgesCount <= SuccessorCount); + + SmallVector EdgeCounts(SuccessorCount, 0); uint64_t MaxCount = 0; - for (unsigned s = 0; s < Size; s++) { - const PGOUseEdge *E = BBCountInfo.OutEdges[s]; + for (unsigned It = 0; It < OutEdgesCount; It++) { + const PGOUseEdge *E = BBCountInfo.OutEdges[It]; const BasicBlock *SrcBB = E->SrcBB; const BasicBlock *DestBB = E->DestBB; if (DestBB == nullptr) diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index ce45c58e624e48..caf9f890418e29 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -69,9 +69,9 @@ static bool runSCCP(Function &F, const DataLayout &DL, // Mark the first block of the function as being executable. Solver.markBlockExecutable(&F.front()); - // Mark all arguments to the function as being overdefined. + // Initialize arguments based on attributes. for (Argument &AI : F.args()) - Solver.markOverdefined(&AI); + Solver.trackValueOfArgument(&AI); // Solve for constants. bool ResolvedUndefs = true; diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 4ca76793c9eae6..b124105beb11ff 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -632,14 +632,17 @@ bool CodeExtractor::isEligible() const { } void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, - const ValueSet &SinkCands) const { + const ValueSet &SinkCands, + bool CollectGlobalInputs) const { for (BasicBlock *BB : Blocks) { // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. 
for (Instruction &II : *BB) { for (auto &OI : II.operands()) { Value *V = OI; - if (!SinkCands.count(V) && definedInCaller(Blocks, V)) + if (!SinkCands.count(V) && + (definedInCaller(Blocks, V) || + (CollectGlobalInputs && llvm::isa(V)))) Inputs.insert(V); } diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp index 67fb806d3eae54..4bcd85ff2336bd 100644 --- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp +++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp @@ -342,7 +342,7 @@ static bool FixIrreducibleImpl(Function &F, CycleInfo &CI, DominatorTree &DT, #if defined(EXPENSIVE_CHECKS) CI.verify(); if (LI) { - LI.verify(DT); + LI->verify(DT); } #endif // EXPENSIVE_CHECKS diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index 670d88ac7cf8fa..c6f355a07d9c7f 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -1364,7 +1364,7 @@ void SCCPInstVisitor::visitInsertValueInst(InsertValueInst &IVI) { // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would // discover a concrete value later. - if (SCCPSolver::isOverdefined(ValueState[&IVI])) + if (ValueState[&IVI].isOverdefined()) return (void)markOverdefined(&IVI); // If this has more than one index, we can't handle it, drive all results to @@ -1436,7 +1436,7 @@ void SCCPInstVisitor::visitUnaryOperator(Instruction &I) { ValueLatticeElement &IV = ValueState[&I]; // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would // discover a concrete value later. - if (SCCPSolver::isOverdefined(IV)) + if (IV.isOverdefined()) return (void)markOverdefined(&I); // If something is unknown/undef, wait for it to resolve. @@ -1461,7 +1461,7 @@ void SCCPInstVisitor::visitFreezeInst(FreezeInst &I) { ValueLatticeElement &IV = ValueState[&I]; // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would // discover a concrete value later. 
- if (SCCPSolver::isOverdefined(IV)) + if (IV.isOverdefined()) return (void)markOverdefined(&I); // If something is unknown/undef, wait for it to resolve. @@ -1541,7 +1541,7 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) { void SCCPInstVisitor::visitCmpInst(CmpInst &I) { // Do not cache this lookup, getValueState calls later in the function might // invalidate the reference. - if (SCCPSolver::isOverdefined(ValueState[&I])) + if (ValueState[&I].isOverdefined()) return (void)markOverdefined(&I); Value *Op1 = I.getOperand(0); @@ -1571,7 +1571,7 @@ void SCCPInstVisitor::visitCmpInst(CmpInst &I) { // Handle getelementptr instructions. If all operands are constants then we // can turn this into a getelementptr ConstantExpr. void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { - if (SCCPSolver::isOverdefined(ValueState[&I])) + if (ValueState[&I].isOverdefined()) return (void)markOverdefined(&I); SmallVector Operands; @@ -1582,9 +1582,6 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { if (State.isUnknownOrUndef()) return; // Operands are not resolved yet. 
- if (SCCPSolver::isOverdefined(State)) - return (void)markOverdefined(&I); - if (Constant *C = getConstant(State, I.getOperand(i)->getType())) { Operands.push_back(C); continue; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index e7bb1b59942e2f..90909657bd938c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3005,7 +3005,8 @@ class BoUpSLP { } bool isOperandGatherNode(const EdgeInfo &UserEI) const { - return isGather() && UserTreeIndices.front().EdgeIdx == UserEI.EdgeIdx && + return isGather() && (Idx > 0 || !UserTreeIndices.empty()) && + UserTreeIndices.front().EdgeIdx == UserEI.EdgeIdx && UserTreeIndices.front().UserTE == UserEI.UserTE; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index fe1325f4163004..53b28a692059f6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1455,7 +1455,7 @@ void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent; printAsOperand(O, SlotTracker); - O << Indent << "= DERIVED-IV "; + O << " = DERIVED-IV "; getStartValue()->printAsOperand(O, SlotTracker); O << " + "; getOperand(1)->printAsOperand(O, SlotTracker); diff --git a/llvm/test/CodeGen/DirectX/all.ll b/llvm/test/CodeGen/DirectX/all.ll new file mode 100644 index 00000000000000..1c0b6486dc9358 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/all.ll @@ -0,0 +1,83 @@ +; RUN: opt -S -passes=dxil-intrinsic-expansion,dxil-op-lower -mtriple=dxil-pc-shadermodel6.0-library < %s | FileCheck %s + +; Make sure dxil operation function calls for all are generated for float and half. 
+ +; CHECK-LABEL: all_bool +; CHECK: icmp ne i1 %{{.*}}, false +define noundef i1 @all_bool(i1 noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.i1(i1 %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_int64_t +; CHECK: icmp ne i64 %{{.*}}, 0 +define noundef i1 @all_int64_t(i64 noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.i64(i64 %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_int +; CHECK: icmp ne i32 %{{.*}}, 0 +define noundef i1 @all_int(i32 noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.i32(i32 %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_int16_t +; CHECK: icmp ne i16 %{{.*}}, 0 +define noundef i1 @all_int16_t(i16 noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.i16(i16 %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_double +; CHECK: fcmp une double %{{.*}}, 0.000000e+00 +define noundef i1 @all_double(double noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.f64(double %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_float +; CHECK: fcmp une float %{{.*}}, 0.000000e+00 +define noundef i1 @all_float(float noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.f32(float %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_half +; CHECK: fcmp une half %{{.*}}, 0xH0000 +define noundef i1 @all_half(half noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.f16(half %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_bool4 +; CHECK: icmp ne <4 x i1> %{{.*}}, zeroinitialize +; CHECK: extractelement <4 x i1> %{{.*}}, i64 0 +; CHECK: extractelement <4 x i1> %{{.*}}, i64 1 +; CHECK: and i1 %{{.*}}, %{{.*}} +; CHECK: extractelement <4 x i1> %{{.*}}, i64 2 +; CHECK: and i1 %{{.*}}, %{{.*}} +; CHECK: extractelement <4 x i1> %{{.*}}, i64 3 +; CHECK: and i1 %{{.*}}, %{{.*}} +define noundef i1 @all_bool4(<4 x i1> noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.v4i1(<4 x i1> %p0) + ret i1 %dx.all +} + +declare i1 @llvm.dx.all.v4i1(<4 x i1>) +declare i1 @llvm.dx.all.i1(i1) +declare i1 @llvm.dx.all.i16(i16) +declare i1 @llvm.dx.all.i32(i32) 
+declare i1 @llvm.dx.all.i64(i64) +declare i1 @llvm.dx.all.f16(half) +declare i1 @llvm.dx.all.f32(float) +declare i1 @llvm.dx.all.f64(double) diff --git a/llvm/test/CodeGen/RISCV/inline-asm-mem-constraint.ll b/llvm/test/CodeGen/RISCV/inline-asm-mem-constraint.ll index e34df9b1c01f25..7fae0ca692669e 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-mem-constraint.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-mem-constraint.ll @@ -129,41 +129,23 @@ define void @constraint_m_with_global_1() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_global_1: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi0)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_global_1: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi0)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_global_1: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi0: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi00)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_global_1: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi0: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, 
%pcrel_lo(.Lpcrel_hi0)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_global_1: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi0: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi00)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_global_1: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi0: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi0)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*m"(ptr elementtype(i32) @eg) ret void } @@ -185,41 +167,23 @@ define void @constraint_m_with_global_2() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_global_2: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi1: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi1)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_global_2: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi1: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi1)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_global_2: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi1: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; 
RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi110)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_global_2: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi1: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+4) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi1)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_global_2: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi1: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi110)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_global_2: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi1: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+4) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi1)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*m"(ptr elementtype(i32) getelementptr ([400000 x i32], ptr @eg, i32 0, i32 1)) ret void } @@ -241,41 +205,23 @@ define void @constraint_m_with_global_3() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_global_3: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi2: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi2)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_global_3: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi2: -; 
RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi2)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_global_3: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi2: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi210)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_global_3: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi2: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+8000) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi2)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_global_3: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi2: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi210)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_global_3: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi2: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+8000) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi2)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*m"(ptr elementtype(i32) getelementptr ([400000 x i32], ptr @eg, i32 0, i32 2000)) ret void } @@ -407,89 +353,47 @@ define void @constraint_m_with_extern_weak_global_3() nounwind { } define void @constraint_m_with_local_1() nounwind { -; RV32I-NO-INTEGRATED-LABEL: constraint_m_with_local_1: -; 
RV32I-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-NO-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV32I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp0) -; RV32I-NO-INTEGRATED-NEXT: #APP -; RV32I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp0)(a0) -; RV32I-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-NO-INTEGRATED-NEXT: ret -; -; RV64I-NO-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV64I-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-NO-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV64I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp0) -; RV64I-NO-INTEGRATED-NEXT: #APP -; RV64I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp0)(a0) -; RV64I-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi6: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi6)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi6: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi6)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV32I-INTEGRATED: # %bb.0: # %entry -; RV32I-INTEGRATED-NEXT: .Ltmp0: # Block 
address taken -; RV32I-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp0) -; RV32I-INTEGRATED-NEXT: #APP -; RV32I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp00)(a0) -; RV32I-INTEGRATED-NEXT: #NO_APP -; RV32I-INTEGRATED-NEXT: ret +; RV32I-LABEL: constraint_m_with_local_1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: .Ltmp0: # Block address taken +; RV32I-NEXT: # %bb.1: # %label +; RV32I-NEXT: lui a0, %hi(.Ltmp0) +; RV32I-NEXT: #APP +; RV32I-NEXT: lw zero, %lo(.Ltmp0)(a0) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret ; -; RV64I-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV64I-INTEGRATED: # %bb.0: # %entry -; RV64I-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV64I-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp0) -; RV64I-INTEGRATED-NEXT: #APP -; RV64I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp00)(a0) -; RV64I-INTEGRATED-NEXT: #NO_APP -; RV64I-INTEGRATED-NEXT: ret +; RV64I-LABEL: constraint_m_with_local_1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: .Ltmp0: # Block address taken +; RV64I-NEXT: # %bb.1: # %label +; RV64I-NEXT: lui a0, %hi(.Ltmp0) +; RV64I-NEXT: #APP +; RV64I-NEXT: lw zero, %lo(.Ltmp0)(a0) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret ; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi6: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi60)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_local_1: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: .Ltmp0: # Block address taken +; RV32I-MEDIUM-NEXT: # %bb.1: # %label +; RV32I-MEDIUM-NEXT: .Lpcrel_hi6: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp0) +; 
RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi6)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi6: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi60)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_local_1: +; RV64I-MEDIUM: # %bb.0: # %entry +; RV64I-MEDIUM-NEXT: .Ltmp0: # Block address taken +; RV64I-MEDIUM-NEXT: # %bb.1: # %label +; RV64I-MEDIUM-NEXT: .Lpcrel_hi6: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp0) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi6)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret entry: br label %label @@ -499,89 +403,47 @@ label: } define void @constraint_m_with_local_2() nounwind { -; RV32I-NO-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV32I-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-NO-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV32I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp1+4) -; RV32I-NO-INTEGRATED-NEXT: #APP -; RV32I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp1+4)(a0) -; RV32I-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-NO-INTEGRATED-NEXT: ret -; -; RV64I-NO-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV64I-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-NO-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV64I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp1+4) -; RV64I-NO-INTEGRATED-NEXT: #APP -; RV64I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp1+4)(a0) -; RV64I-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-NO-INTEGRATED-NEXT: ret 
-; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi7: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp1+4) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi7)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi7: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp1+4) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi7)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV32I-INTEGRATED: # %bb.0: # %entry -; RV32I-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV32I-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp1+4) -; RV32I-INTEGRATED-NEXT: #APP -; RV32I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp10+4)(a0) -; RV32I-INTEGRATED-NEXT: #NO_APP -; RV32I-INTEGRATED-NEXT: ret +; RV32I-LABEL: constraint_m_with_local_2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: .Ltmp1: # Block address taken +; RV32I-NEXT: # %bb.1: # %label +; RV32I-NEXT: lui a0, %hi(.Ltmp1+4) +; RV32I-NEXT: #APP +; RV32I-NEXT: lw zero, %lo(.Ltmp1+4)(a0) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret ; -; RV64I-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV64I-INTEGRATED: # %bb.0: # %entry -; RV64I-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV64I-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-INTEGRATED-NEXT: 
lui a0, %hi(.Ltmp1+4) -; RV64I-INTEGRATED-NEXT: #APP -; RV64I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp10+4)(a0) -; RV64I-INTEGRATED-NEXT: #NO_APP -; RV64I-INTEGRATED-NEXT: ret +; RV64I-LABEL: constraint_m_with_local_2: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: .Ltmp1: # Block address taken +; RV64I-NEXT: # %bb.1: # %label +; RV64I-NEXT: lui a0, %hi(.Ltmp1+4) +; RV64I-NEXT: #APP +; RV64I-NEXT: lw zero, %lo(.Ltmp1+4)(a0) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret ; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi7: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp1+4) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi70)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_local_2: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: .Ltmp1: # Block address taken +; RV32I-MEDIUM-NEXT: # %bb.1: # %label +; RV32I-MEDIUM-NEXT: .Lpcrel_hi7: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp1+4) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi7)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi7: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp1+4) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi70)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_local_2: +; RV64I-MEDIUM: # %bb.0: # %entry +; 
RV64I-MEDIUM-NEXT: .Ltmp1: # Block address taken +; RV64I-MEDIUM-NEXT: # %bb.1: # %label +; RV64I-MEDIUM-NEXT: .Lpcrel_hi7: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp1+4) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi7)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret entry: br label %label @@ -591,89 +453,47 @@ label: } define void @constraint_m_with_local_3() nounwind { -; RV32I-NO-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV32I-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-NO-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV32I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp2+2000) -; RV32I-NO-INTEGRATED-NEXT: #APP -; RV32I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp2+2000)(a0) -; RV32I-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-NO-INTEGRATED-NEXT: ret -; -; RV64I-NO-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV64I-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-NO-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV64I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp2+2000) -; RV64I-NO-INTEGRATED-NEXT: #APP -; RV64I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp2+2000)(a0) -; RV64I-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi8: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp2+2000) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi8)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp2: # Block address 
taken -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi8: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp2+2000) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi8)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV32I-INTEGRATED: # %bb.0: # %entry -; RV32I-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV32I-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp2+2000) -; RV32I-INTEGRATED-NEXT: #APP -; RV32I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp20+2000)(a0) -; RV32I-INTEGRATED-NEXT: #NO_APP -; RV32I-INTEGRATED-NEXT: ret +; RV32I-LABEL: constraint_m_with_local_3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: .Ltmp2: # Block address taken +; RV32I-NEXT: # %bb.1: # %label +; RV32I-NEXT: lui a0, %hi(.Ltmp2+2000) +; RV32I-NEXT: #APP +; RV32I-NEXT: lw zero, %lo(.Ltmp2+2000)(a0) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret ; -; RV64I-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV64I-INTEGRATED: # %bb.0: # %entry -; RV64I-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV64I-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp2+2000) -; RV64I-INTEGRATED-NEXT: #APP -; RV64I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp20+2000)(a0) -; RV64I-INTEGRATED-NEXT: #NO_APP -; RV64I-INTEGRATED-NEXT: ret +; RV64I-LABEL: constraint_m_with_local_3: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: .Ltmp2: # Block address taken +; RV64I-NEXT: # %bb.1: # %label +; RV64I-NEXT: lui a0, %hi(.Ltmp2+2000) +; RV64I-NEXT: #APP +; RV64I-NEXT: lw zero, %lo(.Ltmp2+2000)(a0) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret ; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; 
RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi8: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp2+2000) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi80)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_local_3: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: .Ltmp2: # Block address taken +; RV32I-MEDIUM-NEXT: # %bb.1: # %label +; RV32I-MEDIUM-NEXT: .Lpcrel_hi8: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp2+2000) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi8)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi8: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp2+2000) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi80)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_local_3: +; RV64I-MEDIUM: # %bb.0: # %entry +; RV64I-MEDIUM-NEXT: .Ltmp2: # Block address taken +; RV64I-MEDIUM-NEXT: # %bb.1: # %label +; RV64I-MEDIUM-NEXT: .Lpcrel_hi8: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp2+2000) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi8)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret entry: br label %label @@ -740,8 +560,8 @@ define void @constraint_m_with_multi_operands() nounwind { ; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi9: ; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi90)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, 
%pcrel_lo(.Lpcrel_hi90)(a0) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi9)(a0) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi9)(a0) ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV32I-MEDIUM-INTEGRATED-NEXT: ret ; @@ -750,8 +570,8 @@ define void @constraint_m_with_multi_operands() nounwind { ; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi9: ; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi90)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi90)(a0) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi9)(a0) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi9)(a0) ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV64I-MEDIUM-INTEGRATED-NEXT: ret call void asm "sw zero, $0; sw zero, $1", "=*m,=*m"(ptr elementtype(i32) @eg, ptr elementtype(i32) @eg) @@ -781,53 +601,29 @@ define void @constraint_m_with_multi_asm() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_multi_asm: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi10: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_multi_asm: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi10: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; 
RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_multi_asm: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi10: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi100)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi100)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_multi_asm: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi10: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_multi_asm: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi10: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi100)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi100)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_multi_asm: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi10: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: 
#APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*m"(ptr elementtype(i32) @eg) call void asm "sw zero, $0", "=*m"(ptr elementtype(i32) @eg) ret void @@ -935,8 +731,8 @@ define i32 @constraint_m_with_callbr_multi_operands(i32 %a) { ; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi11: ; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi111)(a1) -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi111)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi11)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi11)(a1) ; RV32I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB14_2 ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal @@ -953,8 +749,8 @@ define i32 @constraint_m_with_callbr_multi_operands(i32 %a) { ; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi11: ; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi111)(a1) -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi111)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi11)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi11)(a1) ; RV64I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB14_2 ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal @@ -1101,12 +897,12 @@ define i32 @constraint_m_with_multi_callbr_asm(i32 %a) { ; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi12: ; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi120)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi12)(a1) ; RV32I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB15_3 ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; 
RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal0 ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi120)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi12)(a1) ; RV32I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB15_3 ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.2: # %normal1 @@ -1123,12 +919,12 @@ define i32 @constraint_m_with_multi_callbr_asm(i32 %a) { ; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi12: ; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi120)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi12)(a1) ; RV64I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB15_3 ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal0 ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi120)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi12)(a1) ; RV64I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB15_3 ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.2: # %normal1 @@ -1262,41 +1058,23 @@ define void @constraint_o_with_global_1() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_global_1: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi13: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi13)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_global_1: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi13: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; 
RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi13)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_global_1: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi13: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi130)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_global_1: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi13: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi13)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_global_1: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi13: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi130)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_o_with_global_1: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi13: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi13)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*o"(ptr elementtype(i32) @eg) ret void } @@ -1316,43 +1094,25 @@ define void @constraint_o_with_global_2() nounwind { ; RV64I-NEXT: #APP ; RV64I-NEXT: sw zero, %lo(eg+4)(a0) ; RV64I-NEXT: #NO_APP -; RV64I-NEXT: ret -; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_global_2: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi14: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, 
%pcrel_hi(eg+4) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi14)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_global_2: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi14: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi14)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_global_2: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi14: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi140)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_global_2: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi14: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi140)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_global_2: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi14: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+4) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi14)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret +; +; RV64I-MEDIUM-LABEL: constraint_o_with_global_2: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi14: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+4) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi14)(a0) +; 
RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*o"(ptr elementtype(i32) getelementptr ([400000 x i32], ptr @eg, i32 0, i32 1)) ret void } @@ -1374,41 +1134,23 @@ define void @constraint_o_with_global_3() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_global_3: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi15: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi15)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_global_3: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi15: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi15)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_global_3: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi15: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi150)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_global_3: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi15: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+8000) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi15)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_global_3: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi15: -; 
RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi150)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_o_with_global_3: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi15: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+8000) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi15)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*o"(ptr elementtype(i32) getelementptr ([400000 x i32], ptr @eg, i32 0, i32 2000)) ret void } @@ -1562,53 +1304,29 @@ define void @constraint_o_with_multi_asm() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_multi_asm: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi19: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_multi_asm: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi19: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: 
constraint_o_with_multi_asm: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi19: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi190)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi190)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_multi_asm: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi19: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_multi_asm: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi19: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi190)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi190)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_o_with_multi_asm: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi19: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*o"(ptr elementtype(i32) @eg) call void asm "sw zero, $0", "=*o"(ptr 
elementtype(i32) @eg) ret void @@ -1716,8 +1434,8 @@ define i32 @constraint_o_with_callbr_multi_operands(i32 %a) { ; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi20: ; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi200)(a1) -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi200)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi20)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi20)(a1) ; RV32I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB26_2 ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal @@ -1734,8 +1452,8 @@ define i32 @constraint_o_with_callbr_multi_operands(i32 %a) { ; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi20: ; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi200)(a1) -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi200)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi20)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi20)(a1) ; RV64I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB26_2 ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal @@ -1882,12 +1600,12 @@ define i32 @constraint_o_with_multi_callbr_asm(i32 %a) { ; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi21: ; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi211)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi21)(a1) ; RV32I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB27_3 ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal0 ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi211)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, 
%pcrel_lo(.Lpcrel_hi21)(a1) ; RV32I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB27_3 ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.2: # %normal1 @@ -1904,12 +1622,12 @@ define i32 @constraint_o_with_multi_callbr_asm(i32 %a) { ; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi21: ; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi211)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi21)(a1) ; RV64I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB27_3 ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal0 ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi211)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi21)(a1) ; RV64I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB27_3 ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.2: # %normal1 @@ -1934,89 +1652,47 @@ fail: } define void @constraint_o_with_local_1() nounwind { -; RV32I-NO-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV32I-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-NO-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV32I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp3) -; RV32I-NO-INTEGRATED-NEXT: #APP -; RV32I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp3)(a0) -; RV32I-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-NO-INTEGRATED-NEXT: ret -; -; RV64I-NO-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV64I-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-NO-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV64I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp3) -; RV64I-NO-INTEGRATED-NEXT: #APP -; RV64I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp3)(a0) -; RV64I-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_local_1: -; 
RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi22: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp3) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi22)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi22: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp3) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi22)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV32I-INTEGRATED: # %bb.0: # %entry -; RV32I-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV32I-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp3) -; RV32I-INTEGRATED-NEXT: #APP -; RV32I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp30)(a0) -; RV32I-INTEGRATED-NEXT: #NO_APP -; RV32I-INTEGRATED-NEXT: ret +; RV32I-LABEL: constraint_o_with_local_1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: .Ltmp3: # Block address taken +; RV32I-NEXT: # %bb.1: # %label +; RV32I-NEXT: lui a0, %hi(.Ltmp3) +; RV32I-NEXT: #APP +; RV32I-NEXT: lw zero, %lo(.Ltmp3)(a0) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret ; -; RV64I-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV64I-INTEGRATED: # %bb.0: # %entry -; RV64I-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV64I-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp3) -; RV64I-INTEGRATED-NEXT: #APP -; RV64I-INTEGRATED-NEXT: 
lw zero, %lo(.Ltmp30)(a0) -; RV64I-INTEGRATED-NEXT: #NO_APP -; RV64I-INTEGRATED-NEXT: ret +; RV64I-LABEL: constraint_o_with_local_1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: .Ltmp3: # Block address taken +; RV64I-NEXT: # %bb.1: # %label +; RV64I-NEXT: lui a0, %hi(.Ltmp3) +; RV64I-NEXT: #APP +; RV64I-NEXT: lw zero, %lo(.Ltmp3)(a0) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret ; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi22: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp3) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi220)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_local_1: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: .Ltmp3: # Block address taken +; RV32I-MEDIUM-NEXT: # %bb.1: # %label +; RV32I-MEDIUM-NEXT: .Lpcrel_hi22: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp3) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi22)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi22: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp3) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi220)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_o_with_local_1: +; RV64I-MEDIUM: # %bb.0: # %entry +; RV64I-MEDIUM-NEXT: .Ltmp3: # Block address taken +; RV64I-MEDIUM-NEXT: # %bb.1: # %label +; 
RV64I-MEDIUM-NEXT: .Lpcrel_hi22: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp3) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi22)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret entry: br label %label @@ -2026,89 +1702,47 @@ label: } define void @constraint_o_with_local_2() nounwind { -; RV32I-NO-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV32I-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-NO-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV32I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp4+4) -; RV32I-NO-INTEGRATED-NEXT: #APP -; RV32I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp4+4)(a0) -; RV32I-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-NO-INTEGRATED-NEXT: ret -; -; RV64I-NO-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV64I-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-NO-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV64I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp4+4) -; RV64I-NO-INTEGRATED-NEXT: #APP -; RV64I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp4+4)(a0) -; RV64I-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi23: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp4+4) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi23)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: 
.Lpcrel_hi23: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp4+4) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi23)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV32I-INTEGRATED: # %bb.0: # %entry -; RV32I-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV32I-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp4+4) -; RV32I-INTEGRATED-NEXT: #APP -; RV32I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp40+4)(a0) -; RV32I-INTEGRATED-NEXT: #NO_APP -; RV32I-INTEGRATED-NEXT: ret +; RV32I-LABEL: constraint_o_with_local_2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: .Ltmp4: # Block address taken +; RV32I-NEXT: # %bb.1: # %label +; RV32I-NEXT: lui a0, %hi(.Ltmp4+4) +; RV32I-NEXT: #APP +; RV32I-NEXT: lw zero, %lo(.Ltmp4+4)(a0) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret ; -; RV64I-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV64I-INTEGRATED: # %bb.0: # %entry -; RV64I-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV64I-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp4+4) -; RV64I-INTEGRATED-NEXT: #APP -; RV64I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp40+4)(a0) -; RV64I-INTEGRATED-NEXT: #NO_APP -; RV64I-INTEGRATED-NEXT: ret +; RV64I-LABEL: constraint_o_with_local_2: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: .Ltmp4: # Block address taken +; RV64I-NEXT: # %bb.1: # %label +; RV64I-NEXT: lui a0, %hi(.Ltmp4+4) +; RV64I-NEXT: #APP +; RV64I-NEXT: lw zero, %lo(.Ltmp4+4)(a0) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret ; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi23: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp4+4) -; 
RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi230)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_local_2: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: .Ltmp4: # Block address taken +; RV32I-MEDIUM-NEXT: # %bb.1: # %label +; RV32I-MEDIUM-NEXT: .Lpcrel_hi23: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp4+4) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi23)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi23: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp4+4) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi230)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_o_with_local_2: +; RV64I-MEDIUM: # %bb.0: # %entry +; RV64I-MEDIUM-NEXT: .Ltmp4: # Block address taken +; RV64I-MEDIUM-NEXT: # %bb.1: # %label +; RV64I-MEDIUM-NEXT: .Lpcrel_hi23: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp4+4) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi23)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret entry: br label %label @@ -2118,89 +1752,47 @@ label: } define void @constraint_o_with_local_3() nounwind { -; RV32I-NO-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV32I-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-NO-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV32I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp5+2000) -; RV32I-NO-INTEGRATED-NEXT: #APP -; RV32I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp5+2000)(a0) -; 
RV32I-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-NO-INTEGRATED-NEXT: ret -; -; RV64I-NO-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV64I-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-NO-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV64I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp5+2000) -; RV64I-NO-INTEGRATED-NEXT: #APP -; RV64I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp5+2000)(a0) -; RV64I-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi24: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp5+2000) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi24)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi24: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp5+2000) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi24)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV32I-INTEGRATED: # %bb.0: # %entry -; RV32I-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV32I-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp5+2000) -; RV32I-INTEGRATED-NEXT: #APP -; RV32I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp50+2000)(a0) -; RV32I-INTEGRATED-NEXT: #NO_APP -; RV32I-INTEGRATED-NEXT: ret +; 
RV32I-LABEL: constraint_o_with_local_3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: .Ltmp5: # Block address taken +; RV32I-NEXT: # %bb.1: # %label +; RV32I-NEXT: lui a0, %hi(.Ltmp5+2000) +; RV32I-NEXT: #APP +; RV32I-NEXT: lw zero, %lo(.Ltmp5+2000)(a0) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret ; -; RV64I-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV64I-INTEGRATED: # %bb.0: # %entry -; RV64I-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV64I-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp5+2000) -; RV64I-INTEGRATED-NEXT: #APP -; RV64I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp50+2000)(a0) -; RV64I-INTEGRATED-NEXT: #NO_APP -; RV64I-INTEGRATED-NEXT: ret +; RV64I-LABEL: constraint_o_with_local_3: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: .Ltmp5: # Block address taken +; RV64I-NEXT: # %bb.1: # %label +; RV64I-NEXT: lui a0, %hi(.Ltmp5+2000) +; RV64I-NEXT: #APP +; RV64I-NEXT: lw zero, %lo(.Ltmp5+2000)(a0) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret ; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi24: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp5+2000) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi240)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_local_3: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: .Ltmp5: # Block address taken +; RV32I-MEDIUM-NEXT: # %bb.1: # %label +; RV32I-MEDIUM-NEXT: .Lpcrel_hi24: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp5+2000) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi24)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_local_3: -; 
RV64I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi24: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp5+2000) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi240)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_o_with_local_3: +; RV64I-MEDIUM: # %bb.0: # %entry +; RV64I-MEDIUM-NEXT: .Ltmp5: # Block address taken +; RV64I-MEDIUM-NEXT: # %bb.1: # %label +; RV64I-MEDIUM-NEXT: .Lpcrel_hi24: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp5+2000) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi24)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret entry: br label %label diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll index d26fd0ca26c729..3a439cdb996fac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll @@ -159,9 +159,8 @@ define @vmerge_larger_vl_same_passthru( %pa define @vmerge_smaller_vl_different_passthru( %pt1, %pt2, %x, %y, %m) { ; CHECK-LABEL: vmerge_smaller_vl_different_passthru: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, mu +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu ; CHECK-NEXT: vadd.vv v8, v10, v11, v0.t -; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vmv.v.v v9, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index 39055dc5adfcf7..6700920cebff0a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -1072,9 +1072,8 
@@ define @vmerge_larger_vl_same_passthru( %pa define @vmerge_smaller_vl_different_passthru( %pt1, %pt2, %x, %y, %m) { ; CHECK-LABEL: vmerge_smaller_vl_different_passthru: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; CHECK-NEXT: vadd.vv v8, v10, v11 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; CHECK-NEXT: vadd.vv v8, v10, v11 ; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/Xtensa/load.ll b/llvm/test/CodeGen/Xtensa/load.ll new file mode 100644 index 00000000000000..2f730f56eb1f51 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/load.ll @@ -0,0 +1,12 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=xtensa < %s | FileCheck %s + +define signext i8 @test_load_i8(ptr %p){ +; CHECK-LABEL: test_load_i8: +; CHECK: l8ui a8, a2, 0 +; CHECK-NEXT: slli a8, a8, 24 +; CHECK-NEXT: srai a2, a8, 24 +; CHECK-NEXT: ret + %1 = load i8, ptr %p, align 1 + ret i8 %1 +} diff --git a/llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll b/llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll new file mode 100644 index 00000000000000..4f5f936606ca3f --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll @@ -0,0 +1,42 @@ +; RUN: rm -rf %t && split-file %s %t + +; RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata +; RUN: opt < %t/a.ll --passes=pgo-instr-use -pgo-test-profile-file=%t/a.profdata + +;--- a.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-redhat-linux-gnu" + +define void @_bar() presplitcoroutine personality ptr null { + %1 = call token @llvm.coro.save(ptr null) + %2 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %2, label %5 [ + i8 0, label %3 + i8 1, label %4 + ] + +3: ; preds = %0 + ret void + +4: ; preds = %0 + ret void + +5: ; preds = %0 + ret void +} + +declare 
token @llvm.coro.save(ptr) + +declare i8 @llvm.coro.suspend(token, i1) + +;--- a.proftext +# IR level Instrumentation Flag +:ir + +_bar +# Func Hash: +1063705160175073211 +# Num Counters: +2 +1 +0 diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll index e03e45312687bc..673c8f6c9488d6 100644 --- a/llvm/test/Transforms/InstCombine/phi.ll +++ b/llvm/test/Transforms/InstCombine/phi.ll @@ -2714,3 +2714,31 @@ join: %cmp = icmp slt i32 %13, 0 ret i1 %cmp } + +define void @phi_op_in_loop(i1 %c, i32 %x) { +; CHECK-LABEL: @phi_op_in_loop( +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: if: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[X:%.*]], [[IF]] ], [ 0, [[LOOP]] ] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[PHI]], 1 +; CHECK-NEXT: call void @use(i32 [[AND]]) +; CHECK-NEXT: br label [[LOOP]] +; + br label %loop + +loop: + br i1 %c, label %if, label %loop.latch + +if: + br label %loop.latch + +loop.latch: + %phi = phi i32 [ %x, %if ], [ 0, %loop ] + %and = and i32 %phi, 1 + call void @use(i32 %and) + br label %loop +} diff --git a/llvm/test/Transforms/SCCP/pointer-nonnull.ll b/llvm/test/Transforms/SCCP/pointer-nonnull.ll new file mode 100644 index 00000000000000..85367d8a56765e --- /dev/null +++ b/llvm/test/Transforms/SCCP/pointer-nonnull.ll @@ -0,0 +1,144 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=sccp < %s | FileCheck %s + +define i1 @test_no_attr(ptr %p) { +; CHECK-LABEL: define i1 @test_no_attr( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[P]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ne ptr %p, null + ret i1 %cmp +} + +define i1 @test_nonnull(ptr nonnull %p) { +; CHECK-LABEL: define i1 @test_nonnull( +; CHECK-SAME: ptr nonnull [[P:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] 
= icmp ne ptr [[P]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ne ptr %p, null + ret i1 %cmp +} + +define i1 @test_nonnull_eq(ptr nonnull %p) { +; CHECK-LABEL: define i1 @test_nonnull_eq( +; CHECK-SAME: ptr nonnull [[P:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp eq ptr %p, null + ret i1 %cmp +} + +define i1 @test_dereferenceable(ptr dereferenceable(4) %p) { +; CHECK-LABEL: define i1 @test_dereferenceable( +; CHECK-SAME: ptr dereferenceable(4) [[P:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[P]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ne ptr %p, null + ret i1 %cmp +} + +define i1 @test_gep_no_flags(ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_gep_no_flags( +; CHECK-SAME: ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[GEP]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr i8, ptr %p, i64 %x + %cmp = icmp ne ptr %gep, null + ret i1 %cmp +} + +define i1 @test_gep_nuw(ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_gep_nuw( +; CHECK-SAME: ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[GEP]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr nuw i8, ptr %p, i64 %x + %cmp = icmp ne ptr %gep, null + ret i1 %cmp +} + +define i1 @test_gep_inbounds(ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_gep_inbounds( +; CHECK-SAME: ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[GEP]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr inbounds i8, ptr %p, i64 %x + %cmp = icmp ne ptr %gep, null + ret i1 %cmp +} + +define i1 @test_gep_inbounds_null_pointer_valid(ptr nonnull %p, i64 %x) null_pointer_is_valid 
{ +; CHECK-LABEL: define i1 @test_gep_inbounds_null_pointer_valid( +; CHECK-SAME: ptr nonnull [[P:%.*]], i64 [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[GEP]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr inbounds i8, ptr %p, i64 %x + %cmp = icmp ne ptr %gep, null + ret i1 %cmp +} + +define i1 @test_select(i1 %c, ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_select( +; CHECK-SAME: i1 [[C:%.*]], ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], ptr [[P]], ptr [[GEP]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[SEL]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr nuw i8, ptr %p, i64 %x + %sel = select i1 %c, ptr %p, ptr %gep + %cmp = icmp ne ptr %sel, null + ret i1 %cmp +} + +define i1 @test_select_not_nuw(i1 %c, ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_select_not_nuw( +; CHECK-SAME: i1 [[C:%.*]], ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], ptr [[P]], ptr [[GEP]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[SEL]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr i8, ptr %p, i64 %x + %sel = select i1 %c, ptr %p, ptr %gep + %cmp = icmp ne ptr %sel, null + ret i1 %cmp +} + +define i1 @test_phi(i1 %c, ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_phi( +; CHECK-SAME: i1 [[C:%.*]], ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[JOIN:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: br label %[[JOIN]] +; CHECK: [[JOIN]]: +; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[P]], %[[ENTRY]] ], [ [[GEP]], %[[IF]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr 
[[PHI]], null +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + br i1 %c, label %if, label %join + +if: + %gep = getelementptr nuw i8, ptr %p, i64 %x + br label %join + +join: + %phi = phi ptr [ %p, %entry ], [ %gep, %if ] + %cmp = icmp ne ptr %phi, null + ret i1 %cmp +} diff --git a/llvm/test/Transforms/SCCP/range-attribute.ll b/llvm/test/Transforms/SCCP/range-attribute.ll index 209c5464ccf221..8b156e6f483ed4 100644 --- a/llvm/test/Transforms/SCCP/range-attribute.ll +++ b/llvm/test/Transforms/SCCP/range-attribute.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=ipsccp -S | FileCheck %s +; RUN: opt < %s -passes=ipsccp -S | FileCheck %s --check-prefixes=CHECK,IPSCCP +; RUN: opt < %s -passes=sccp -S | FileCheck %s --check-prefixes=CHECK,SCCP declare void @use(i1) declare i32 @get_i32() @@ -26,8 +27,11 @@ define void @range_attribute(i32 range(i32 0, 10) %v) { } define i32 @range_attribute_single(i32 range(i32 0, 1) %v) { -; CHECK-LABEL: @range_attribute_single( -; CHECK-NEXT: ret i32 0 +; IPSCCP-LABEL: @range_attribute_single( +; IPSCCP-NEXT: ret i32 0 +; +; SCCP-LABEL: @range_attribute_single( +; SCCP-NEXT: ret i32 [[V:%.*]] ; ret i32 %v } @@ -82,35 +86,52 @@ define void @call_range_result() { } define internal i1 @ip_cmp_range_attribute(i32 %v) { -; CHECK-LABEL: @ip_cmp_range_attribute( -; CHECK-NEXT: ret i1 poison +; IPSCCP-LABEL: @ip_cmp_range_attribute( +; IPSCCP-NEXT: ret i1 poison +; +; SCCP-LABEL: @ip_cmp_range_attribute( +; SCCP-NEXT: [[C:%.*]] = icmp ult i32 [[V:%.*]], 10 +; SCCP-NEXT: ret i1 [[C]] ; %c = icmp ult i32 %v, 10 ret i1 %c } define i1 @ip_range_attribute(i32 range(i32 0, 10) %v) { -; CHECK-LABEL: @ip_range_attribute( -; CHECK-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_attribute(i32 [[V:%.*]]) -; CHECK-NEXT: ret i1 true +; IPSCCP-LABEL: @ip_range_attribute( +; IPSCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_attribute(i32 [[V:%.*]]) +; IPSCCP-NEXT: ret i1 true +; +; SCCP-LABEL: 
@ip_range_attribute( +; SCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_attribute(i32 [[V:%.*]]) +; SCCP-NEXT: ret i1 [[C]] ; %c = call i1 @ip_cmp_range_attribute(i32 %v) ret i1 %c } define internal i1 @ip_cmp_range_call(i32 %v) { -; CHECK-LABEL: @ip_cmp_range_call( -; CHECK-NEXT: ret i1 poison +; IPSCCP-LABEL: @ip_cmp_range_call( +; IPSCCP-NEXT: ret i1 poison +; +; SCCP-LABEL: @ip_cmp_range_call( +; SCCP-NEXT: [[C:%.*]] = icmp ult i32 [[V:%.*]], 10 +; SCCP-NEXT: ret i1 [[C]] ; %c = icmp ult i32 %v, 10 ret i1 %c } define i1 @ip_range_call() { -; CHECK-LABEL: @ip_range_call( -; CHECK-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() -; CHECK-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_call(i32 [[V]]) -; CHECK-NEXT: ret i1 true +; IPSCCP-LABEL: @ip_range_call( +; IPSCCP-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() +; IPSCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_call(i32 [[V]]) +; IPSCCP-NEXT: ret i1 true +; +; SCCP-LABEL: @ip_range_call( +; SCCP-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() +; SCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_call(i32 [[V]]) +; SCCP-NEXT: ret i1 [[C]] ; %v = call range(i32 0, 10) i32 @get_i32() %c = call i1 @ip_cmp_range_call(i32 %v) @@ -118,18 +139,27 @@ define i1 @ip_range_call() { } define internal i1 @ip_cmp_range_result(i32 %v) { -; CHECK-LABEL: @ip_cmp_range_result( -; CHECK-NEXT: ret i1 poison +; IPSCCP-LABEL: @ip_cmp_range_result( +; IPSCCP-NEXT: ret i1 poison +; +; SCCP-LABEL: @ip_cmp_range_result( +; SCCP-NEXT: [[C:%.*]] = icmp ult i32 [[V:%.*]], 10 +; SCCP-NEXT: ret i1 [[C]] ; %c = icmp ult i32 %v, 10 ret i1 %c } define i1 @ip_range_result() { -; CHECK-LABEL: @ip_range_result( -; CHECK-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() -; CHECK-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_result(i32 [[V]]) -; CHECK-NEXT: ret i1 true +; IPSCCP-LABEL: @ip_range_result( +; IPSCCP-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() +; IPSCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_result(i32 [[V]]) +; 
IPSCCP-NEXT: ret i1 true +; +; SCCP-LABEL: @ip_range_result( +; SCCP-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() +; SCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_result(i32 [[V]]) +; SCCP-NEXT: ret i1 [[C]] ; %v = call range(i32 0, 10) i32 @get_i32() %c = call i1 @ip_cmp_range_result(i32 %v) @@ -137,17 +167,25 @@ define i1 @ip_range_result() { } define internal i1 @ip_cmp_with_range_attribute(i32 range(i32 0, 10) %v) { -; CHECK-LABEL: @ip_cmp_with_range_attribute( -; CHECK-NEXT: ret i1 poison +; IPSCCP-LABEL: @ip_cmp_with_range_attribute( +; IPSCCP-NEXT: ret i1 poison +; +; SCCP-LABEL: @ip_cmp_with_range_attribute( +; SCCP-NEXT: [[C:%.*]] = icmp eq i32 [[V:%.*]], 5 +; SCCP-NEXT: ret i1 [[C]] ; %c = icmp eq i32 %v, 5 ret i1 %c } define i1 @ip_range_attribute_constant() { -; CHECK-LABEL: @ip_range_attribute_constant( -; CHECK-NEXT: [[C:%.*]] = call i1 @ip_cmp_with_range_attribute(i32 5) -; CHECK-NEXT: ret i1 true +; IPSCCP-LABEL: @ip_range_attribute_constant( +; IPSCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_with_range_attribute(i32 5) +; IPSCCP-NEXT: ret i1 true +; +; SCCP-LABEL: @ip_range_attribute_constant( +; SCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_with_range_attribute(i32 5) +; SCCP-NEXT: ret i1 [[C]] ; %c = call i1 @ip_cmp_with_range_attribute(i32 5) ret i1 %c diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll new file mode 100644 index 00000000000000..f07b6bbe8d6621 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v,+zvl512b < %s | FileCheck %s + +define void @test(ptr %c) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = 
insertelement <8 x ptr> poison, ptr [[C]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> , <8 x i8> poison) +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP3]], i32 1, <8 x i1> , <8 x i8> poison) +; CHECK-NEXT: br label %[[FOR_COND:.*]] +; CHECK: [[FOR_COND]]: +; CHECK-NEXT: [[A_PROMOTED2226:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[FOR_COND]] ] +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[TMP4]], i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP5]], i64 8) +; CHECK-NEXT: [[TMP8]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP7]]) +; CHECK-NEXT: br label %[[FOR_COND]] +; +entry: + %arrayidx8.5.3 = getelementptr i8, ptr %c, i64 222 + %0 = load i8, ptr %arrayidx8.5.3, align 1 + %arrayidx8.7.3 = getelementptr i8, ptr %c, i64 228 + %1 = load i8, ptr %arrayidx8.7.3, align 1 + %arrayidx8.434 = getelementptr i8, ptr %c, i64 276 + %2 = load i8, ptr %arrayidx8.434, align 1 + %arrayidx8.1.4 = getelementptr i8, ptr %c, i64 279 + %3 = load i8, ptr %arrayidx8.1.4, align 1 + %arrayidx8.2.4 = getelementptr i8, ptr %c, i64 282 + %4 = load i8, ptr %arrayidx8.2.4, align 1 + %arrayidx8.3.4 = getelementptr i8, ptr %c, i64 285 + %5 = load i8, ptr %arrayidx8.3.4, align 1 + %arrayidx8.4.4 = getelementptr i8, ptr %c, i64 288 + %6 = load i8, ptr %arrayidx8.4.4, align 1 + %7 = load i8, ptr %c, align 1 + %8 = load i8, ptr %c, align 1 + %arrayidx8.536 = getelementptr i8, ptr %c, i64 345 + %9 = load i8, ptr %arrayidx8.536, align 1 + %arrayidx8.1.5 = getelementptr i8, ptr %c, i64 
348 + %10 = load i8, ptr %arrayidx8.1.5, align 1 + %arrayidx8.2.5 = getelementptr i8, ptr %c, i64 351 + %11 = load i8, ptr %arrayidx8.2.5, align 1 + %arrayidx8.3.5 = getelementptr i8, ptr %c, i64 354 + %12 = load i8, ptr %arrayidx8.3.5, align 1 + %arrayidx8.4.5 = getelementptr i8, ptr %c, i64 357 + %13 = load i8, ptr %arrayidx8.4.5, align 1 + %arrayidx8.5.5 = getelementptr i8, ptr %c, i64 360 + %14 = load i8, ptr %arrayidx8.5.5, align 1 + %arrayidx8.6.5 = getelementptr i8, ptr %c, i64 363 + %15 = load i8, ptr %arrayidx8.6.5, align 1 + br label %for.cond + +for.cond: + %a.promoted2226 = phi i8 [ 0, %entry ], [ %or18.6.5, %for.cond ] + %or18.7.3 = or i8 %0, %1 + %or18.435 = or i8 %or18.7.3, %2 + %or18.1.4 = or i8 %or18.435, %3 + %or18.2.4 = or i8 %or18.1.4, %4 + %or18.3.4 = or i8 %or18.2.4, %5 + %or18.4.4 = or i8 %or18.3.4, %6 + %or18.5.4 = or i8 %or18.4.4, %7 + %or18.6.4 = or i8 %or18.5.4, %8 + %or18.537 = or i8 %or18.6.4, %9 + %or18.1.5 = or i8 %or18.537, %10 + %or18.2.5 = or i8 %or18.1.5, %11 + %or18.3.5 = or i8 %or18.2.5, %12 + %or18.4.5 = or i8 %or18.3.5, %13 + %or18.5.5 = or i8 %or18.4.5, %14 + %or18.6.5 = or i8 %or18.5.5, %15 + br label %for.cond +} + diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll new file mode 100644 index 00000000000000..7e75970de34929 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i1 @test(i32 %g, i16 %d) { +; CHECK-LABEL: define i1 @test( +; CHECK-SAME: i32 [[G:%.*]], i16 [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = and i16 [[D]], 1 +; CHECK-NEXT: [[XOR_I_I:%.*]] = xor i32 [[G]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[G]], i32 0 +; 
CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[XOR_I_I]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP9]] to <2 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i8> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i1> [[TMP7]] to <4 x i8> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[TMP8]] to <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP12]]) +; CHECK-NEXT: ret i1 [[TMP13]] +; +entry: + %0 = and i16 %d, 1 + %xor.i.i = xor i32 %g, 1 + %conv1.i.i = trunc i32 %xor.i.i to i8 + %notsub.i = add i8 %conv1.i.i, -1 + %cmp.i.i = icmp sgt i8 %notsub.i, -3 + %conv3.i.i = zext i1 %cmp.i.i to i32 + %cmp4.i.i = icmp sgt i32 %xor.i.i, %conv3.i.i + %conv1.1.i.i = trunc i32 %g to i8 + %notsub25.i = add i8 %conv1.1.i.i, -1 + %cmp.1.i.i = icmp sgt i8 %notsub25.i, -3 + %conv3.1.i.i = zext i1 %cmp.1.i.i to i32 + %cmp4.1.i.i = icmp sgt i32 %g, %conv3.1.i.i + %notsub26.i = add i8 %conv1.1.i.i, -9 + %cmp.i17.i = icmp sgt i8 %notsub26.i, -3 + %conv3.i18.i = zext i1 %cmp.i17.i to i32 + %cmp4.i19.i = icmp sgt i32 %g, %conv3.i18.i + %notsub27.i = add i8 %conv1.i.i, -9 + %cmp.1.i22.i = icmp sgt i8 %notsub27.i, -3 + %conv3.1.i23.i = zext i1 %cmp.1.i22.i to i32 + %cmp4.1.i24.i = icmp sgt i32 %xor.i.i, %conv3.1.i23.i + %1 = and i1 %cmp4.i19.i, %cmp4.1.i24.i + %2 = and i1 %cmp4.i.i, %1 + %3 = and i1 %cmp4.1.i.i, %2 + ret i1 %3 +} diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 5094871a1d415d..b47c77c5f2ff3f 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -1183,11 +1183,9 @@ void 
ProfileGeneratorBase::extractProbesFromRange( do { const AddressProbesMap &Address2ProbesMap = Binary->getAddress2ProbesMap(); - auto It = Address2ProbesMap.find(IP.Address); - if (It != Address2ProbesMap.end()) { - for (const MCDecodedPseudoProbe &Probe : It->second) { - ProbeCounter[&Probe] += Count; - } + for (const MCDecodedPseudoProbe &Probe : + Address2ProbesMap.find(IP.Address)) { + ProbeCounter[&Probe] += Count; } } while (IP.advance() && IP.Address <= RangeEnd); } @@ -1293,9 +1291,9 @@ void CSProfileGenerator::populateBodySamplesWithProbes( // and will be inferred by the compiler. for (auto &I : FrameSamples) { for (auto *FunctionProfile : I.second) { - for (auto *Probe : I.first->getProbes()) { - FunctionProfile->addBodySamples(Probe->getIndex(), - Probe->getDiscriminator(), 0); + for (const MCDecodedPseudoProbe &Probe : I.first->getProbes()) { + FunctionProfile->addBodySamples(Probe.getIndex(), + Probe.getDiscriminator(), 0); } } } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index a458ffcb96b41a..e4fc3816cd0c45 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -132,7 +132,7 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( MCPseudoProbeDecoder &ProbeDecoder) { ProbeFrameStack ProbeContext; for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) - trackInlineesOptimizedAway(ProbeDecoder, *Child.second, ProbeContext); + trackInlineesOptimizedAway(ProbeDecoder, Child, ProbeContext); } void BinarySizeContextTracker::trackInlineesOptimizedAway( @@ -160,9 +160,9 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( // DFS down the probe inline tree for (const auto &ChildNode : ProbeNode.getChildren()) { - InlineSite Location = ChildNode.first; + InlineSite Location = ChildNode.getInlineSite(); ProbeContext.back().second = std::get<1>(Location); - trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second, 
ProbeContext); + trackInlineesOptimizedAway(ProbeDecoder, ChildNode, ProbeContext); } ProbeContext.pop_back(); @@ -454,8 +454,8 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) { // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe // is available if (TrackFuncContextSize) { - for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) { - auto *Frame = Child.second.get(); + for (auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) { + auto *Frame = &Child; StringRef FuncName = ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName; TopLevelProbeFrameMap[FuncName] = Frame; diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 83edd954080e9f..a7192ac98af41a 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -580,6 +580,52 @@ define void @foo(i8 %v1) { EXPECT_EQ(I0->getNextNode(), Ret); } +TEST_F(SandboxIRTest, FenceInst) { + parseIR(C, R"IR( +define void @foo() { + fence syncscope("singlethread") seq_cst + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + llvm::BasicBlock *LLVMBB = &*LLVMF->begin(); + auto *LLVMFence = cast(&*LLVMBB->begin()); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *Fence = cast(&*It++); + auto *Ret = cast(&*It++); + + // Check getOrdering(). + EXPECT_EQ(Fence->getOrdering(), LLVMFence->getOrdering()); + // Check setOrdering(). + auto OrigOrdering = Fence->getOrdering(); + auto NewOrdering = AtomicOrdering::Release; + EXPECT_NE(NewOrdering, OrigOrdering); + Fence->setOrdering(NewOrdering); + EXPECT_EQ(Fence->getOrdering(), NewOrdering); + Fence->setOrdering(OrigOrdering); + EXPECT_EQ(Fence->getOrdering(), OrigOrdering); + // Check getSyncScopeID(). + EXPECT_EQ(Fence->getSyncScopeID(), LLVMFence->getSyncScopeID()); + // Check setSyncScopeID(). 
+ auto OrigSSID = Fence->getSyncScopeID(); + auto NewSSID = SyncScope::System; + EXPECT_NE(NewSSID, OrigSSID); + Fence->setSyncScopeID(NewSSID); + EXPECT_EQ(Fence->getSyncScopeID(), NewSSID); + Fence->setSyncScopeID(OrigSSID); + EXPECT_EQ(Fence->getSyncScopeID(), OrigSSID); + // Check create(). + auto *NewFence = + sandboxir::FenceInst::create(AtomicOrdering::Release, Ret->getIterator(), + BB, Ctx, SyncScope::SingleThread); + EXPECT_EQ(NewFence->getNextNode(), Ret); + EXPECT_EQ(NewFence->getOrdering(), AtomicOrdering::Release); + EXPECT_EQ(NewFence->getSyncScopeID(), SyncScope::SingleThread); +} + TEST_F(SandboxIRTest, SelectInst) { parseIR(C, R"IR( define void @foo(i1 %c0, i8 %v0, i8 %v1, i1 %c1) { diff --git a/llvm/unittests/SandboxIR/TrackerTest.cpp b/llvm/unittests/SandboxIR/TrackerTest.cpp index f0d6a0d57b8c3e..5f04cbd5840ba5 100644 --- a/llvm/unittests/SandboxIR/TrackerTest.cpp +++ b/llvm/unittests/SandboxIR/TrackerTest.cpp @@ -542,6 +542,40 @@ define void @foo(ptr %ptr) { EXPECT_EQ(It, BB->end()); } +TEST_F(TrackerTest, FenceInstSetters) { + parseIR(C, R"IR( +define void @foo() { + fence syncscope("singlethread") seq_cst + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *Fence = cast(&*It++); + + // Check setOrdering(). + auto OrigOrdering = Fence->getOrdering(); + auto NewOrdering = AtomicOrdering::Release; + EXPECT_NE(NewOrdering, OrigOrdering); + Ctx.save(); + Fence->setOrdering(NewOrdering); + EXPECT_EQ(Fence->getOrdering(), NewOrdering); + Ctx.revert(); + EXPECT_EQ(Fence->getOrdering(), OrigOrdering); + // Check setSyncScopeID(). 
+ auto OrigSSID = Fence->getSyncScopeID(); + auto NewSSID = SyncScope::System; + EXPECT_NE(NewSSID, OrigSSID); + Ctx.save(); + Fence->setSyncScopeID(NewSSID); + EXPECT_EQ(Fence->getSyncScopeID(), NewSSID); + Ctx.revert(); + EXPECT_EQ(Fence->getSyncScopeID(), OrigSSID); +} + TEST_F(TrackerTest, CallBaseSetters) { parseIR(C, R"IR( declare void @bar1(i8) diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp index ec6ef56a66fa0f..6a3030bfc1b7e3 100644 --- a/llvm/utils/TableGen/CallingConvEmitter.cpp +++ b/llvm/utils/TableGen/CallingConvEmitter.cpp @@ -160,7 +160,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, ListInit *RegList = Action->getValueAsListInit("RegList"); if (RegList->size() == 1) { std::string Name = getQualifiedName(RegList->getElementAsRecord(0)); - O << IndentStr << "if (unsigned Reg = State.AllocateReg(" << Name + O << IndentStr << "if (MCRegister Reg = State.AllocateReg(" << Name << ")) {\n"; if (SwiftAction) AssignedSwiftRegsMap[CurrentAction].insert(Name); @@ -180,7 +180,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, O << LS << Name; } O << "\n" << IndentStr << "};\n"; - O << IndentStr << "if (unsigned Reg = State.AllocateReg(RegList" + O << IndentStr << "if (MCRegister Reg = State.AllocateReg(RegList" << Counter << ")) {\n"; } O << IndentStr << " State.addLoc(CCValAssign::getReg(ValNo, ValVT, " @@ -217,7 +217,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, "Invalid length of list of shadowed registers"); if (RegList->size() == 1) { - O << IndentStr << "if (unsigned Reg = State.AllocateReg("; + O << IndentStr << "if (MCRegister Reg = State.AllocateReg("; O << getQualifiedName(RegList->getElementAsRecord(0)); O << ", " << getQualifiedName(ShadowRegList->getElementAsRecord(0)); O << ")) {\n"; @@ -241,7 +241,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, O << LSS << 
getQualifiedName(ShadowRegList->getElementAsRecord(i)); O << "\n" << IndentStr << "};\n"; - O << IndentStr << "if (unsigned Reg = State.AllocateReg(RegList" + O << IndentStr << "if (MCRegister Reg = State.AllocateReg(RegList" << RegListNumber << ", " << "RegList" << ShadowRegListNumber << ")) {\n"; } diff --git a/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h b/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h index 78c79c915e0607..28fdc234e5ef07 100644 --- a/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h +++ b/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h @@ -9,7 +9,9 @@ #ifndef MLIR_CONVERSION_ARITHTOAMDGPU_ARITHTOAMDGPU_H #define MLIR_CONVERSION_ARITHTOAMDGPU_ARITHTOAMDGPU_H +#include "mlir/Dialect/AMDGPU/Utils/Chipset.h" #include +#include namespace mlir { @@ -26,7 +28,10 @@ namespace arith { /// to the largest value of that type instead of being rewritten to Inf (aka /// NaN). void populateArithToAMDGPUConversionPatterns(RewritePatternSet &patterns, - bool saturateFP8TruncF); + bool convertFP8Arithmetic, + bool saturateFP8Truncf, + bool allowPackedF16Rtz, + amdgpu::Chipset chipset); } // namespace arith } // namespace mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 7bde9e490e4f4e..383e7dca0429c5 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -150,9 +150,15 @@ def ArithToAMDGPUConversionPass : Pass<"convert-arith-to-amdgpu"> { let dependentDialects = ["amdgpu::AMDGPUDialect", "vector::VectorDialect"]; let options = [ + Option<"chipset", "chipset", "std::string", + /*default=*/"\"gfx000\"", + "Chipset that these operations will run on">, Option<"saturateFP8Truncf", "saturate-fp8-truncf", "bool", /*default=*/"false", "Use saturating truncation for 8-bit float types">, + Option<"allowPackedF16Rtz", "allow-packed-f16-round-to-zero", "bool", + /*default=*/"false", + "Whether we should allow f32->f16 packed 
round-to-zero conversion">, ]; } diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td index 97e0580c898080..e5c1a53f34bf64 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td +++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td @@ -25,6 +25,7 @@ def AMDGPU_Dialect : Dialect { let dependentDialects = [ + "ROCDL::ROCDLDialect", "arith::ArithDialect", "gpu::GPUDialect" ]; diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td index a1e6fc3e299009..e832dfa9d6b80e 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td @@ -166,7 +166,7 @@ def ROCDL_BallotOp : let summary = "Vote across thread group"; let description = [{ - Ballot provides a bit mask containing the 1-bit predicate value from each lane. + Ballot provides a bit mask containing the 1-bit predicate value from each lane. The nth bit of the result contains the 1 bit contributed by the nth warp lane. }]; @@ -579,6 +579,21 @@ def ROCDL_DPPUpdateOp : ROCDL_IntrOp<"update.dpp", [], [0], }]; } +//===---------------------------------------------------------------------===// +// 16-bit float intrinsics +//===---------------------------------------------------------------------===// +def ROCDL_CvtPkRtz: + ROCDL_IntrOp<"cvt.pkrtz", [], [], [Pure], 1>, + Arguments<(ins F32:$srcA, F32:$srcB)> { + let summary = "Convert two f32 input into a vector<2xf16>"; + let description = [{ + Convert two f32 values into a packed vector<2xf16>. 
+ }]; + let assemblyFormat = [{ attr-dict $srcA `,` $srcB `:` type($res) + }]; +} + //===---------------------------------------------------------------------===// // 8-bit float intrinsics //===---------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h index 08afdf373f014a..0fcaa96ade4031 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h @@ -110,8 +110,12 @@ struct ConvolutionDimensions { FailureOr inferConvolutionDims(LinalgOp linalgOp); /// Checks whether `linalgOp` conforms to ConvolutionOpInterface. +/// By default, we require the `linalgOp` to have non-empty convolved dims +/// (implicitly non-empty `output_image` and `filter_loop`). +/// Users can loosen the constraint by setting `allowEmptyConvolvedDims` to true // TODO: embed within `isa` if possible / natural. -bool isaConvolutionOpInterface(LinalgOp linalgOp); +bool isaConvolutionOpInterface(LinalgOp linalgOp, + bool allowEmptyConvolvedDims = false); /// Checks whether `linalgOp` is semantically equivalent to a `linalg.copyOp`. bool isaCopyOpInterface(LinalgOp linalgOp); @@ -175,9 +179,12 @@ enum class MatchConvolutionResult; /// Checks whether `op` conforms to ConvolutionOpInterface and populates /// `dimensions` with indexes of the different kinds of dimensions when /// present. +/// If `allowEmptyConvolvedDims` is not set, we further check whether the `op` +/// contains convolved dims. MatchConvolutionResult isConvolutionInterfaceImpl( Operation *op, - ConvolutionDimensions *dimensions = nullptr); + ConvolutionDimensions *dimensions = nullptr, + bool allowEmptyConvolvedDims = false); /// Returns the error message corresponding to the convolution checking return /// code. 
diff --git a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp index b3798a3f7624b0..d36583c8118ff4 100644 --- a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp +++ b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp @@ -9,8 +9,11 @@ #include "mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h" #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" +#include "mlir/Dialect/AMDGPU/Utils/Chipset.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/PatternMatch.h" @@ -24,6 +27,7 @@ namespace mlir { } // namespace mlir using namespace mlir; +using namespace mlir::amdgpu; namespace { struct ArithToAMDGPUConversionPass final @@ -43,12 +47,25 @@ struct ExtFOnFloat8RewritePattern final : OpRewritePattern { struct TruncFToFloat8RewritePattern final : OpRewritePattern { bool saturateFP8 = false; - TruncFToFloat8RewritePattern(MLIRContext *ctx, bool saturateFP8) - : OpRewritePattern::OpRewritePattern(ctx), saturateFP8(saturateFP8) {} + TruncFToFloat8RewritePattern(MLIRContext *ctx, bool saturateFP8, + Chipset chipset) + : OpRewritePattern::OpRewritePattern(ctx), saturateFP8(saturateFP8), + chipset(chipset) {} + Chipset chipset; LogicalResult match(arith::TruncFOp op) const override; void rewrite(arith::TruncFOp op, PatternRewriter &rewriter) const override; }; + +struct TruncfToFloat16RewritePattern final + : public OpRewritePattern { + + using OpRewritePattern::OpRewritePattern; + + LogicalResult match(arith::TruncFOp op) const override; + void rewrite(arith::TruncFOp op, PatternRewriter &rewriter) const override; +}; + } // end namespace static Value castF32To(Type elementType, Value f32, Location loc, @@ -272,17 +289,105 @@ void TruncFToFloat8RewritePattern::rewrite(arith::TruncFOp op, 
rewriter.replaceOp(op, result); } +LogicalResult TruncfToFloat16RewritePattern::match(arith::TruncFOp op) const { + Type outType = op.getOut().getType(); + Type inputType = getElementTypeOrSelf(op.getIn()); + if (auto outVecType = dyn_cast(outType)) { + if (outVecType.isScalable()) + return failure(); + outType = outVecType.getElementType(); + } + return success(outType.isF16() && inputType.isF32()); +} + +void TruncfToFloat16RewritePattern::rewrite(arith::TruncFOp op, + PatternRewriter &rewriter) const { + Location loc = op.getLoc(); + Value in = op.getIn(); + Type outElemType = getElementTypeOrSelf(op.getOut().getType()); + VectorType truncResType = VectorType::get(2, outElemType); + auto inVectorTy = dyn_cast(in.getType()); + + // Handle the case where input type is not a vector type + if (!inVectorTy) { + auto sourceB = rewriter.create(loc, rewriter.getF32Type()); + Value asF16s = + rewriter.create(loc, truncResType, in, sourceB); + Value result = rewriter.create( + loc, asF16s, rewriter.createOrFold(loc, 0)); + return rewriter.replaceOp(op, result); + } + VectorType outType = cast(op.getOut().getType()); + int64_t numElements = outType.getNumElements(); + Value zero = rewriter.createOrFold( + loc, outElemType, rewriter.getFloatAttr(outElemType, 0.0)); + Value result = rewriter.createOrFold(loc, outType, zero); + + if (inVectorTy.getRank() > 1) { + inVectorTy = VectorType::get(SmallVector{numElements}, + inVectorTy.getElementType()); + in = rewriter.create(loc, inVectorTy, in); + } + + // Handle the vector case. 
We also handle the (uncommon) case where the vector + // length is odd + for (int64_t i = 0; i < numElements; i += 2) { + int64_t elemsThisOp = std::min(numElements, i + 2) - i; + Value thisResult = nullptr; + Value elemA = rewriter.create( + loc, in, rewriter.create(loc, i)); + Value elemB = rewriter.create(loc, rewriter.getF32Type()); + + if (elemsThisOp == 2) { + elemB = rewriter.create( + loc, in, rewriter.createOrFold(loc, i + 1)); + } + + thisResult = + rewriter.create(loc, truncResType, elemA, elemB); + // Place back the truncated result into the possibly larger vector. If we + // are operating on a size 2 vector, these operations should be folded away + thisResult = rewriter.create( + loc, thisResult, 0, elemsThisOp, 1); + result = rewriter.create(loc, thisResult, + result, i, 1); + } + + if (inVectorTy.getRank() != outType.getRank()) { + result = rewriter.create(loc, outType, result); + } + + rewriter.replaceOp(op, result); +} + void mlir::arith::populateArithToAMDGPUConversionPatterns( - RewritePatternSet &patterns, bool saturateFP8TruncF) { - patterns.add(patterns.getContext()); - patterns.add(patterns.getContext(), - saturateFP8TruncF); + RewritePatternSet &patterns, bool convertFP8Arithmetic, + bool saturateFP8Truncf, bool allowPackedF16Rtz, Chipset chipset) { + + if (convertFP8Arithmetic) { + patterns.add(patterns.getContext()); + patterns.add(patterns.getContext(), + saturateFP8Truncf, chipset); + } + if (allowPackedF16Rtz) + patterns.add(patterns.getContext()); } void ArithToAMDGPUConversionPass::runOnOperation() { Operation *op = getOperation(); + MLIRContext *ctx = &getContext(); RewritePatternSet patterns(op->getContext()); - arith::populateArithToAMDGPUConversionPatterns(patterns, saturateFP8Truncf); + FailureOr maybeChipset = amdgpu::Chipset::parse(chipset); + if (failed(maybeChipset)) { + emitError(UnknownLoc::get(ctx), "Invalid chipset name: " + chipset); + return signalPassFailure(); + } + + bool convertFP8Arithmetic = + 
(*maybeChipset).majorVersion == 9 && (*maybeChipset).minorVersion >= 0x40; + arith::populateArithToAMDGPUConversionPatterns( + patterns, convertFP8Arithmetic, saturateFP8Truncf, allowPackedF16Rtz, + *maybeChipset); if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) return signalPassFailure(); } diff --git a/mlir/lib/Conversion/ArithToAMDGPU/CMakeLists.txt b/mlir/lib/Conversion/ArithToAMDGPU/CMakeLists.txt index e2c951b0b34d8b..50be09ab5a7c5b 100644 --- a/mlir/lib/Conversion/ArithToAMDGPU/CMakeLists.txt +++ b/mlir/lib/Conversion/ArithToAMDGPU/CMakeLists.txt @@ -12,6 +12,7 @@ add_mlir_conversion_library(MLIRArithToAMDGPU LINK_LIBS PUBLIC MLIRAMDGPUDialect + MLIRAMDGPUUtils MLIRArithDialect MLIRArithUtils MLIRVectorDialect diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp index c1a785fb25478d..3943696364950f 100644 --- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp +++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Diagnostics.h" diff --git a/mlir/lib/Dialect/AMDGPU/IR/CMakeLists.txt b/mlir/lib/Dialect/AMDGPU/IR/CMakeLists.txt index 0551d13b5a0cf0..78d78cf48a747c 100644 --- a/mlir/lib/Dialect/AMDGPU/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/AMDGPU/IR/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_dialect_library(MLIRAMDGPUDialect LINK_LIBS PUBLIC MLIRArithDialect + MLIRROCDLDialect # Needed for GPU address space enum definition MLIRGPUDialect MLIRIR diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp index 6ee1810c2ff2b9..d5c21fb5d845e9 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp @@ -762,13 +762,15 @@ enum class MatchConvolutionResult { 
NotProjectedPermutations, NonConvolutionLoop, OutputDimsNotParallel, - NonOutputDimNotReduction + NonOutputDimNotReduction, + EmptyConvolvedDims }; } // namespace mlir::linalg::detail mlir::linalg::detail::MatchConvolutionResult mlir::linalg::detail::isConvolutionInterfaceImpl( - Operation *op, ConvolutionDimensions *dimensions) { + Operation *op, ConvolutionDimensions *dimensions, + bool allowEmptyConvolvedDims) { auto linalgOp = dyn_cast(op); if (!linalgOp) return MatchConvolutionResult::NotLinalgOp; @@ -886,10 +888,12 @@ mlir::linalg::detail::isConvolutionInterfaceImpl( if (allLoopDims.size() != linalgOp.getNumLoops()) return MatchConvolutionResult::NonConvolutionLoop; + if (!allowEmptyConvolvedDims && inputExprWalker.convolvedDims.empty()) + return MatchConvolutionResult::EmptyConvolvedDims; + if (dimensions) { - FailureOr res = - inferConvolutionDimsImpl(linalgOp, inputExprWalker, - /*allowEmptyConvolvedDims=*/true); + FailureOr res = inferConvolutionDimsImpl( + linalgOp, inputExprWalker, allowEmptyConvolvedDims); assert(succeeded(res) && "unexpected failure to infer convolution dims"); *dimensions = *res; } @@ -914,14 +918,18 @@ mlir::linalg::detail::getMatchConvolutionMessage(MatchConvolutionResult res) { return "expected all iterators used to access outputs to be parallel"; case MatchConvolutionResult::NonOutputDimNotReduction: return "expected all iterators not used to access outputs to be reduction"; + case MatchConvolutionResult::EmptyConvolvedDims: + return "expected convolved dim to be non-empty"; case MatchConvolutionResult::Success: return ""; } llvm_unreachable("unhandled MatchConvolutionResult case"); } -bool mlir::linalg::isaConvolutionOpInterface(LinalgOp linalgOp) { - return linalg::detail::isConvolutionInterfaceImpl(linalgOp.getOperation()) == +bool mlir::linalg::isaConvolutionOpInterface(LinalgOp linalgOp, + bool allowEmptyConvolvedDims) { + return linalg::detail::isConvolutionInterfaceImpl( + linalgOp.getOperation(), nullptr, allowEmptyConvolvedDims) == 
linalg::detail::MatchConvolutionResult::Success; } diff --git a/mlir/lib/Transforms/Utils/InliningUtils.cpp b/mlir/lib/Transforms/Utils/InliningUtils.cpp index ba146920fae2e9..0db097d14cd3c7 100644 --- a/mlir/lib/Transforms/Utils/InliningUtils.cpp +++ b/mlir/lib/Transforms/Utils/InliningUtils.cpp @@ -25,22 +25,37 @@ using namespace mlir; -/// Remap locations from the inlined blocks with CallSiteLoc locations with the -/// provided caller location. +/// Remap all locations reachable from the inlined blocks with CallSiteLoc +/// locations with the provided caller location. static void remapInlinedLocations(iterator_range inlinedBlocks, Location callerLoc) { - DenseMap mappedLocations; - auto remapOpLoc = [&](Operation *op) { - auto it = mappedLocations.find(op->getLoc()); - if (it == mappedLocations.end()) { - auto newLoc = CallSiteLoc::get(op->getLoc(), callerLoc); - it = mappedLocations.try_emplace(op->getLoc(), newLoc).first; + DenseMap mappedLocations; + auto remapLoc = [&](Location loc) { + auto [it, inserted] = mappedLocations.try_emplace(loc); + // Only query the attribute uniquer once per callsite attribute. 
+ if (inserted) { + auto newLoc = CallSiteLoc::get(loc, callerLoc); + it->getSecond() = newLoc; } - op->setLoc(it->second); + return it->second; }; - for (auto &block : inlinedBlocks) - block.walk(remapOpLoc); + + AttrTypeReplacer attrReplacer; + attrReplacer.addReplacement( + [&](LocationAttr loc) -> std::pair { + return {remapLoc(loc), WalkResult::skip()}; + }); + + for (Block &block : inlinedBlocks) { + for (BlockArgument &arg : block.getArguments()) + if (LocationAttr newLoc = remapLoc(arg.getLoc())) + arg.setLoc(newLoc); + + for (Operation &op : block) + attrReplacer.recursivelyReplaceElementsIn(&op, /*replaceAttrs=*/false, + /*replaceLocs=*/true); + } } static void remapInlinedOperands(iterator_range inlinedBlocks, diff --git a/mlir/test/Conversion/ArithToAMDGPU/16-bit-floats.mlir b/mlir/test/Conversion/ArithToAMDGPU/16-bit-floats.mlir new file mode 100644 index 00000000000000..121cae26748a82 --- /dev/null +++ b/mlir/test/Conversion/ArithToAMDGPU/16-bit-floats.mlir @@ -0,0 +1,51 @@ +// RUN: mlir-opt --split-input-file %s -convert-arith-to-amdgpu="allow-packed-f16-round-to-zero=true" | FileCheck %s + +// CHECK-LABEL: @scalar_trunc +// CHECK-SAME: (%[[value:.*]]: f32) +func.func @scalar_trunc(%v: f32) -> f16{ + // CHECK: %[[poison:.*]] = llvm.mlir.poison : f32 + // CHECK: %[[trunc:.*]] = rocdl.cvt.pkrtz %[[value]], %[[poison]] : vector<2xf16> + // CHECK: %[[extract:.*]] = vector.extractelement %[[trunc]][%c0 : index] : vector<2xf16> + // CHECK: return %[[extract]] : f16 + %w = arith.truncf %v : f32 to f16 + return %w : f16 +} + +// CHECK-LABEL: @vector_trunc +// CHECK-SAME: (%[[value:.*]]: vector<2xf32>) +func.func @vector_trunc_short(%v: vector<2xf32>) -> vector<2xf16> { + // CHECK: %[[elem0:.*]] = vector.extractelement %[[value]] + // CHECK: %[[elem1:.*]] = vector.extractelement %[[value]] + // CHECK: %[[ret:.*]] = rocdl.cvt.pkrtz %[[elem0]], %[[elem1]] : vector<2xf16> + // CHECK: return %[[ret]] + %w = arith.truncf %v : vector<2xf32> to vector<2xf16> + 
return %w : vector<2xf16> +} + +// CHECK-LABEL: @vector_trunc_long +// CHECK-SAME: (%[[value:.*]]: vector<9xf32>) +func.func @vector_trunc_long(%v: vector<9xf32>) -> vector<9xf16> { + // CHECK: %[[elem0:.*]] = vector.extractelement %[[value]][%c0 : index] + // CHECK: %[[elem1:.*]] = vector.extractelement %[[value]][%c1 : index] + // CHECK: %[[packed0:.*]] = rocdl.cvt.pkrtz %[[elem0]], %[[elem1]] : vector<2xf16> + // CHECK: %[[out0:.*]] = vector.insert_strided_slice %[[packed0]], {{.*}} {offsets = [0], strides = [1]} : vector<2xf16> into vector<9xf16> + // CHECK: %[[elem2:.*]] = vector.extractelement %[[value]][%c2 : index] + // CHECK: %[[elem3:.*]] = vector.extractelement %[[value]][%c3 : index] + // CHECK: %[[packed1:.*]] = rocdl.cvt.pkrtz %[[elem2]], %[[elem3]] : vector<2xf16> + // CHECK: %[[out1:.*]] = vector.insert_strided_slice %[[packed1]], %[[out0]] {offsets = [2], strides = [1]} : vector<2xf16> into vector<9xf16> + // CHECK: %[[elem4:.*]] = vector.extractelement %[[value]][%c4 : index] + // CHECK: %[[elem5:.*]] = vector.extractelement %[[value]][%c5 : index] + // CHECK: %[[packed2:.*]] = rocdl.cvt.pkrtz %[[elem4]], %[[elem5]] : vector<2xf16> + // CHECK: %[[out2:.*]] = vector.insert_strided_slice %[[packed2]], %[[out1]] {offsets = [4], strides = [1]} : vector<2xf16> into vector<9xf16> + // CHECK: %[[elem6:.*]] = vector.extractelement %[[value]] + // CHECK: %[[elem7:.*]] = vector.extractelement %[[value]] + // CHECK: %[[packed3:.*]] = rocdl.cvt.pkrtz %[[elem6]], %[[elem7]] : vector<2xf16> + // CHECK: %[[out3:.*]] = vector.insert_strided_slice %[[packed3]], %[[out2]] {offsets = [6], strides = [1]} : vector<2xf16> into vector<9xf16> + // CHECK: %[[elem8:.*]] = vector.extractelement %[[value]] + // CHECK: %[[packed4:.*]] = rocdl.cvt.pkrtz %[[elem8:.*]] : vector<2xf16> + // CHECK: %[[slice:.*]] = vector.extract_strided_slice %[[packed4]] {offsets = [0], sizes = [1], strides = [1]} : vector<2xf16> to vector<1xf16> + // CHECK: %[[out4:.*]] = 
vector.insert_strided_slice %[[slice]], %[[out3]] {offsets = [8], strides = [1]} : vector<1xf16> into vector<9xf16> + // CHECK: return %[[out4]] + %w = arith.truncf %v : vector<9xf32> to vector<9xf16> + return %w : vector<9xf16> +} diff --git a/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir b/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir index c7f39440a349b0..cd921da2294e13 100644 --- a/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir +++ b/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt --split-input-file %s \ -// RUN: --pass-pipeline='builtin.module(func.func(convert-arith-to-amdgpu{saturate-fp8-truncf=true}))' \ +// RUN: --pass-pipeline='builtin.module(func.func(convert-arith-to-amdgpu{chipset=gfx940 saturate-fp8-truncf=true}))' \ // RUN: | FileCheck %s // CHECK-LABEL: func.func @scalar_trunc diff --git a/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir b/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir index 26a222a4a788e5..bd90facb615440 100644 --- a/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir +++ b/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file %s -convert-arith-to-amdgpu | FileCheck %s +// RUN: mlir-opt --split-input-file %s -convert-arith-to-amdgpu="chipset=gfx940" | FileCheck %s // CHECK-LABEL: func.func @scalar_ext // CHECK-SAME: ([[V:%.+]]: f8E5M2FNUZ) diff --git a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir index b06ad96f4592c5..02ce6b5b19ceaf 100644 --- a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir +++ b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir @@ -156,3 +156,49 @@ llvm.func @foo() // CHECK: %[[STR_LEN:.*]] = extractvalue { ptr, i64 } %{{.*}}, 1 // CHECK: %{{.*}} = alloca i8, i64 %[[STR_LEN]], align 1 // CHECK: call void @foo() + +// ----- + +// Verifies fix for https://github.com/llvm/llvm-project/issues/102939. 
+// +// The issue occurs because the CodeExtractor component only collects inputs +// (to the parallel regions) that are defined in the same function in which the +// parallel region is present. However, this is problematic because if we are +// privatizing a global value (e.g. a `target` variable which is emitted as a +// global), then we miss finding that input and we do not privatize the +// variable. + +omp.private {type = firstprivate} @global_privatizer : !llvm.ptr alloc { +^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x f32 {bindc_name = "global", pinned} : (i64) -> !llvm.ptr + omp.yield(%1 : !llvm.ptr) +} copy { +^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %0 = llvm.load %arg0 : !llvm.ptr -> f32 + llvm.store %0, %arg1 : f32, !llvm.ptr + omp.yield(%arg1 : !llvm.ptr) +} + +llvm.func @global_accessor() { + %global_addr = llvm.mlir.addressof @global : !llvm.ptr + omp.parallel private(@global_privatizer %global_addr -> %arg0 : !llvm.ptr) { + %1 = llvm.mlir.constant(3.140000e+00 : f32) : f32 + llvm.store %1, %arg0 : f32, !llvm.ptr + omp.terminator + } + llvm.return +} + +llvm.mlir.global internal @global() {addr_space = 0 : i32} : f32 { + %0 = llvm.mlir.zero : f32 + llvm.return %0 : f32 +} + +// CHECK-LABEL: @global_accessor..omp_par({{.*}}) +// CHECK-NEXT: omp.par.entry: +// Verify that we found the privatizer by checking that we properly inlined the +// bodies of the alloc and copy regions. 
+// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, i64 1, align 4 +// CHECK: %[[GLOB_VAL:.*]] = load float, ptr @global, align 4 +// CHECK: store float %[[GLOB_VAL]], ptr %[[PRIV_ALLOC]], align 4 diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir index 64bcb5bdb255db..d902a82eeb9ea2 100644 --- a/mlir/test/Target/LLVMIR/rocdl.mlir +++ b/mlir/test/Target/LLVMIR/rocdl.mlir @@ -530,6 +530,12 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 { llvm.return %source5 : i32 } +llvm.func @rocdl_16bit_packed_floats(%sourceA: f32, %sourceB: f32) -> vector<2xf16> { + // CHECK: call <2 x half> @llvm.amdgcn.cvt.pkrtz(float {{.*}}, float {{.*}}) + %source = rocdl.cvt.pkrtz %sourceA, %sourceB : vector<2xf16> + llvm.return %source : vector<2xf16> +} + // CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="true" } // CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024" // CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128" diff --git a/mlir/test/Transforms/inlining.mlir b/mlir/test/Transforms/inlining.mlir index 2a08e625ba79e2..79a2936b104fa1 100644 --- a/mlir/test/Transforms/inlining.mlir +++ b/mlir/test/Transforms/inlining.mlir @@ -215,9 +215,9 @@ func.func @func_with_block_args_location(%arg0 : i32) { // INLINE-LOC-LABEL: func @func_with_block_args_location_callee1 // INLINE-LOC: cf.br -// INLINE-LOC: ^bb{{[0-9]+}}(%{{.*}}: i32 loc("foo") +// INLINE-LOC: ^bb{{[0-9]+}}(%{{.*}}: i32 loc(callsite("foo" at "bar")) func.func @func_with_block_args_location_callee1(%arg0 : i32) { - call @func_with_block_args_location(%arg0) : (i32) -> () + call @func_with_block_args_location(%arg0) : (i32) -> () loc("bar") return } diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index ddb08f12f04976..866bd5ed6fd3e6 100644 --- 
a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1534,6 +1534,7 @@ cc_library( ":BytecodeOpInterface", ":GPUDialect", ":IR", + ":ROCDLDialect", ":SideEffectInterfaces", "//llvm:Support", ], @@ -8581,11 +8582,14 @@ cc_library( includes = ["include"], deps = [ ":AMDGPUDialect", + ":AMDGPUUtils", ":ArithDialect", ":ArithUtils", ":ConversionPassIncGen", ":IR", + ":LLVMDialect", ":Pass", + ":ROCDLDialect", ":Support", ":TransformUtils", ":VectorDialect",