From bb8df02dfbb40c5a6717ad3b7e8e5811acc6d164 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 8 Oct 2024 19:21:58 -0700 Subject: [PATCH 01/55] [RISCV] Use the MCStreamer reference passed to RISCVAsmPrinter::EmitToStreamer. NFCI (#111607) We passed a MCStreamer to the function but hardcoded *OutStreamer instead of using it. It's very likely that OutStreamer is the only streamer used, but lets not assume that without doing the audit. --- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 52d0a70d335e97..3bed8c4349dac0 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -247,7 +247,7 @@ bool RISCVAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { bool Res = RISCVRVC::compress(CInst, Inst, *STI); if (Res) ++RISCVNumInstrsCompressed; - AsmPrinter::EmitToStreamer(*OutStreamer, Res ? CInst : Inst); + AsmPrinter::EmitToStreamer(S, Res ? 
CInst : Inst); return Res; } From 267e852109381fe35cff0a92915a0418b872213f Mon Sep 17 00:00:00 2001 From: Vasileios Porpodas Date: Tue, 8 Oct 2024 20:01:43 -0700 Subject: [PATCH 02/55] [SandboxVec][DAG][NFC] Rename enumerators --- .../SandboxVectorizer/DependencyGraph.h | 14 ++++---- .../SandboxVectorizer/DependencyGraph.cpp | 36 +++++++++---------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h index ab49c3aa27143c..b1fe67d446be0a 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h @@ -171,13 +171,13 @@ class DependencyGraph { std::unique_ptr BatchAA; enum class DependencyType { - RAW, ///> Read After Write - WAW, ///> Write After Write - RAR, ///> Read After Read - WAR, ///> Write After Read - CTRL, ///> Control-related dependencies, like with PHIs/Terminators - OTHER, ///> Currently used for stack related instrs - NONE, ///> No memory/other dependency + ReadAfterWrite, ///> Memory dependency write -> read + WriteAfterWrite, ///> Memory dependency write -> write + ReadAfterRead, ///> Memory dependency read -> read + WriteAfterRead, ///> Memory dependency read -> write + Control, ///> Control-related dependency, like with PHI/Terminator + Other, ///> Currently used for stack related instrs + None, ///> No memory/other dependency }; /// \Returns the dependency type depending on whether instructions may /// read/write memory or whether they are some specific opcode-related diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp index 845fadefc9bf03..35ea28697424a6 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp +++ 
b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp @@ -56,23 +56,23 @@ DependencyGraph::getRoughDepType(Instruction *FromI, Instruction *ToI) { // TODO: Perhaps compile-time improvement by skipping if neither is mem? if (FromI->mayWriteToMemory()) { if (ToI->mayReadFromMemory()) - return DependencyType::RAW; + return DependencyType::ReadAfterWrite; if (ToI->mayWriteToMemory()) - return DependencyType::WAW; + return DependencyType::WriteAfterWrite; } else if (FromI->mayReadFromMemory()) { if (ToI->mayWriteToMemory()) - return DependencyType::WAR; + return DependencyType::WriteAfterRead; if (ToI->mayReadFromMemory()) - return DependencyType::RAR; + return DependencyType::ReadAfterRead; } if (isa(FromI) || isa(ToI)) - return DependencyType::CTRL; + return DependencyType::Control; if (ToI->isTerminator()) - return DependencyType::CTRL; + return DependencyType::Control; if (DGNode::isStackSaveOrRestoreIntrinsic(FromI) || DGNode::isStackSaveOrRestoreIntrinsic(ToI)) - return DependencyType::OTHER; - return DependencyType::NONE; + return DependencyType::Other; + return DependencyType::None; } static bool isOrdered(Instruction *I) { @@ -106,10 +106,10 @@ bool DependencyGraph::alias(Instruction *SrcI, Instruction *DstI, ? 
ModRefInfo::Mod : Utils::aliasAnalysisGetModRefInfo(*BatchAA, SrcI, *DstLocOpt); switch (DepType) { - case DependencyType::RAW: - case DependencyType::WAW: + case DependencyType::ReadAfterWrite: + case DependencyType::WriteAfterWrite: return isModSet(SrcModRef); - case DependencyType::WAR: + case DependencyType::WriteAfterRead: return isRefSet(SrcModRef); default: llvm_unreachable("Expected only RAW, WAW and WAR!"); @@ -119,21 +119,21 @@ bool DependencyGraph::alias(Instruction *SrcI, Instruction *DstI, bool DependencyGraph::hasDep(Instruction *SrcI, Instruction *DstI) { DependencyType RoughDepType = getRoughDepType(SrcI, DstI); switch (RoughDepType) { - case DependencyType::RAR: + case DependencyType::ReadAfterRead: return false; - case DependencyType::RAW: - case DependencyType::WAW: - case DependencyType::WAR: + case DependencyType::ReadAfterWrite: + case DependencyType::WriteAfterWrite: + case DependencyType::WriteAfterRead: return alias(SrcI, DstI, RoughDepType); - case DependencyType::CTRL: + case DependencyType::Control: // Adding actual dep edges from PHIs/to terminator would just create too // many edges, which would be bad for compile-time. // So we ignore them in the DAG formation but handle them in the // scheduler, while sorting the ready list. 
return false; - case DependencyType::OTHER: + case DependencyType::Other: return true; - case DependencyType::NONE: + case DependencyType::None: return false; } } From 1e81056b31749f7b60d56260089f75a4813749c0 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 8 Oct 2024 20:37:11 -0700 Subject: [PATCH 03/55] [Coroutines] Avoid repeated hash lookups (NFC) (#111617) --- llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 91530503a7e1ed..2b43b7a5d027d1 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -797,8 +797,8 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape, AlignInBits = OffsetCache[Index].first * 8; OffsetInBits = OffsetCache[Index].second * 8; - if (NameCache.contains(Index)) { - Name = NameCache[Index].str(); + if (auto It = NameCache.find(Index); It != NameCache.end()) { + Name = It->second.str(); DITy = TyCache[Index]; } else { DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache); From 2d8cd32ae5a69a9f3baaeca18a8318115586b3b8 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 8 Oct 2024 20:37:33 -0700 Subject: [PATCH 04/55] [InstCombine] Avoid repeated hash lookups (NFC) (#111618) --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 6c3fc987d9add2..d1eb84b5ca5c10 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -602,8 +602,9 @@ static Value *rewriteGEPAsOffset(Value *Start, Value *Base, GEPNoWrapFlags NW, for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) { Value *NewIncoming = 
PHI->getIncomingValue(I); - if (NewInsts.contains(NewIncoming)) - NewIncoming = NewInsts[NewIncoming]; + auto It = NewInsts.find(NewIncoming); + if (It != NewInsts.end()) + NewIncoming = It->second; NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I)); } From 0ee5c869fc003b09e464e079d6bbaab1baf59aba Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 8 Oct 2024 20:38:19 -0700 Subject: [PATCH 05/55] [mlir][spirv] Avoid repeated hash lookups (NFC) (#111619) --- mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp index c8386fecea038a..dd0a872e05dcbb 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp @@ -1538,11 +1538,8 @@ LogicalResult spirv::ModuleOp::verifyRegions() { auto key = std::pair( funcOp, entryPointOp.getExecutionModel()); - auto entryPtIt = entryPoints.find(key); - if (entryPtIt != entryPoints.end()) { + if (!entryPoints.try_emplace(key, entryPointOp).second) return entryPointOp.emitError("duplicate of a previous EntryPointOp"); - } - entryPoints[key] = entryPointOp; } else if (auto funcOp = dyn_cast(op)) { // If the function is external and does not have 'Import' // linkage_attributes(LinkageAttributes), throw an error. 'Import' From a579782a775ebc2bfe6203d7178ee524b3559006 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Wed, 9 Oct 2024 05:44:19 +0200 Subject: [PATCH 06/55] [llvm] Add serialization to uint32_t for FixedPointSemantics (#110288) FixedPointSemantics is exactly 32bits and this static_assert'ed after its declaration. Add support for converting it to and from a uint32_t. 
--- llvm/include/llvm/ADT/APFixedPoint.h | 9 +++++++ llvm/lib/Support/APFixedPoint.cpp | 10 ++++++++ llvm/unittests/ADT/APFixedPointTest.cpp | 31 +++++++++++++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/llvm/include/llvm/ADT/APFixedPoint.h b/llvm/include/llvm/ADT/APFixedPoint.h index ae40db96e4818c..e4aa82d7a41c31 100644 --- a/llvm/include/llvm/ADT/APFixedPoint.h +++ b/llvm/include/llvm/ADT/APFixedPoint.h @@ -114,6 +114,15 @@ class FixedPointSemantics { } bool operator!=(FixedPointSemantics Other) const { return !(*this == Other); } + /// Convert the semantics to a 32-bit unsigned integer. + /// The result is dependent on the host endianness and not stable across LLVM + /// versions. See getFromOpaqueInt() to convert it back to a + /// FixedPointSemantics object. + uint32_t toOpaqueInt() const; + /// Create a FixedPointSemantics object from an integer created via + /// toOpaqueInt(). + static FixedPointSemantics getFromOpaqueInt(uint32_t); + private: unsigned Width : WidthBitWidth; signed int LsbWeight : LsbWeightBitWidth; diff --git a/llvm/lib/Support/APFixedPoint.cpp b/llvm/lib/Support/APFixedPoint.cpp index 249c4f1e2153da..f395919287b729 100644 --- a/llvm/lib/Support/APFixedPoint.cpp +++ b/llvm/lib/Support/APFixedPoint.cpp @@ -29,6 +29,16 @@ void FixedPointSemantics::print(llvm::raw_ostream &OS) const { OS << "IsSaturated=" << IsSaturated; } +uint32_t FixedPointSemantics::toOpaqueInt() const { + return llvm::bit_cast(*this); +} + +FixedPointSemantics FixedPointSemantics::getFromOpaqueInt(uint32_t I) { + FixedPointSemantics F(0, 0, false, false, false); + std::memcpy(&F, &I, sizeof(F)); + return F; +} + APFixedPoint APFixedPoint::convert(const FixedPointSemantics &DstSema, bool *Overflow) const { APSInt NewVal = Val; diff --git a/llvm/unittests/ADT/APFixedPointTest.cpp b/llvm/unittests/ADT/APFixedPointTest.cpp index ecb89fbf76c8bb..e7aa58a8325773 100644 --- a/llvm/unittests/ADT/APFixedPointTest.cpp +++ b/llvm/unittests/ADT/APFixedPointTest.cpp @@ 
-1274,4 +1274,35 @@ TEST(FixedPoint, div) { true, false, false))); } +TEST(FixedPoint, semanticsSerialization) { + auto roundTrip = [](FixedPointSemantics FPS) -> bool { + uint32_t I = FPS.toOpaqueInt(); + FixedPointSemantics FPS2 = FixedPointSemantics::getFromOpaqueInt(I); + return FPS == FPS2; + }; + + ASSERT_TRUE(roundTrip(getS32Pos2())); + ASSERT_TRUE(roundTrip(getU8Pos4())); + ASSERT_TRUE(roundTrip(getS16Neg18())); + ASSERT_TRUE(roundTrip(getU8Neg10())); + ASSERT_TRUE(roundTrip(getPadULFractSema())); + ASSERT_TRUE(roundTrip(getPadUFractSema())); + ASSERT_TRUE(roundTrip(getPadUSFractSema())); + ASSERT_TRUE(roundTrip(getPadULAccumSema())); + ASSERT_TRUE(roundTrip(getPadUAccumSema())); + ASSERT_TRUE(roundTrip(getPadUSAccumSema())); + ASSERT_TRUE(roundTrip(getULFractSema())); + ASSERT_TRUE(roundTrip(getUFractSema())); + ASSERT_TRUE(roundTrip(getUSFractSema())); + ASSERT_TRUE(roundTrip(getULAccumSema())); + ASSERT_TRUE(roundTrip(getUAccumSema())); + ASSERT_TRUE(roundTrip(getUSAccumSema())); + ASSERT_TRUE(roundTrip(getLFractSema())); + ASSERT_TRUE(roundTrip(getFractSema())); + ASSERT_TRUE(roundTrip(getSFractSema())); + ASSERT_TRUE(roundTrip(getLAccumSema())); + ASSERT_TRUE(roundTrip(getAccumSema())); + ASSERT_TRUE(roundTrip(getSAccumSema())); +} + } // namespace From 1809d0fa1c15b16ca94381d8be3ef70c4a83c36b Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Tue, 8 Oct 2024 21:24:17 -0700 Subject: [PATCH 07/55] [clang-format] Insert a space between l_paren and ref-qualifier (#111465) Fixes #111346. 
--- clang/lib/Format/TokenAnnotator.cpp | 4 ++++ clang/unittests/Format/FormatTest.cpp | 1 + 2 files changed, 5 insertions(+) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index f6e5798057bbd2..364d7e9855e8cf 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -5454,6 +5454,10 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, } if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) return ShouldAddSpacesInAngles(); + if (Left.is(tok::r_paren) && Right.is(TT_PointerOrReference) && + Right.isOneOf(tok::amp, tok::ampamp)) { + return true; + } // Space before TT_StructuredBindingLSquare. if (Right.is(TT_StructuredBindingLSquare)) { return !Left.isOneOf(tok::amp, tok::ampamp) || diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 61287aafe8273d..2c5e5857445c35 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -11519,6 +11519,7 @@ TEST_F(FormatTest, UnderstandsFunctionRefQualification) { AlignLeft); verifyFormat("template void operator=(T) & {}", AlignLeft); verifyFormat("template void operator=(T) && {}", AlignLeft); + verifyFormat("for (foo& cb : X)", AlignLeft); FormatStyle AlignMiddle = getLLVMStyle(); AlignMiddle.PointerAlignment = FormatStyle::PAS_Middle; From d0b9c2c5647656738cda3fb670aa5d3b3a69d784 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Wed, 9 Oct 2024 05:47:00 +0100 Subject: [PATCH 08/55] [compiler-rt] Remove SHA2 interceptions for NetBSD/FreeBSD. 
(#110246) To Fix #110215 Interceptors introduced with 18a7ebda99044473fdbce6376993714ff54e6690 --- .../sanitizer_common_interceptors.inc | 180 --------------- .../sanitizer_platform_interceptors.h | 2 - .../TestCases/FreeBSD/md5.cpp | 119 ---------- .../TestCases/FreeBSD/sha2.cpp | 214 ------------------ .../sanitizer_common/TestCases/NetBSD/md5.cpp | 114 ---------- .../TestCases/NetBSD/sha2.cpp | 206 ----------------- 6 files changed, 835 deletions(-) delete mode 100644 compiler-rt/test/sanitizer_common/TestCases/FreeBSD/md5.cpp delete mode 100644 compiler-rt/test/sanitizer_common/TestCases/FreeBSD/sha2.cpp delete mode 100644 compiler-rt/test/sanitizer_common/TestCases/NetBSD/md5.cpp delete mode 100644 compiler-rt/test/sanitizer_common/TestCases/NetBSD/sha2.cpp diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index a6dd2bbf45f520..b8627f8557afe2 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -8877,83 +8877,6 @@ INTERCEPTOR(char *, RMD160Data, u8 *data, SIZE_T len, char *buf) { #define INIT_RMD160 #endif -#if SANITIZER_INTERCEPT_MD5 -INTERCEPTOR(void, MD5Init, void *context) { - void *ctx; - COMMON_INTERCEPTOR_ENTER(ctx, MD5Init, context); - REAL(MD5Init)(context); - if (context) - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz); -} - -INTERCEPTOR(void, MD5Update, void *context, const unsigned char *data, - unsigned int len) { - void *ctx; - COMMON_INTERCEPTOR_ENTER(ctx, MD5Update, context, data, len); - if (data && len > 0) - COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len); - if (context) - COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz); - REAL(MD5Update)(context, data, len); - if (context) - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz); -} - -INTERCEPTOR(void, MD5Final, unsigned char digest[16], void *context) { - void *ctx; - 
COMMON_INTERCEPTOR_ENTER(ctx, MD5Final, digest, context); - if (context) - COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz); - REAL(MD5Final)(digest, context); - if (digest) - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(unsigned char) * 16); -} - -INTERCEPTOR(char *, MD5End, void *context, char *buf) { - void *ctx; - COMMON_INTERCEPTOR_ENTER(ctx, MD5End, context, buf); - if (context) - COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz); - char *ret = REAL(MD5End)(context, buf); - if (ret) - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length); - return ret; -} - -INTERCEPTOR(char *, MD5File, const char *filename, char *buf) { - void *ctx; - COMMON_INTERCEPTOR_ENTER(ctx, MD5File, filename, buf); - if (filename) - COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1); - char *ret = REAL(MD5File)(filename, buf); - if (ret) - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length); - return ret; -} - -INTERCEPTOR(char *, MD5Data, const unsigned char *data, unsigned int len, - char *buf) { - void *ctx; - COMMON_INTERCEPTOR_ENTER(ctx, MD5Data, data, len, buf); - if (data && len > 0) - COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len); - char *ret = REAL(MD5Data)(data, len, buf); - if (ret) - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length); - return ret; -} - -#define INIT_MD5 \ - COMMON_INTERCEPT_FUNCTION(MD5Init); \ - COMMON_INTERCEPT_FUNCTION(MD5Update); \ - COMMON_INTERCEPT_FUNCTION(MD5Final); \ - COMMON_INTERCEPT_FUNCTION(MD5End); \ - COMMON_INTERCEPT_FUNCTION(MD5File); \ - COMMON_INTERCEPT_FUNCTION(MD5Data) -#else -#define INIT_MD5 -#endif - #if SANITIZER_INTERCEPT_FSEEK INTERCEPTOR(int, fseek, __sanitizer_FILE *stream, long int offset, int whence) { void *ctx; @@ -9084,107 +9007,6 @@ INTERCEPTOR(char *, MD2Data, const unsigned char *data, unsigned int len, #define INIT_MD2 #endif -#if SANITIZER_INTERCEPT_SHA2 -#define SHA2_INTERCEPTORS(LEN, SHA2_STATE_T) \ - INTERCEPTOR(void, SHA##LEN##_Init, void 
*context) { \ - void *ctx; \ - COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Init, context); \ - REAL(SHA##LEN##_Init)(context); \ - if (context) \ - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA##LEN##_CTX_sz); \ - } \ - INTERCEPTOR(void, SHA##LEN##_Update, void *context, \ - const u8 *data, SIZE_T len) { \ - void *ctx; \ - COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Update, context, data, len); \ - if (data && len > 0) \ - COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len); \ - if (context) \ - COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \ - REAL(SHA##LEN##_Update)(context, data, len); \ - if (context) \ - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA##LEN##_CTX_sz); \ - } \ - INTERCEPTOR(void, SHA##LEN##_Final, u8 digest[LEN/8], \ - void *context) { \ - void *ctx; \ - CHECK_EQ(SHA##LEN##_digest_length, LEN/8); \ - COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Final, digest, context); \ - if (context) \ - COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \ - REAL(SHA##LEN##_Final)(digest, context); \ - if (digest) \ - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, \ - sizeof(digest[0]) * \ - SHA##LEN##_digest_length); \ - } \ - INTERCEPTOR(char *, SHA##LEN##_End, void *context, char *buf) { \ - void *ctx; \ - COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_End, context, buf); \ - if (context) \ - COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \ - char *ret = REAL(SHA##LEN##_End)(context, buf); \ - if (ret) \ - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \ - return ret; \ - } \ - INTERCEPTOR(char *, SHA##LEN##_File, const char *filename, char *buf) { \ - void *ctx; \ - COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_File, filename, buf); \ - if (filename) \ - COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);\ - char *ret = REAL(SHA##LEN##_File)(filename, buf); \ - if (ret) \ - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \ - return ret; \ - } \ - INTERCEPTOR(char *, 
SHA##LEN##_FileChunk, const char *filename, char *buf, \ - OFF_T offset, OFF_T length) { \ - void *ctx; \ - COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_FileChunk, filename, buf, offset, \ - length); \ - if (filename) \ - COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);\ - char *ret = REAL(SHA##LEN##_FileChunk)(filename, buf, offset, length); \ - if (ret) \ - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \ - return ret; \ - } \ - INTERCEPTOR(char *, SHA##LEN##_Data, u8 *data, SIZE_T len, char *buf) { \ - void *ctx; \ - COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Data, data, len, buf); \ - if (data && len > 0) \ - COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len); \ - char *ret = REAL(SHA##LEN##_Data)(data, len, buf); \ - if (ret) \ - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \ - return ret; \ - } - -SHA2_INTERCEPTORS(224, u32) -SHA2_INTERCEPTORS(256, u32) -SHA2_INTERCEPTORS(384, u64) -SHA2_INTERCEPTORS(512, u64) - -#define INIT_SHA2_INTECEPTORS(LEN) \ - COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Init); \ - COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Update); \ - COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Final); \ - COMMON_INTERCEPT_FUNCTION(SHA##LEN##_End); \ - COMMON_INTERCEPT_FUNCTION(SHA##LEN##_File); \ - COMMON_INTERCEPT_FUNCTION(SHA##LEN##_FileChunk); \ - COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Data) - -#define INIT_SHA2 \ - INIT_SHA2_INTECEPTORS(224); \ - INIT_SHA2_INTECEPTORS(256); \ - INIT_SHA2_INTECEPTORS(384); \ - INIT_SHA2_INTECEPTORS(512) -#undef SHA2_INTERCEPTORS -#else -#define INIT_SHA2 -#endif - #if SANITIZER_INTERCEPT_VIS INTERCEPTOR(char *, vis, char *dst, int c, int flag, int nextc) { void *ctx; @@ -10659,10 +10481,8 @@ static void InitializeCommonInterceptors() { INIT_SHA1; INIT_MD4; INIT_RMD160; - INIT_MD5; INIT_FSEEK; INIT_MD2; - INIT_SHA2; INIT_VIS; INIT_CDB; INIT_GETFSENT; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h 
b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 28bb6384daf2cd..6959a6d52d604e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -565,10 +565,8 @@ #define SANITIZER_INTERCEPT_SHA1 SI_NETBSD #define SANITIZER_INTERCEPT_MD4 SI_NETBSD #define SANITIZER_INTERCEPT_RMD160 SI_NETBSD -#define SANITIZER_INTERCEPT_MD5 (SI_NETBSD || SI_FREEBSD) #define SANITIZER_INTERCEPT_FSEEK (SI_NETBSD || SI_FREEBSD) #define SANITIZER_INTERCEPT_MD2 SI_NETBSD -#define SANITIZER_INTERCEPT_SHA2 (SI_NETBSD || SI_FREEBSD) #define SANITIZER_INTERCEPT_CDB SI_NETBSD #define SANITIZER_INTERCEPT_VIS (SI_NETBSD || SI_FREEBSD) #define SANITIZER_INTERCEPT_POPEN SI_POSIX diff --git a/compiler-rt/test/sanitizer_common/TestCases/FreeBSD/md5.cpp b/compiler-rt/test/sanitizer_common/TestCases/FreeBSD/md5.cpp deleted file mode 100644 index 13325880a023a1..00000000000000 --- a/compiler-rt/test/sanitizer_common/TestCases/FreeBSD/md5.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// RUN: %clangxx -O0 -g %s -o %t -lmd && %run %t 2>&1 | FileCheck %s - -#include - -#include -#include -#include -#include -#include - -void test1() { - MD5_CTX ctx; - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - uint8_t digest[MD5_DIGEST_LENGTH]; - size_t entropysz = sizeof(entropy); - size_t digestsz = sizeof(digest); - - MD5Init(&ctx); - MD5Update(&ctx, entropy, entropysz); - MD5Final(digest, &ctx); - - printf("test1: '"); - for (size_t i = 0; i < digestsz; i++) - printf("%02x", digest[i]); - printf("'\n"); -} - -void test2() { - MD5_CTX ctx; - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - char digest[MD5_DIGEST_STRING_LENGTH]; - size_t entropysz = sizeof(entropy); - - MD5Init(&ctx); - MD5Update(&ctx, entropy, entropysz); - char *p = MD5End(&ctx, digest); - assert(p); - - printf("test2: '%s'\n", digest); -} - -void test3() { - MD5_CTX ctx; - uint8_t entropy[] = {0x11, 0x22, 0x33, 
0x44, 0x55, 0x66}; - size_t entropysz = sizeof(entropy); - - MD5Init(&ctx); - MD5Update(&ctx, entropy, entropysz); - char *p = MD5End(&ctx, NULL); - assert(strlen(p) == MD5_DIGEST_STRING_LENGTH - 1); - - printf("test3: '%s'\n", p); - - free(p); -} - -void test4() { - char digest[MD5_DIGEST_STRING_LENGTH]; - - char *p = MD5File("/etc/fstab", digest); - assert(p == digest); - - printf("test4: '%s'\n", p); -} - -void test5() { - char *p = MD5File("/etc/fstab", NULL); - assert(strlen(p) == MD5_DIGEST_STRING_LENGTH - 1); - - printf("test5: '%s'\n", p); - - free(p); -} - -void test6() { - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - char digest[MD5_DIGEST_STRING_LENGTH]; - size_t entropysz = sizeof(entropy); - - char *p = MD5Data(entropy, entropysz, digest); - assert(p == digest); - - printf("test6: '%s'\n", p); -} - -void test7() { - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - size_t entropysz = sizeof(entropy); - - char *p = MD5Data(entropy, entropysz, NULL); - assert(strlen(p) == MD5_DIGEST_STRING_LENGTH - 1); - - printf("test7: '%s'\n", p); - - free(p); -} - -int main(void) { - printf("MD5\n"); - - test1(); - test2(); - test3(); - test4(); - test5(); - test6(); - test7(); - - // CHECK: MD5 - // CHECK: test1: '86e65b1ef4a830af347ac05ab4f0e999' - // CHECK: test2: '86e65b1ef4a830af347ac05ab4f0e999' - // CHECK: test3: '86e65b1ef4a830af347ac05ab4f0e999' - // CHECK: test4: '{{.*}}' - // CHECK: test5: '{{.*}}' - // CHECK: test6: '86e65b1ef4a830af347ac05ab4f0e999' - // CHECK: test7: '86e65b1ef4a830af347ac05ab4f0e999' - - return 0; -} diff --git a/compiler-rt/test/sanitizer_common/TestCases/FreeBSD/sha2.cpp b/compiler-rt/test/sanitizer_common/TestCases/FreeBSD/sha2.cpp deleted file mode 100644 index 3012aca7d72072..00000000000000 --- a/compiler-rt/test/sanitizer_common/TestCases/FreeBSD/sha2.cpp +++ /dev/null @@ -1,214 +0,0 @@ -// RUN: %clangxx -O0 -g %s -DSHASIZE=224 -o %t -lmd && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-224 -// RUN: 
%clangxx -O0 -g %s -DSHASIZE=256 -o %t -lmd && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-256 -// RUN: %clangxx -O0 -g %s -DSHASIZE=384 -o %t -lmd && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-384 -// RUN: %clangxx -O0 -g %s -DSHASIZE=512 -o %t -lmd && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-512 - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef SHASIZE -#error SHASIZE must be defined -#endif - -#define _SHA_CTX(x) SHA##x##_CTX -#define SHA_CTX(x) _SHA_CTX(x) - -#define _SHA_DIGEST_LENGTH(x) SHA##x##_DIGEST_LENGTH -#define SHA_DIGEST_LENGTH(x) _SHA_DIGEST_LENGTH(x) - -#define _SHA_DIGEST_STRING_LENGTH(x) SHA##x##_DIGEST_STRING_LENGTH -#define SHA_DIGEST_STRING_LENGTH(x) _SHA_DIGEST_STRING_LENGTH(x) - -#define _SHA_Init(x) SHA##x##_Init -#define SHA_Init(x) _SHA_Init(x) - -#define _SHA_Update(x) SHA##x##_Update -#define SHA_Update(x) _SHA_Update(x) - -#define _SHA_Final(x) SHA##x##_Final -#define SHA_Final(x) _SHA_Final(x) - -#define _SHA_End(x) SHA##x##_End -#define SHA_End(x) _SHA_End(x) - -#define _SHA_File(x) SHA##x##_File -#define SHA_File(x) _SHA_File(x) - -#define _SHA_FileChunk(x) SHA##x##_FileChunk -#define SHA_FileChunk(x) _SHA_FileChunk(x) - -#define _SHA_Data(x) SHA##x##_Data -#define SHA_Data(x) _SHA_Data(x) - -void test1() { - SHA_CTX(SHASIZE) ctx; - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - uint8_t digest[SHA_DIGEST_LENGTH(SHASIZE)]; - size_t entropysz = sizeof(entropy); - size_t digestsz = sizeof(digest); - - SHA_Init(SHASIZE)(&ctx); - SHA_Update(SHASIZE)(&ctx, entropy, entropysz); - SHA_Final(SHASIZE)(digest, &ctx); - - printf("test1: '"); - for (size_t i = 0; i < digestsz; i++) - printf("%02x", digest[i]); - printf("'\n"); -} - -void test2() { - SHA_CTX(SHASIZE) ctx; - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)]; - size_t entropysz = sizeof(entropy); - - SHA_Init(SHASIZE)(&ctx); - 
SHA_Update(SHASIZE)(&ctx, entropy, entropysz); - char *p = SHA_End(SHASIZE)(&ctx, digest); - assert(p == digest); - - printf("test2: '%s'\n", digest); -} - -void test3() { - SHA_CTX(SHASIZE) ctx; - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - size_t entropysz = sizeof(entropy); - - SHA_Init(SHASIZE)(&ctx); - SHA_Update(SHASIZE)(&ctx, entropy, entropysz); - char *p = SHA_End(SHASIZE)(&ctx, NULL); - assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1); - - printf("test3: '%s'\n", p); - - free(p); -} - -void test4() { - char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)]; - - char *p = SHA_File(SHASIZE)("/etc/fstab", digest); - assert(p == digest); - - printf("test4: '%s'\n", p); -} - -void test5() { - char *p = SHA_File(SHASIZE)("/etc/fstab", NULL); - assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1); - - printf("test5: '%s'\n", p); - - free(p); -} - -void test6() { - char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)]; - - char *p = SHA_FileChunk(SHASIZE)("/etc/fstab", digest, 10, 20); - assert(p == digest); - - printf("test6: '%s'\n", p); -} - -void test7() { - char *p = SHA_FileChunk(SHASIZE)("/etc/fstab", NULL, 10, 20); - assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1); - - printf("test7: '%s'\n", p); - - free(p); -} - -void test8() { - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)]; - size_t entropysz = sizeof(entropy); - - char *p = SHA_Data(SHASIZE)(entropy, entropysz, digest); - assert(p == digest); - - printf("test8: '%s'\n", p); -} - -void test9() { - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - size_t entropysz = sizeof(entropy); - - char *p = SHA_Data(SHASIZE)(entropy, entropysz, NULL); - assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1); - - printf("test9: '%s'\n", p); - - free(p); -} - -int main(void) { - printf("SHA%d\n", SHASIZE); - - test1(); - test2(); - test3(); - test4(); - test5(); - test6(); - test7(); - test8(); - test9(); - - // 
CHECK-224: SHA224 - // CHECK-224: test1: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48' - // CHECK-224: test2: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48' - // CHECK-224: test3: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48' - // CHECK-224: test4: '{{.*}}' - // CHECK-224: test5: '{{.*}}' - // CHECK-224: test6: '{{.*}}' - // CHECK-224: test7: '{{.*}}' - // CHECK-224: test8: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48' - // CHECK-224: test9: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48' - - // CHECK-256: SHA256 - // CHECK-256: test1: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8' - // CHECK-256: test2: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8' - // CHECK-256: test3: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8' - // CHECK-256: test4: '{{.*}}' - // CHECK-256: test5: '{{.*}}' - // CHECK-256: test6: '{{.*}}' - // CHECK-256: test7: '{{.*}}' - // CHECK-256: test8: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8' - // CHECK-256: test9: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8' - - // CHECK-384: SHA384 - // CHECK-384: test1: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12' - // CHECK-384: test2: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12' - // CHECK-384: test3: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12' - // CHECK-384: test4: '{{.*}}' - // CHECK-384: test5: '{{.*}}' - // CHECK-384: test6: '{{.*}}' - // CHECK-384: test7: '{{.*}}' - // CHECK-384: test8: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12' - // CHECK-384: test9: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12' - - // CHECK-512: SHA512 - // CHECK-512: test1: 
'0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e' - // CHECK-512: test2: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e' - // CHECK-512: test3: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e' - // CHECK-512: test4: '{{.*}}' - // CHECK-512: test5: '{{.*}}' - // CHECK-512: test6: '{{.*}}' - // CHECK-512: test7: '{{.*}}' - // CHECK-512: test8: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e' - // CHECK-512: test9: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e' - - return 0; -} diff --git a/compiler-rt/test/sanitizer_common/TestCases/NetBSD/md5.cpp b/compiler-rt/test/sanitizer_common/TestCases/NetBSD/md5.cpp deleted file mode 100644 index aee21681800d89..00000000000000 --- a/compiler-rt/test/sanitizer_common/TestCases/NetBSD/md5.cpp +++ /dev/null @@ -1,114 +0,0 @@ -// RUN: %clangxx -O0 -g %s -o %t && %run %t 2>&1 | FileCheck %s - -#include - -#include -#include -#include -#include -#include -#include - -void test1() { - MD5_CTX ctx; - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - uint8_t digest[MD5_DIGEST_LENGTH]; - - MD5Init(&ctx); - MD5Update(&ctx, entropy, __arraycount(entropy)); - MD5Final(digest, &ctx); - - printf("test1: '"); - for (size_t i = 0; i < __arraycount(digest); i++) - printf("%02x", digest[i]); - printf("'\n"); -} - -void test2() { - MD5_CTX ctx; - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - char digest[MD5_DIGEST_STRING_LENGTH]; - - MD5Init(&ctx); - MD5Update(&ctx, entropy, __arraycount(entropy)); - char *p = MD5End(&ctx, digest); - assert(p); - - printf("test2: '%s'\n", digest); -} - -void test3() { - MD5_CTX ctx; - 
uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - - MD5Init(&ctx); - MD5Update(&ctx, entropy, __arraycount(entropy)); - char *p = MD5End(&ctx, NULL); - assert(strlen(p) == MD5_DIGEST_STRING_LENGTH - 1); - - printf("test3: '%s'\n", p); - - free(p); -} - -void test4() { - char digest[MD5_DIGEST_STRING_LENGTH]; - - char *p = MD5File("/etc/fstab", digest); - assert(p == digest); - - printf("test4: '%s'\n", p); -} - -void test5() { - char *p = MD5File("/etc/fstab", NULL); - assert(strlen(p) == MD5_DIGEST_STRING_LENGTH - 1); - - printf("test5: '%s'\n", p); - - free(p); -} - -void test6() { - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - char digest[MD5_DIGEST_STRING_LENGTH]; - - char *p = MD5Data(entropy, __arraycount(entropy), digest); - assert(p == digest); - - printf("test6: '%s'\n", p); -} - -void test7() { - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - - char *p = MD5Data(entropy, __arraycount(entropy), NULL); - assert(strlen(p) == MD5_DIGEST_STRING_LENGTH - 1); - - printf("test7: '%s'\n", p); - - free(p); -} - -int main(void) { - printf("MD5\n"); - - test1(); - test2(); - test3(); - test4(); - test5(); - test6(); - test7(); - - // CHECK: MD5 - // CHECK: test1: '86e65b1ef4a830af347ac05ab4f0e999' - // CHECK: test2: '86e65b1ef4a830af347ac05ab4f0e999' - // CHECK: test3: '86e65b1ef4a830af347ac05ab4f0e999' - // CHECK: test4: '{{.*}}' - // CHECK: test5: '{{.*}}' - // CHECK: test6: '86e65b1ef4a830af347ac05ab4f0e999' - // CHECK: test7: '86e65b1ef4a830af347ac05ab4f0e999' - - return 0; -} diff --git a/compiler-rt/test/sanitizer_common/TestCases/NetBSD/sha2.cpp b/compiler-rt/test/sanitizer_common/TestCases/NetBSD/sha2.cpp deleted file mode 100644 index e905e3b610fd37..00000000000000 --- a/compiler-rt/test/sanitizer_common/TestCases/NetBSD/sha2.cpp +++ /dev/null @@ -1,206 +0,0 @@ -// RUN: %clangxx -O0 -g %s -DSHASIZE=224 -o %t && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-224 -// RUN: %clangxx -O0 -g %s -DSHASIZE=256 -o %t && %run %t 
2>&1 | FileCheck %s -check-prefix=CHECK-256 -// RUN: %clangxx -O0 -g %s -DSHASIZE=384 -o %t && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-384 -// RUN: %clangxx -O0 -g %s -DSHASIZE=512 -o %t && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-512 - -#include - -#include -#include -#include -#include -#include -#include - -#ifndef SHASIZE -#error SHASIZE must be defined -#endif - -#define _SHA_CTX(x) SHA##x##_CTX -#define SHA_CTX(x) _SHA_CTX(x) - -#define _SHA_DIGEST_LENGTH(x) SHA##x##_DIGEST_LENGTH -#define SHA_DIGEST_LENGTH(x) _SHA_DIGEST_LENGTH(x) - -#define _SHA_DIGEST_STRING_LENGTH(x) SHA##x##_DIGEST_STRING_LENGTH -#define SHA_DIGEST_STRING_LENGTH(x) _SHA_DIGEST_STRING_LENGTH(x) - -#define _SHA_Init(x) SHA##x##_Init -#define SHA_Init(x) _SHA_Init(x) - -#define _SHA_Update(x) SHA##x##_Update -#define SHA_Update(x) _SHA_Update(x) - -#define _SHA_Final(x) SHA##x##_Final -#define SHA_Final(x) _SHA_Final(x) - -#define _SHA_End(x) SHA##x##_End -#define SHA_End(x) _SHA_End(x) - -#define _SHA_File(x) SHA##x##_File -#define SHA_File(x) _SHA_File(x) - -#define _SHA_FileChunk(x) SHA##x##_FileChunk -#define SHA_FileChunk(x) _SHA_FileChunk(x) - -#define _SHA_Data(x) SHA##x##_Data -#define SHA_Data(x) _SHA_Data(x) - -void test1() { - SHA_CTX(SHASIZE) ctx; - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - uint8_t digest[SHA_DIGEST_LENGTH(SHASIZE)]; - - SHA_Init(SHASIZE)(&ctx); - SHA_Update(SHASIZE)(&ctx, entropy, __arraycount(entropy)); - SHA_Final(SHASIZE)(digest, &ctx); - - printf("test1: '"); - for (size_t i = 0; i < __arraycount(digest); i++) - printf("%02x", digest[i]); - printf("'\n"); -} - -void test2() { - SHA_CTX(SHASIZE) ctx; - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)]; - - SHA_Init(SHASIZE)(&ctx); - SHA_Update(SHASIZE)(&ctx, entropy, __arraycount(entropy)); - char *p = SHA_End(SHASIZE)(&ctx, digest); - assert(p == digest); - - printf("test2: '%s'\n", digest); -} - -void test3() { - 
SHA_CTX(SHASIZE) ctx; - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - - SHA_Init(SHASIZE)(&ctx); - SHA_Update(SHASIZE)(&ctx, entropy, __arraycount(entropy)); - char *p = SHA_End(SHASIZE)(&ctx, NULL); - assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1); - - printf("test3: '%s'\n", p); - - free(p); -} - -void test4() { - char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)]; - - char *p = SHA_File(SHASIZE)("/etc/fstab", digest); - assert(p == digest); - - printf("test4: '%s'\n", p); -} - -void test5() { - char *p = SHA_File(SHASIZE)("/etc/fstab", NULL); - assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1); - - printf("test5: '%s'\n", p); - - free(p); -} - -void test6() { - char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)]; - - char *p = SHA_FileChunk(SHASIZE)("/etc/fstab", digest, 10, 20); - assert(p == digest); - - printf("test6: '%s'\n", p); -} - -void test7() { - char *p = SHA_FileChunk(SHASIZE)("/etc/fstab", NULL, 10, 20); - assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1); - - printf("test7: '%s'\n", p); - - free(p); -} - -void test8() { - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)]; - - char *p = SHA_Data(SHASIZE)(entropy, __arraycount(entropy), digest); - assert(p == digest); - - printf("test8: '%s'\n", p); -} - -void test9() { - uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - - char *p = SHA_Data(SHASIZE)(entropy, __arraycount(entropy), NULL); - assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1); - - printf("test9: '%s'\n", p); - - free(p); -} - -int main(void) { - printf("SHA" ___STRING(SHASIZE) "\n"); - - test1(); - test2(); - test3(); - test4(); - test5(); - test6(); - test7(); - test8(); - test9(); - - // CHECK-224: SHA224 - // CHECK-224: test1: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48' - // CHECK-224: test2: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48' - // CHECK-224: test3: 
'760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48' - // CHECK-224: test4: '{{.*}}' - // CHECK-224: test5: '{{.*}}' - // CHECK-224: test6: '{{.*}}' - // CHECK-224: test7: '{{.*}}' - // CHECK-224: test8: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48' - // CHECK-224: test9: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48' - - // CHECK-256: SHA256 - // CHECK-256: test1: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8' - // CHECK-256: test2: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8' - // CHECK-256: test3: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8' - // CHECK-256: test4: '{{.*}}' - // CHECK-256: test5: '{{.*}}' - // CHECK-256: test6: '{{.*}}' - // CHECK-256: test7: '{{.*}}' - // CHECK-256: test8: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8' - // CHECK-256: test9: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8' - - // CHECK-384: SHA384 - // CHECK-384: test1: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12' - // CHECK-384: test2: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12' - // CHECK-384: test3: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12' - // CHECK-384: test4: '{{.*}}' - // CHECK-384: test5: '{{.*}}' - // CHECK-384: test6: '{{.*}}' - // CHECK-384: test7: '{{.*}}' - // CHECK-384: test8: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12' - // CHECK-384: test9: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12' - - // CHECK-512: SHA512 - // CHECK-512: test1: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e' - // CHECK-512: test2: 
'0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e' - // CHECK-512: test3: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e' - // CHECK-512: test4: '{{.*}}' - // CHECK-512: test5: '{{.*}}' - // CHECK-512: test6: '{{.*}}' - // CHECK-512: test7: '{{.*}}' - // CHECK-512: test8: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e' - // CHECK-512: test9: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e' - - return 0; -} From d50302f31cee86d3270a34f5739c63a41f60f2c1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 9 Oct 2024 08:52:32 +0400 Subject: [PATCH 09/55] clang/AMDGPU: Stop emitting amdgpu-unsafe-fp-atomics attribute (#111579) --- clang/lib/CodeGen/Targets/AMDGPU.cpp | 3 --- clang/test/CodeGenCUDA/amdgpu-func-attrs.cu | 22 --------------------- clang/test/OpenMP/amdgcn-attributes.cpp | 3 --- 3 files changed, 28 deletions(-) delete mode 100644 clang/test/CodeGenCUDA/amdgpu-func-attrs.cu diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 37e6af3d4196a8..b852dcffb295c9 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -452,9 +452,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( if (FD) setFunctionDeclAttributes(FD, F, M); - if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics()) - F->addFnAttr("amdgpu-unsafe-fp-atomics", "true"); - if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts) F->addFnAttr("amdgpu-ieee", "false"); } diff --git a/clang/test/CodeGenCUDA/amdgpu-func-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-func-attrs.cu deleted file mode 100644 index 89add87919c12d..00000000000000 --- a/clang/test/CodeGenCUDA/amdgpu-func-attrs.cu 
+++ /dev/null @@ -1,22 +0,0 @@ -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ -// RUN: | FileCheck -check-prefixes=NO-UNSAFE-FP-ATOMICS %s -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ -// RUN: -munsafe-fp-atomics \ -// RUN: | FileCheck -check-prefixes=UNSAFE-FP-ATOMICS %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ -// RUN: -o - -x hip %s -munsafe-fp-atomics \ -// RUN: | FileCheck -check-prefix=NO-UNSAFE-FP-ATOMICS %s - -#include "Inputs/cuda.h" - -__device__ void test() { -// UNSAFE-FP-ATOMICS: define{{.*}} void @_Z4testv() [[ATTR:#[0-9]+]] -} - - -// Make sure this is silently accepted on other targets. -// NO-UNSAFE-FP-ATOMICS-NOT: "amdgpu-unsafe-fp-atomics" - -// UNSAFE-FP-ATOMICS-DAG: attributes [[ATTR]] = {{.*}}"amdgpu-unsafe-fp-atomics"="true" diff --git a/clang/test/OpenMP/amdgcn-attributes.cpp b/clang/test/OpenMP/amdgcn-attributes.cpp index 5ddc34537d12fb..2c9e16a4f5098e 100644 --- a/clang/test/OpenMP/amdgcn-attributes.cpp +++ b/clang/test/OpenMP/amdgcn-attributes.cpp @@ -5,7 +5,6 @@ // RUN: %clang_cc1 -target-cpu gfx900 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=CPU,ALL %s // RUN: %clang_cc1 -menable-no-nans -mno-amdgpu-ieee -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=NOIEEE,ALL %s -// RUN: %clang_cc1 -munsafe-fp-atomics -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=UNSAFEATOMIC,ALL %s // expected-no-diagnostics @@ -35,9 +34,7 @@ int callable(int x) { // 
DEFAULT: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } // CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="true" } // NOIEEE: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } -// UNSAFEATOMIC: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "amdgpu-unsafe-fp-atomics"="true" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } // DEFAULT: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } // NOIEEE: attributes #2 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// UNSAFEATOMIC: attributes #2 = { convergent mustprogress noinline nounwind optnone 
"amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } From 4336f00f2156970cc0af2816331387a0a4039317 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Wed, 9 Oct 2024 01:55:21 -0300 Subject: [PATCH 10/55] [clang] Track function template instantiation from definition (#110387) This fixes instantiation of definition for friend function templates, when the declaration found and the one containing the definition have different template contexts. In these cases, the function declaration corresponding to the definition is not available; it may not even be instantiated at all. So this patch adds a bit which tracks which function template declaration was instantiated from the member template. It's used to find which primary template serves as a context for the purpose of obtaining the template arguments needed to instantiate the definition. Fixes #55509 --- clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/AST/Decl.h | 7 ++ clang/include/clang/AST/DeclBase.h | 10 +- clang/include/clang/AST/DeclTemplate.h | 9 ++ clang/include/clang/Sema/Sema.h | 6 ++ clang/lib/AST/Decl.cpp | 1 + clang/lib/Sema/SemaTemplateDeduction.cpp | 17 +-- clang/lib/Sema/SemaTemplateInstantiate.cpp | 17 ++- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 22 +++- clang/lib/Serialization/ASTReaderDecl.cpp | 1 + clang/lib/Serialization/ASTWriterDecl.cpp | 3 +- clang/test/SemaTemplate/GH55509.cpp | 101 ++++++++++++++++++ 12 files changed, 169 insertions(+), 26 deletions(-) create mode 100644 clang/test/SemaTemplate/GH55509.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c1122e1180ab91..8d02cc3eae9fd9 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -474,6 +474,7 @@ Bug Fixes to C++ Support - Fixed an assertion failure in debug mode, and potential crashes in release mode, when diagnosing a failed cast caused indirectly by a failed implicit conversion to the type of the constructor parameter. 
- Fixed an assertion failure by adjusting integral to boolean vector conversions (#GH108326) +- Clang is now better at keeping track of friend function template instance contexts. (#GH55509) - Fixed an issue deducing non-type template arguments of reference type. (#GH73460) - Fixed an issue in constraint evaluation, where type constraints on the lambda expression containing outer unexpanded parameters were not correctly expanded. (#GH101754) diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 7ff35d73df5997..6afc86710a8137 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -2299,6 +2299,13 @@ class FunctionDecl : public DeclaratorDecl, FunctionDeclBits.IsLateTemplateParsed = ILT; } + bool isInstantiatedFromMemberTemplate() const { + return FunctionDeclBits.IsInstantiatedFromMemberTemplate; + } + void setInstantiatedFromMemberTemplate(bool Val = true) { + FunctionDeclBits.IsInstantiatedFromMemberTemplate = Val; + } + /// Whether this function is "trivial" in some specialized C++ senses. /// Can only be true for default constructors, copy constructors, /// copy assignment operators, and destructors. Not meaningful until diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index ee662ed73d7e0e..eb67dc03157e64 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -1763,6 +1763,8 @@ class DeclContext { uint64_t HasImplicitReturnZero : 1; LLVM_PREFERRED_TYPE(bool) uint64_t IsLateTemplateParsed : 1; + LLVM_PREFERRED_TYPE(bool) + uint64_t IsInstantiatedFromMemberTemplate : 1; /// Kind of contexpr specifier as defined by ConstexprSpecKind. LLVM_PREFERRED_TYPE(ConstexprSpecKind) @@ -1813,7 +1815,7 @@ class DeclContext { }; /// Number of inherited and non-inherited bits in FunctionDeclBitfields. 
- enum { NumFunctionDeclBits = NumDeclContextBits + 31 }; + enum { NumFunctionDeclBits = NumDeclContextBits + 32 }; /// Stores the bits used by CXXConstructorDecl. If modified /// NumCXXConstructorDeclBits and the accessor @@ -1824,12 +1826,12 @@ class DeclContext { LLVM_PREFERRED_TYPE(FunctionDeclBitfields) uint64_t : NumFunctionDeclBits; - /// 20 bits to fit in the remaining available space. + /// 19 bits to fit in the remaining available space. /// Note that this makes CXXConstructorDeclBitfields take /// exactly 64 bits and thus the width of NumCtorInitializers /// will need to be shrunk if some bit is added to NumDeclContextBitfields, /// NumFunctionDeclBitfields or CXXConstructorDeclBitfields. - uint64_t NumCtorInitializers : 17; + uint64_t NumCtorInitializers : 16; LLVM_PREFERRED_TYPE(bool) uint64_t IsInheritingConstructor : 1; @@ -1843,7 +1845,7 @@ class DeclContext { }; /// Number of inherited and non-inherited bits in CXXConstructorDeclBitfields. - enum { NumCXXConstructorDeclBits = NumFunctionDeclBits + 20 }; + enum { NumCXXConstructorDeclBits = NumFunctionDeclBits + 19 }; /// Stores the bits used by ObjCMethodDecl. 
/// If modified NumObjCMethodDeclBits and the accessor diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index 05739f39d2a496..2fb49ec1aea0d0 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -1008,6 +1008,15 @@ class FunctionTemplateDecl : public RedeclarableTemplateDecl { return getTemplatedDecl()->isThisDeclarationADefinition(); } + bool isCompatibleWithDefinition() const { + return getTemplatedDecl()->isInstantiatedFromMemberTemplate() || + isThisDeclarationADefinition(); + } + void setInstantiatedFromMemberTemplate(FunctionTemplateDecl *D) { + getTemplatedDecl()->setInstantiatedFromMemberTemplate(); + RedeclarableTemplateDecl::setInstantiatedFromMemberTemplate(D); + } + /// Return the specialization with the provided arguments if it exists, /// otherwise return the insertion point. FunctionDecl *findSpecialization(ArrayRef Args, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 7ff9c2754a6fe0..043456438b6d03 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -13027,6 +13027,12 @@ class Sema final : public SemaBase { std::optional> Innermost = std::nullopt, bool RelativeToPrimary = false, bool ForConstraintInstantiation = false); + void getTemplateInstantiationArgs( + MultiLevelTemplateArgumentList &Result, const NamedDecl *D, + const DeclContext *DC = nullptr, bool Final = false, + std::optional> Innermost = std::nullopt, + bool RelativeToPrimary = false, bool ForConstraintInstantiation = false); + /// RAII object to handle the state changes required to synthesize /// a function body. 
class SynthesizedFunctionScope { diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 58d11a0312c505..8f54b5f1589d4f 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3067,6 +3067,7 @@ FunctionDecl::FunctionDecl(Kind DK, ASTContext &C, DeclContext *DC, FunctionDeclBits.IsIneligibleOrNotSelected = false; FunctionDeclBits.HasImplicitReturnZero = false; FunctionDeclBits.IsLateTemplateParsed = false; + FunctionDeclBits.IsInstantiatedFromMemberTemplate = false; FunctionDeclBits.ConstexprKind = static_cast(ConstexprKind); FunctionDeclBits.BodyContainsImmediateEscalatingExpression = false; FunctionDeclBits.InstantiationIsPending = false; diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 8e80ab730ac342..f9a8d2d9ff0b1d 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -4007,22 +4007,7 @@ TemplateDeductionResult Sema::FinishTemplateArgumentDeduction( if (FunctionTemplate->getFriendObjectKind()) Owner = FunctionTemplate->getLexicalDeclContext(); FunctionDecl *FD = FunctionTemplate->getTemplatedDecl(); - // additional check for inline friend, - // ``` - // template int foo(F1 X); - // template struct A { - // template friend int foo(F1 X) { return A1; } - // }; - // template struct A<1>; - // int a = foo(1.0); - // ``` - const FunctionDecl *FDFriend; - if (FD->getFriendObjectKind() == Decl::FriendObjectKind::FOK_None && - FD->isDefined(FDFriend, /*CheckForPendingFriendDefinition*/ true) && - FDFriend->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None) { - FD = const_cast(FDFriend); - Owner = FD->getLexicalDeclContext(); - } + MultiLevelTemplateArgumentList SubstArgs( FunctionTemplate, CanonicalDeducedArgumentList->asArray(), /*Final=*/false); diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index f2007fc5d85a50..9c5b3e7c9066c7 100644 --- 
a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -512,13 +512,13 @@ struct TemplateInstantiationArgumentCollecter } // namespace -MultiLevelTemplateArgumentList Sema::getTemplateInstantiationArgs( - const NamedDecl *ND, const DeclContext *DC, bool Final, +void Sema::getTemplateInstantiationArgs( + MultiLevelTemplateArgumentList &Result, const NamedDecl *ND, + const DeclContext *DC, bool Final, std::optional> Innermost, bool RelativeToPrimary, bool ForConstraintInstantiation) { assert((ND || DC) && "Can't find arguments for a decl if one isn't provided"); // Accumulate the set of template argument lists in this structure. - MultiLevelTemplateArgumentList Result; const Decl *CurDecl = ND; if (!CurDecl) @@ -529,6 +529,17 @@ MultiLevelTemplateArgumentList Sema::getTemplateInstantiationArgs( do { CurDecl = Collecter.Visit(const_cast(CurDecl)); } while (CurDecl); +} + +MultiLevelTemplateArgumentList Sema::getTemplateInstantiationArgs( + const NamedDecl *ND, const DeclContext *DC, bool Final, + std::optional> Innermost, bool RelativeToPrimary, + bool ForConstraintInstantiation) { + assert((ND || DC) && "Can't find arguments for a decl if one isn't provided"); + // Accumulate the set of template argument lists in this structure. 
+ MultiLevelTemplateArgumentList Result; + getTemplateInstantiationArgs(Result, ND, DC, Final, Innermost, + RelativeToPrimary, ForConstraintInstantiation); return Result; } diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index d29434486dcb06..17d167b3a5e0c6 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -5214,8 +5214,26 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, RebuildTypeSourceInfoForDefaultSpecialMembers(); SetDeclDefaulted(Function, PatternDecl->getLocation()); } else { - MultiLevelTemplateArgumentList TemplateArgs = getTemplateInstantiationArgs( - Function, Function->getLexicalDeclContext()); + DeclContext *DC = Function; + MultiLevelTemplateArgumentList TemplateArgs; + if (auto *Primary = Function->getPrimaryTemplate(); + Primary && + !isGenericLambdaCallOperatorOrStaticInvokerSpecialization(Function)) { + auto It = llvm::find_if(Primary->redecls(), + [](const RedeclarableTemplateDecl *RTD) { + return cast(RTD) + ->isCompatibleWithDefinition(); + }); + assert(It != Primary->redecls().end() && + "Should't get here without a definition"); + DC = (*It)->getLexicalDeclContext(); + if (Function->getTemplateSpecializationKind() != + TSK_ExplicitSpecialization) + TemplateArgs.addOuterTemplateArguments( + Function, Function->getTemplateSpecializationArgs()->asArray(), + /*Final=*/false); + } + getTemplateInstantiationArgs(TemplateArgs, /*D=*/nullptr, DC); // Substitute into the qualifier; we can get a substitution failure here // through evil use of alias templates. 
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 1ccc810f415eb4..a44df84a8bcef2 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -1087,6 +1087,7 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { FD->setHasImplicitReturnZero(FunctionDeclBits.getNextBit()); FD->setIsMultiVersion(FunctionDeclBits.getNextBit()); FD->setLateTemplateParsed(FunctionDeclBits.getNextBit()); + FD->setInstantiatedFromMemberTemplate(FunctionDeclBits.getNextBit()); FD->setFriendConstraintRefersToEnclosingTemplate( FunctionDeclBits.getNextBit()); FD->setUsesSEHTry(FunctionDeclBits.getNextBit()); diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index f21cbd11b6ab89..dec93317dc7b37 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -626,7 +626,7 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) { } void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { - static_assert(DeclContext::NumFunctionDeclBits == 44, + static_assert(DeclContext::NumFunctionDeclBits == 45, "You need to update the serializer after you change the " "FunctionDeclBits"); @@ -732,6 +732,7 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { FunctionDeclBits.addBit(D->hasImplicitReturnZero()); FunctionDeclBits.addBit(D->isMultiVersion()); FunctionDeclBits.addBit(D->isLateTemplateParsed()); + FunctionDeclBits.addBit(D->isInstantiatedFromMemberTemplate()); FunctionDeclBits.addBit(D->FriendConstraintRefersToEnclosingTemplate()); FunctionDeclBits.addBit(D->usesSEHTry()); Record.push_back(FunctionDeclBits); diff --git a/clang/test/SemaTemplate/GH55509.cpp b/clang/test/SemaTemplate/GH55509.cpp new file mode 100644 index 00000000000000..f95833fbed7b19 --- /dev/null +++ b/clang/test/SemaTemplate/GH55509.cpp @@ -0,0 +1,101 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++26 %s + +namespace 
t1 { + template struct A { + template friend auto cica(const A&, C) { + return N; + } + }; + + template<> struct A<0> { + template friend auto cica(const A<0>&, C); + // expected-note@-1 {{declared here}} + }; + + void test() { + cica(A<0>{}, 0); + // expected-error@-1 {{function 'cica' with deduced return type cannot be used before it is defined}} + + (void)A<1>{}; + cica(A<0>{}, 0); + } +} // namespace t1 +namespace t2 { + template struct A { + template friend auto cica(const A&, C) { + return N; + } + }; + + template<> struct A<0> { + template friend auto cica(const A<0>&, C); + }; + + template {}, nullptr))> + void MakeCica(); + // expected-note@-1 {{candidate function}} + + template void MakeCica(A = {}); + // expected-note@-1 {{candidate function}} + + void test() { + MakeCica<0>(); + + MakeCica<0>(); + // expected-error@-1 {{call to 'MakeCica' is ambiguous}} + } +} // namespace t2 +namespace t3 { + template struct A { + template friend auto cica(const A&, C) { + return N-1; + } + }; + + template<> struct A<0> { + template friend auto cica(const A<0>&, C); + }; + + template + static constexpr bool MakeCica(int); + + template + static constexpr bool MakeCica(short, A = {}); + + template , class Val = decltype(MakeCica(0))> + static constexpr bool has_cica = Val{}; + + constexpr bool cica2 = has_cica<0> || has_cica<0>; +} // namespace t3 +namespace t4 { + template struct A { + template friend auto cica(const A&, C); + }; + + template<> struct A<0> { + template friend auto cica(const A<0>&, C) { + C a; + } + }; + + template struct A<1>; + + void test() { + cica(A<0>{}, 0); + } +} // namespace t4 +namespace regression1 { + template class A; + + template [[gnu::abi_tag("TAG")]] void foo(A); + + template struct A { + friend void foo <>(A); + }; + + template struct A; + + template [[gnu::abi_tag("TAG")]] void foo(A) {} + + template void foo(A); +} // namespace regression1 From fbd2a918652894b31199dffe1ce5335b019332d7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault 
Date: Wed, 9 Oct 2024 09:24:37 +0400 Subject: [PATCH 11/55] InferAddressSpaces: Handle llvm.fake.use (#109567) --- .../Transforms/Scalar/InferAddressSpaces.cpp | 14 +++ .../Transforms/InferAddressSpaces/fake-use.ll | 97 +++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 llvm/test/Transforms/InferAddressSpaces/fake-use.ll diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index 566cdc51f6e74a..60fd2a286119b3 100644 --- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -414,6 +414,10 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II, II->setCalledFunction(NewDecl); return true; } + case Intrinsic::fake_use: { + II->replaceUsesOfWith(OldV, NewV); + return true; + } default: { Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); if (!Rewrite) @@ -455,6 +459,16 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands( appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(1), PostorderStack, Visited); break; + case Intrinsic::fake_use: { + for (Value *Op : II->operands()) { + if (Op->getType()->isPtrOrPtrVectorTy()) { + appendsFlatAddressExpressionToPostorderStack(Op, PostorderStack, + Visited); + } + } + + break; + } default: SmallVector OpIndexes; if (TTI->collectFlatAddressOperands(OpIndexes, IID)) { diff --git a/llvm/test/Transforms/InferAddressSpaces/fake-use.ll b/llvm/test/Transforms/InferAddressSpaces/fake-use.ll new file mode 100644 index 00000000000000..ad7f621dc40e85 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/fake-use.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=infer-address-spaces -assume-default-is-flat-addrspace %s | FileCheck %s + +declare void @llvm.fake.use(...) 
+ +@gv = internal addrspace(3) global float 0.0, align 4 + +define void @one_fake_use(ptr addrspace(1) %global.ptr) { +; CHECK-LABEL: define void @one_fake_use( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]]) { +; CHECK-NEXT: call void (...) @llvm.fake.use(ptr addrspace(1) [[GLOBAL_PTR]]) +; CHECK-NEXT: ret void +; + %cast0 = addrspacecast ptr addrspace(1) %global.ptr to ptr + call void (...) @llvm.fake.use(ptr %cast0) + ret void +} + +define void @one_fake_use_repeat_operands(ptr addrspace(1) %global.ptr) { +; CHECK-LABEL: define void @one_fake_use_repeat_operands( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]]) { +; CHECK-NEXT: call void (...) @llvm.fake.use(ptr addrspace(1) [[GLOBAL_PTR]], ptr addrspace(1) [[GLOBAL_PTR]]) +; CHECK-NEXT: ret void +; + %cast0 = addrspacecast ptr addrspace(1) %global.ptr to ptr + call void (...) @llvm.fake.use(ptr %cast0, ptr %cast0) + ret void +} + +define void @one_fake_use_refers_original_ptr(ptr addrspace(1) %global.ptr) { +; CHECK-LABEL: define void @one_fake_use_refers_original_ptr( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]]) { +; CHECK-NEXT: call void (...) @llvm.fake.use(ptr addrspace(1) [[GLOBAL_PTR]], ptr addrspace(1) [[GLOBAL_PTR]]) +; CHECK-NEXT: ret void +; + %cast0 = addrspacecast ptr addrspace(1) %global.ptr to ptr + call void (...) @llvm.fake.use(ptr %cast0, ptr addrspace(1) %global.ptr) + ret void +} + +define void @multiple_inferrable_fake_use(ptr addrspace(1) %global.ptr0, ptr addrspace(1) %global.ptr1) { +; CHECK-LABEL: define void @multiple_inferrable_fake_use( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR0:%.*]], ptr addrspace(1) [[GLOBAL_PTR1:%.*]]) { +; CHECK-NEXT: call void (...) @llvm.fake.use(ptr addrspace(1) [[GLOBAL_PTR0]], ptr addrspace(1) [[GLOBAL_PTR1]]) +; CHECK-NEXT: ret void +; + %cast0 = addrspacecast ptr addrspace(1) %global.ptr0 to ptr + %cast1 = addrspacecast ptr addrspace(1) %global.ptr1 to ptr + call void (...) 
@llvm.fake.use(ptr %cast0, ptr %cast1) + ret void +} + +define void @multiple_fake_use_one_inferrable(ptr %flat.ptr0, ptr addrspace(1) %global.ptr1) { +; CHECK-LABEL: define void @multiple_fake_use_one_inferrable( +; CHECK-SAME: ptr [[FLAT_PTR0:%.*]], ptr addrspace(1) [[GLOBAL_PTR1:%.*]]) { +; CHECK-NEXT: call void (...) @llvm.fake.use(ptr [[FLAT_PTR0]], ptr addrspace(1) [[GLOBAL_PTR1]]) +; CHECK-NEXT: call void (...) @llvm.fake.use(ptr addrspace(1) [[GLOBAL_PTR1]], ptr [[FLAT_PTR0]]) +; CHECK-NEXT: ret void +; + %cast1 = addrspacecast ptr addrspace(1) %global.ptr1 to ptr + call void (...) @llvm.fake.use(ptr %flat.ptr0, ptr %cast1) + call void (...) @llvm.fake.use(ptr %cast1, ptr %flat.ptr0) + ret void +} + +define void @vector_of_pointers(<2 x ptr addrspace(1)> %global.ptr) { +; CHECK-LABEL: define void @vector_of_pointers( +; CHECK-SAME: <2 x ptr addrspace(1)> [[GLOBAL_PTR:%.*]]) { +; CHECK-NEXT: call void (...) @llvm.fake.use(<2 x ptr addrspace(1)> [[GLOBAL_PTR]]) +; CHECK-NEXT: ret void +; + %cast0 = addrspacecast <2 x ptr addrspace(1)> %global.ptr to <2 x ptr> + call void (...) @llvm.fake.use(<2 x ptr> %cast0) + ret void +} + +define void @use_global_var() { +; CHECK-LABEL: define void @use_global_var() { +; CHECK-NEXT: call void (...) @llvm.fake.use(ptr addrspace(3) @gv) +; CHECK-NEXT: ret void +; + call void (...) @llvm.fake.use(ptr addrspacecast (ptr addrspace(3) @gv to ptr)) + ret void +} + +define void @use_gep_cast(ptr addrspace(1) %global.ptr) { +; CHECK-LABEL: define void @use_gep_cast( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspace(1) [[GLOBAL_PTR]], i64 16 +; CHECK-NEXT: call void (...) @llvm.fake.use(ptr addrspace(1) [[GEP]], ptr addrspace(1) [[GLOBAL_PTR]]) +; CHECK-NEXT: ret void +; + %cast0 = addrspacecast ptr addrspace(1) %global.ptr to ptr + %gep = getelementptr i8, ptr %cast0, i64 16 + call void (...) 
@llvm.fake.use(ptr %gep, ptr %cast0) + ret void +} From c198f775cdcd731f68fb482c223702c7273d41ca Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 9 Oct 2024 09:27:28 +0400 Subject: [PATCH 12/55] AMDGPU: Remove flat/global fmin/fmax intrinsics (#105642) These have been replaced with atomicrmw --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 5 - llvm/lib/IR/AutoUpgrade.cpp | 25 +- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 4 - .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 4 - .../Target/AMDGPU/AMDGPUSearchableTables.td | 4 - .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 4 - llvm/lib/Target/AMDGPU/FLATInstructions.td | 11 - llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 16 - llvm/test/Bitcode/amdgcn-atomic.ll | 64 +++ .../AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll | 236 ++---------- .../CodeGen/AMDGPU/dag-divergence-atomic.ll | 9 +- .../AMDGPU/fp-min-max-flat-atomics-f64.ll | 51 --- .../CodeGen/AMDGPU/fp-min-max-flat-atomics.ll | 83 ---- .../AMDGPU/fp-min-max-global-atomics-f64.ll | 51 --- .../AMDGPU/fp-min-max-global-atomics.ll | 87 ----- .../CodeGen/AMDGPU/fp64-atomics-gfx90a.ll | 364 +----------------- .../AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll | 224 ----------- .../InferAddressSpaces/AMDGPU/flat_atomic.ll | 63 +-- 18 files changed, 127 insertions(+), 1178 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics-f64.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-f64.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics.ll delete mode 100644 llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 50179c1ceddb47..2738eb77b675ab 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2966,11 +2966,6 @@ def int_amdgcn_mfma_f32_16x16x8bf16 : 
AMDGPUMfmaIntrinsic; -def int_amdgcn_global_atomic_fmax : AMDGPUAtomicRtn; -def int_amdgcn_flat_atomic_fmin : AMDGPUAtomicRtn; -def int_amdgcn_flat_atomic_fmax : AMDGPUAtomicRtn; - defset list AMDGPUMFMAIntrinsics90A = { def int_amdgcn_mfma_f32_32x32x4bf16_1k : AMDGPUMfmaIntrinsic; def int_amdgcn_mfma_f32_16x16x4bf16_1k : AMDGPUMfmaIntrinsic; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 3753509f9aa718..215bfc8c6cfe3e 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1041,14 +1041,17 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, break; // No other 'amdgcn.atomic.*' } - if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || - Name.starts_with("ds.fmax") || - Name.starts_with("global.atomic.fadd") || - Name.starts_with("flat.atomic.fadd")) { - // Replaced with atomicrmw fadd/fmin/fmax, so there's no new - // declaration. - NewFn = nullptr; - return true; + if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") || + Name.consume_front("flat.atomic.")) { + if (Name.starts_with("fadd") || + // FIXME: We should also remove fmin.num and fmax.num intrinsics. + (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) || + (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) { + // Replaced with atomicrmw fadd/fmin/fmax, so there's no new + // declaration. 
+ NewFn = nullptr; + return true; + } } if (Name.starts_with("ldexp.")) { @@ -4218,7 +4221,11 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap) .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd) - .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd); + .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd) + .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin) + .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin) + .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax) + .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax); unsigned NumOperands = CI->getNumOperands(); if (NumOperands < 3) // Malformed bitcode. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index aa5b151adef3a4..09987a6504b9d0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -618,10 +618,6 @@ multiclass local_addr_space_atomic_op { } } -defm int_amdgcn_flat_atomic_fmin : noret_op; -defm int_amdgcn_flat_atomic_fmax : noret_op; -defm int_amdgcn_global_atomic_fmin : noret_op; -defm int_amdgcn_global_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_csub : noret_op; defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op; defm int_amdgcn_flat_atomic_fmin_num : noret_op; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index f597c1ae68a175..32dfbc98df581a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4913,12 +4913,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case Intrinsic::amdgcn_global_atomic_csub: - case Intrinsic::amdgcn_global_atomic_fmin: - case Intrinsic::amdgcn_global_atomic_fmax: case Intrinsic::amdgcn_global_atomic_fmin_num: case 
Intrinsic::amdgcn_global_atomic_fmax_num: - case Intrinsic::amdgcn_flat_atomic_fmin: - case Intrinsic::amdgcn_flat_atomic_fmax: case Intrinsic::amdgcn_flat_atomic_fmin_num: case Intrinsic::amdgcn_flat_atomic_fmax_num: case Intrinsic::amdgcn_atomic_cond_sub_u32: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index 2cd5fb2b94285c..60fa2adc62dc8c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -239,13 +239,9 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index d348166c2d9a04..0a2d4e6494305f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1045,8 +1045,6 @@ bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl &OpIndexes, switch (IID) { case Intrinsic::amdgcn_is_shared: case Intrinsic::amdgcn_is_private: - case Intrinsic::amdgcn_flat_atomic_fmax: - case Intrinsic::amdgcn_flat_atomic_fmin: case Intrinsic::amdgcn_flat_atomic_fmax_num: case Intrinsic::amdgcn_flat_atomic_fmin_num: OpIndexes.push_back(0); @@ -1106,8 +1104,6 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, return B.CreateIntrinsic(Intrinsic::ptrmask, {NewV->getType(), MaskTy}, {NewV, MaskOp}); } - case Intrinsic::amdgcn_flat_atomic_fmax: - case Intrinsic::amdgcn_flat_atomic_fmin: case Intrinsic::amdgcn_flat_atomic_fmax_num: case Intrinsic::amdgcn_flat_atomic_fmin_num: { Type *DestTy = II->getType(); diff --git 
a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 6b5e47902c5a53..a9ab0c5a453e8e 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1604,15 +1604,11 @@ let OtherPredicates = [isGFX12Plus] in { let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in { defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>; -defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>; -defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>; } let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in { defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>; defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>; -defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>; -defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>; } let OtherPredicates = [isGFX12Only] in { @@ -1642,13 +1638,6 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_globa let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in { defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>; -defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>; -defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>; -} - -let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in { -defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>; -defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>; } let 
OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 6172687f4b4abf..bbdc006b9afcf0 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1367,13 +1367,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, MachineMemOperand::MODereferenceable; return true; } - case Intrinsic::amdgcn_global_atomic_fmin: - case Intrinsic::amdgcn_global_atomic_fmax: case Intrinsic::amdgcn_global_atomic_fmin_num: case Intrinsic::amdgcn_global_atomic_fmax_num: case Intrinsic::amdgcn_global_atomic_ordered_add_b64: - case Intrinsic::amdgcn_flat_atomic_fmin: - case Intrinsic::amdgcn_flat_atomic_fmax: case Intrinsic::amdgcn_flat_atomic_fmin_num: case Intrinsic::amdgcn_flat_atomic_fmax_num: case Intrinsic::amdgcn_atomic_cond_sub_u32: { @@ -1485,14 +1481,10 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II, case Intrinsic::amdgcn_ds_consume: case Intrinsic::amdgcn_ds_ordered_add: case Intrinsic::amdgcn_ds_ordered_swap: - case Intrinsic::amdgcn_flat_atomic_fmax: case Intrinsic::amdgcn_flat_atomic_fmax_num: - case Intrinsic::amdgcn_flat_atomic_fmin: case Intrinsic::amdgcn_flat_atomic_fmin_num: case Intrinsic::amdgcn_global_atomic_csub: - case Intrinsic::amdgcn_global_atomic_fmax: case Intrinsic::amdgcn_global_atomic_fmax_num: - case Intrinsic::amdgcn_global_atomic_fmin: case Intrinsic::amdgcn_global_atomic_fmin_num: case Intrinsic::amdgcn_global_atomic_ordered_add_b64: case Intrinsic::amdgcn_global_load_tr_b64: @@ -9397,12 +9389,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, DAG.setNodeMemRefs(NewNode, {MemRef}); return SDValue(NewNode, 0); } - case Intrinsic::amdgcn_global_atomic_fmin: - case Intrinsic::amdgcn_global_atomic_fmax: case Intrinsic::amdgcn_global_atomic_fmin_num: case Intrinsic::amdgcn_global_atomic_fmax_num: - case Intrinsic::amdgcn_flat_atomic_fmin: - case 
Intrinsic::amdgcn_flat_atomic_fmax: case Intrinsic::amdgcn_flat_atomic_fmin_num: case Intrinsic::amdgcn_flat_atomic_fmax_num: { MemSDNode *M = cast(Op); @@ -9413,16 +9401,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, }; unsigned Opcode = 0; switch (IntrID) { - case Intrinsic::amdgcn_global_atomic_fmin: case Intrinsic::amdgcn_global_atomic_fmin_num: - case Intrinsic::amdgcn_flat_atomic_fmin: case Intrinsic::amdgcn_flat_atomic_fmin_num: { Opcode = ISD::ATOMIC_LOAD_FMIN; break; } - case Intrinsic::amdgcn_global_atomic_fmax: case Intrinsic::amdgcn_global_atomic_fmax_num: - case Intrinsic::amdgcn_flat_atomic_fmax: case Intrinsic::amdgcn_flat_atomic_fmax_num: { Opcode = ISD::ATOMIC_LOAD_FMAX; break; diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll index 87ca1e3a617ed9..3e28cd050fc880 100644 --- a/llvm/test/Bitcode/amdgcn-atomic.ll +++ b/llvm/test/Bitcode/amdgcn-atomic.ll @@ -354,6 +354,70 @@ define float @upgrade_amdgcn_global_atomic_fadd_f32_p1_f32(ptr addrspace(1) %ptr ret float %result } +declare float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr nocapture, float) #0 + +define float @upgrade_amdgcn_flat_atomic_fmin_f32_p0_f32(ptr %ptr, float %data) { + ; CHECK: %{{.+}} = atomicrmw fmin ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %ptr, float %data) + ret float %result +} + +declare float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) nocapture, float) #0 + +define float @upgrade_amdgcn_global_atomic_fmin_f32_p1_f32(ptr addrspace(1) %ptr, float %data) { + ; CHECK: %{{.+}} = atomicrmw fmin ptr addrspace(1) %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) %ptr, float %data) + ret float %result +} + +declare double 
@llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr nocapture, double) #0 + +define double @upgrade_amdgcn_flat_atomic_fmin_f64_p0_f64(ptr %ptr, double %data) { + ; CHECK: %{{.+}} = atomicrmw fmin ptr %ptr, double %data syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) + ret double %result +} + +declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) nocapture, double) #0 + +define double @upgrade_amdgcn_global_atomic_fmin_f64_p1_f64(ptr addrspace(1) %ptr, double %data) { + ; CHECK: %{{.+}} = atomicrmw fmin ptr addrspace(1) %ptr, double %data syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) + ret double %result +} + +declare float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr nocapture, float) #0 + +define float @upgrade_amdgcn_flat_atomic_fmax_f32_p0_f32(ptr %ptr, float %data) { + ; CHECK: %{{.+}} = atomicrmw fmax ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %ptr, float %data) + ret float %result +} + +declare float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) nocapture, float) #0 + +define float @upgrade_amdgcn_global_atomic_fmax_f32_p1_f32(ptr addrspace(1) %ptr, float %data) { + ; CHECK: %{{.+}} = atomicrmw fmax ptr addrspace(1) %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) %ptr, float %data) + ret float %result +} + +declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr nocapture, double) #0 + +define double @upgrade_amdgcn_flat_atomic_fmax_f64_p0_f64(ptr %ptr, double %data) { 
+ ; CHECK: %{{.+}} = atomicrmw fmax ptr %ptr, double %data syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) + ret double %result +} + +declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) nocapture, double) #0 + +define double @upgrade_amdgcn_global_atomic_fmax_f64_p1_f64(ptr addrspace(1) %ptr, double %data) { + ; CHECK: %{{.+}} = atomicrmw fmax ptr addrspace(1) %ptr, double %data syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) + ret double %result +} + attributes #0 = { argmemonly nounwind willreturn } ; CHECK: !0 = !{i32 5, i32 6} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll index eb39ca2d7daa7f..92ce2af47e22ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll @@ -14,10 +14,6 @@ declare double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double, <4 x i32>, i32 declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg) declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg) declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32 immarg) -declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) -declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, 
double %data, i32 %vindex) { ; GFX90A-LABEL: raw_buffer_atomic_add_noret_f64: @@ -1015,52 +1011,6 @@ main_body: ret void } -define amdgpu_kernel void @global_atomic_fmin_f64_noret(ptr addrspace(1) %ptr, double %data) { -; GFX90A-LABEL: global_atomic_fmin_f64_noret: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5] -; GFX90A-NEXT: s_endpgm -; -; GFX940-LABEL: global_atomic_fmin_f64_noret: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5] -; GFX940-NEXT: s_endpgm -main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - ret void -} - -define amdgpu_kernel void @global_atomic_fmax_f64_noret(ptr addrspace(1) %ptr, double %data) { -; GFX90A-LABEL: global_atomic_fmax_f64_noret: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5] -; GFX90A-NEXT: s_endpgm -; -; GFX940-LABEL: global_atomic_fmax_f64_noret: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5] -; GFX940-NEXT: s_endpgm -main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - ret void -} - define amdgpu_kernel void 
@global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %ptr) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body @@ -1070,7 +1020,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX90A-NEXT: s_cbranch_execz .LBB38_2 +; GFX90A-NEXT: s_cbranch_execz .LBB36_2 ; GFX90A-NEXT: ; %bb.1: ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1083,7 +1033,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol -; GFX90A-NEXT: .LBB38_2: +; GFX90A-NEXT: .LBB36_2: ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: global_atomic_fadd_f64_noret_pat: @@ -1094,7 +1044,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX940-NEXT: s_cbranch_execz .LBB38_2 +; GFX940-NEXT: s_cbranch_execz .LBB36_2 ; GFX940-NEXT: ; %bb.1: ; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1106,7 +1056,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: .LBB38_2: +; GFX940-NEXT: .LBB36_2: ; GFX940-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 @@ -1122,7 +1072,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace( ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc -; 
GFX90A-NEXT: s_cbranch_execz .LBB39_2 +; GFX90A-NEXT: s_cbranch_execz .LBB37_2 ; GFX90A-NEXT: ; %bb.1: ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1133,7 +1083,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace( ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol -; GFX90A-NEXT: .LBB39_2: +; GFX90A-NEXT: .LBB37_2: ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent: @@ -1144,7 +1094,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace( ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX940-NEXT: s_cbranch_execz .LBB39_2 +; GFX940-NEXT: s_cbranch_execz .LBB37_2 ; GFX940-NEXT: ; %bb.1: ; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1156,7 +1106,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace( ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: .LBB39_2: +; GFX940-NEXT: .LBB37_2: ; GFX940-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 @@ -1172,7 +1122,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX90A-NEXT: s_cbranch_execz .LBB40_2 +; GFX90A-NEXT: s_cbranch_execz .LBB38_2 ; GFX90A-NEXT: ; %bb.1: ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1185,7 +1135,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; 
GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol -; GFX90A-NEXT: .LBB40_2: +; GFX90A-NEXT: .LBB38_2: ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_system: @@ -1196,7 +1146,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX940-NEXT: s_cbranch_execz .LBB40_2 +; GFX940-NEXT: s_cbranch_execz .LBB38_2 ; GFX940-NEXT: ; %bb.1: ; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1208,7 +1158,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: .LBB40_2: +; GFX940-NEXT: .LBB38_2: ; GFX940-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0 @@ -1224,7 +1174,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace( ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX90A-NEXT: s_cbranch_execz .LBB41_2 +; GFX90A-NEXT: s_cbranch_execz .LBB39_2 ; GFX90A-NEXT: ; %bb.1: ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1235,7 +1185,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace( ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol -; GFX90A-NEXT: .LBB41_2: +; GFX90A-NEXT: .LBB39_2: ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_flush: @@ -1246,7 +1196,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace( ; GFX940-NEXT: 
v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX940-NEXT: s_cbranch_execz .LBB41_2 +; GFX940-NEXT: s_cbranch_execz .LBB39_2 ; GFX940-NEXT: ; %bb.1: ; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1258,7 +1208,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace( ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: .LBB41_2: +; GFX940-NEXT: .LBB39_2: ; GFX940-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 @@ -1344,44 +1294,6 @@ main_body: ret double %ret } -define double @global_atomic_fmax_f64_rtn(ptr addrspace(1) %ptr, double %data) { -; GFX90A-LABEL: global_atomic_fmax_f64_rtn: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off glc -; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: global_atomic_fmax_f64_rtn: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] -main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - ret double %ret -} - -define double @global_atomic_fmin_f64_rtn(ptr addrspace(1) %ptr, double %data) { -; GFX90A-LABEL: global_atomic_fmin_f64_rtn: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off glc -; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: global_atomic_fmin_f64_rtn: -; GFX940: ; %bb.0: ; 
%main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] -main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - ret double %ret -} - define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrspace(1) %ptr) { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: ; GFX90A: ; %bb.0: ; %main_body @@ -1391,7 +1303,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX90A-NEXT: s_cbranch_execz .LBB47_2 +; GFX90A-NEXT: s_cbranch_execz .LBB43_2 ; GFX90A-NEXT: ; %bb.1: ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1402,7 +1314,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol -; GFX90A-NEXT: .LBB47_2: +; GFX90A-NEXT: .LBB43_2: ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: @@ -1413,7 +1325,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX940-NEXT: s_cbranch_execz .LBB47_2 +; GFX940-NEXT: s_cbranch_execz .LBB43_2 ; GFX940-NEXT: ; %bb.1: ; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1425,7 +1337,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 -; 
GFX940-NEXT: .LBB47_2: +; GFX940-NEXT: .LBB43_2: ; GFX940-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 @@ -1633,90 +1545,6 @@ main_body: ret void } -define amdgpu_kernel void @flat_atomic_fmin_f64_noret(ptr %ptr, double %data) { -; GFX90A-LABEL: flat_atomic_fmin_f64_noret: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] -; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] -; GFX90A-NEXT: s_endpgm -; -; GFX940-LABEL: flat_atomic_fmin_f64_noret: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_endpgm -main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) - ret void -} - -define double @flat_atomic_fmin_f64_rtn(ptr %ptr, double %data) { -; GFX90A-LABEL: flat_atomic_fmin_f64_rtn: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: flat_atomic_fmin_f64_rtn: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] -main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) - ret double %ret -} - -define amdgpu_kernel void @flat_atomic_fmax_f64_noret(ptr %ptr, double %data) { -; GFX90A-LABEL: 
flat_atomic_fmax_f64_noret: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] -; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] -; GFX90A-NEXT: s_endpgm -; -; GFX940-LABEL: flat_atomic_fmax_f64_noret: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_endpgm -main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) - ret void -} - -define double @flat_atomic_fmax_f64_rtn(ptr %ptr, double %data) { -; GFX90A-LABEL: flat_atomic_fmax_f64_rtn: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: flat_atomic_fmax_f64_rtn: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] -main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) - ret double %ret -} - define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr) #1 { ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body @@ -1726,7 +1554,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX90A-NEXT: s_cbranch_execz .LBB59_2 
+; GFX90A-NEXT: s_cbranch_execz .LBB51_2 ; GFX90A-NEXT: ; %bb.1: ; GFX90A-NEXT: s_load_dword s2, s[2:3], 0x24 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1736,7 +1564,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr ; GFX90A-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: .LBB59_2: +; GFX90A-NEXT: .LBB51_2: ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: local_atomic_fadd_f64_noret_pat: @@ -1747,7 +1575,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX940-NEXT: s_cbranch_execz .LBB59_2 +; GFX940-NEXT: s_cbranch_execz .LBB51_2 ; GFX940-NEXT: ; %bb.1: ; GFX940-NEXT: s_load_dword s2, s[2:3], 0x24 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1757,7 +1585,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr ; GFX940-NEXT: v_mov_b32_e32 v2, s2 ; GFX940-NEXT: ds_add_f64 v2, v[0:1] ; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: .LBB59_2: +; GFX940-NEXT: .LBB51_2: ; GFX940-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 @@ -1773,7 +1601,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3 ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX90A-NEXT: s_cbranch_execz .LBB60_2 +; GFX90A-NEXT: s_cbranch_execz .LBB52_2 ; GFX90A-NEXT: ; %bb.1: ; GFX90A-NEXT: s_load_dword s2, s[2:3], 0x24 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1783,7 +1611,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3 ; GFX90A-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: .LBB60_2: +; 
GFX90A-NEXT: .LBB52_2: ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush: @@ -1794,7 +1622,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3 ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX940-NEXT: s_cbranch_execz .LBB60_2 +; GFX940-NEXT: s_cbranch_execz .LBB52_2 ; GFX940-NEXT: ; %bb.1: ; GFX940-NEXT: s_load_dword s2, s[2:3], 0x24 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1804,7 +1632,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3 ; GFX940-NEXT: v_mov_b32_e32 v2, s2 ; GFX940-NEXT: ds_add_f64 v2, v[0:1] ; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: .LBB60_2: +; GFX940-NEXT: .LBB52_2: ; GFX940-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 @@ -1820,7 +1648,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX90A-NEXT: s_cbranch_execz .LBB61_2 +; GFX90A-NEXT: s_cbranch_execz .LBB53_2 ; GFX90A-NEXT: ; %bb.1: ; GFX90A-NEXT: s_load_dword s2, s[2:3], 0x24 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1830,7 +1658,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp ; GFX90A-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: .LBB61_2: +; GFX90A-NEXT: .LBB53_2: ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe: @@ -1841,7 +1669,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX940-NEXT: s_cbranch_execz .LBB61_2 +; 
GFX940-NEXT: s_cbranch_execz .LBB53_2 ; GFX940-NEXT: ; %bb.1: ; GFX940-NEXT: s_load_dword s2, s[2:3], 0x24 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1851,7 +1679,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp ; GFX940-NEXT: v_mov_b32_e32 v2, s2 ; GFX940-NEXT: ds_add_f64 v2, v[0:1] ; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: .LBB61_2: +; GFX940-NEXT: .LBB53_2: ; GFX940-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 diff --git a/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll b/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll index de14d64dbf7e9d..017a1f047bb5f3 100644 --- a/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll @@ -508,7 +508,8 @@ define protected amdgpu_kernel void @fmin(ptr addrspace(1) %p, ptr addrspace(1) ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 ; CHECK-NEXT: global_store_dword v[0:1], v2, off ; CHECK-NEXT: s_endpgm - %f64 = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %p, double 1.0) + + %f64 = atomicrmw fmin ptr addrspace(1) %p, double 1.0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.memory !0 %n32 = fptoui double %f64 to i32 %n64 = zext i32 %n32 to i64 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 @@ -533,7 +534,7 @@ define protected amdgpu_kernel void @fmax(ptr addrspace(1) %p, ptr addrspace(1) ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 ; CHECK-NEXT: global_store_dword v[0:1], v2, off ; CHECK-NEXT: s_endpgm - %f64 = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %p, double 1.0) + %f64 = atomicrmw fmax ptr addrspace(1) %p, double 1.0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.memory !0 %n32 = fptoui double %f64 to i32 %n64 = zext i32 %n32 to i64 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 @@ -905,8 +906,6 @@ define protected amdgpu_kernel void 
@buffer.ptr.atomic.fmax(ptr addrspace(8) %rs ret void } -declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1), double) -declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1), double) declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32) declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32) declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32, ptr addrspace(8), i32, i32, i32) @@ -923,3 +922,5 @@ declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspa declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32) declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32) declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32) + +!0 = !{} diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics-f64.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics-f64.ll deleted file mode 100644 index 8633a3965259bf..00000000000000 --- a/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics-f64.ll +++ /dev/null @@ -1,51 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG -; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL - -declare double @llvm.amdgcn.flat.atomic.fmin.f64.p1.f64(ptr %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmax.f64.p1.f64(ptr %ptr, double %data) - -define amdgpu_cs void @flat_atomic_fmin_f64_noret(ptr %ptr, double %data) { -; GFX10-LABEL: flat_atomic_fmin_f64_noret: -; GFX10: ; %bb.0: -; GFX10-NEXT: flat_atomic_fmin_x2 v[0:1], v[2:3] -; GFX10-NEXT: s_endpgm - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p1.f64(ptr %ptr, 
double %data) - ret void -} - -define amdgpu_cs void @flat_atomic_fmax_f64_noret(ptr %ptr, double %data) { -; GFX10-LABEL: flat_atomic_fmax_f64_noret: -; GFX10: ; %bb.0: -; GFX10-NEXT: flat_atomic_fmax_x2 v[0:1], v[2:3] -; GFX10-NEXT: s_endpgm - %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p1.f64(ptr %ptr, double %data) - ret void -} - -define amdgpu_cs void @flat_atomic_fmin_f64_rtn(ptr %ptr, double %data, ptr %out) { -; GFX10-LABEL: flat_atomic_fmin_f64_rtn: -; GFX10: ; %bb.0: -; GFX10-NEXT: flat_atomic_fmin_x2 v[0:1], v[0:1], v[2:3] glc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: flat_store_dwordx2 v[4:5], v[0:1] -; GFX10-NEXT: s_endpgm - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p1.f64(ptr %ptr, double %data) - store double %ret, ptr %out - ret void -} - -define amdgpu_cs void @flat_atomic_fmax_f64_rtn(ptr %ptr, double %data, ptr %out) { -; GFX10-LABEL: flat_atomic_fmax_f64_rtn: -; GFX10: ; %bb.0: -; GFX10-NEXT: flat_atomic_fmax_x2 v[0:1], v[0:1], v[2:3] glc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: flat_store_dwordx2 v[4:5], v[0:1] -; GFX10-NEXT: s_endpgm - %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p1.f64(ptr %ptr, double %data) - store double %ret, ptr %out - ret void -} -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; GFX10-GISEL: {{.*}} -; GFX10-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll deleted file mode 100644 index 1d2e3fc636f44a..00000000000000 --- a/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll +++ /dev/null @@ -1,83 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG -; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG -; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL -; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL - -declare float @llvm.amdgcn.flat.atomic.fmin.f32.p1.f32(ptr %ptr, float %data) -declare float @llvm.amdgcn.flat.atomic.fmax.f32.p1.f32(ptr %ptr, float %data) - -define amdgpu_cs void @flat_atomic_fmin_f32_noret(ptr %ptr, float %data) { -; GFX10-LABEL: flat_atomic_fmin_f32_noret: -; GFX10: ; %bb.0: -; GFX10-NEXT: flat_atomic_fmin v[0:1], v2 -; GFX10-NEXT: s_endpgm -; -; GFX11-LABEL: flat_atomic_fmin_f32_noret: -; GFX11: ; %bb.0: -; GFX11-NEXT: flat_atomic_min_f32 v[0:1], v2 -; GFX11-NEXT: s_endpgm - %ret = call float @llvm.amdgcn.flat.atomic.fmin.f32.p1.f32(ptr %ptr, float %data) - ret void -} - -define amdgpu_cs void @flat_atomic_fmax_f32_noret(ptr %ptr, float %data) { -; GFX10-LABEL: flat_atomic_fmax_f32_noret: -; GFX10: ; %bb.0: -; GFX10-NEXT: flat_atomic_fmax v[0:1], v2 -; GFX10-NEXT: s_endpgm -; -; GFX11-LABEL: flat_atomic_fmax_f32_noret: -; GFX11: ; %bb.0: -; GFX11-NEXT: flat_atomic_max_f32 v[0:1], v2 -; GFX11-NEXT: s_endpgm - %ret = call float @llvm.amdgcn.flat.atomic.fmax.f32.p1.f32(ptr %ptr, float %data) - ret void 
-} - -define amdgpu_cs float @flat_atomic_fmin_f32_rtn(ptr %ptr, float %data, ptr %out) { -; GFX10-LABEL: flat_atomic_fmin_f32_rtn: -; GFX10: ; %bb.0: -; GFX10-NEXT: flat_atomic_fmin v0, v[0:1], v2 glc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: flat_store_dword v[3:4], v0 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -; -; GFX11-LABEL: flat_atomic_fmin_f32_rtn: -; GFX11: ; %bb.0: -; GFX11-NEXT: flat_atomic_min_f32 v0, v[0:1], v2 glc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: flat_store_b32 v[3:4], v0 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: ; return to shader part epilog - %ret = call float @llvm.amdgcn.flat.atomic.fmin.f32.p1.f32(ptr %ptr, float %data) - store float %ret, ptr %out - ret float %ret -} - -define amdgpu_cs float @flat_atomic_fmax_f32_rtn(ptr %ptr, float %data, ptr %out) { -; GFX10-LABEL: flat_atomic_fmax_f32_rtn: -; GFX10: ; %bb.0: -; GFX10-NEXT: flat_atomic_fmax v0, v[0:1], v2 glc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: flat_store_dword v[3:4], v0 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -; -; GFX11-LABEL: flat_atomic_fmax_f32_rtn: -; GFX11: ; %bb.0: -; GFX11-NEXT: flat_atomic_max_f32 v0, v[0:1], v2 glc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: flat_store_b32 v[3:4], v0 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: ; return to shader part epilog - %ret = call float @llvm.amdgcn.flat.atomic.fmax.f32.p1.f32(ptr %ptr, float %data) - store float %ret, ptr %out - ret float %ret -} -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; GFX10-GISEL: {{.*}} -; GFX10-SDAG: {{.*}} -; GFX11-GISEL: {{.*}} -; GFX11-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-f64.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-f64.ll deleted file mode 100644 index bb06ee3165e3ac..00000000000000 --- a/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-f64.ll +++ /dev/null @@ -1,51 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG -; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL - -declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) -declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - -define amdgpu_cs void @global_atomic_fmin_f64_noret(ptr addrspace(1) %ptr, double %data) { -; GFX10-LABEL: global_atomic_fmin_f64_noret: -; GFX10: ; %bb.0: -; GFX10-NEXT: global_atomic_fmin_x2 v[0:1], v[2:3], off -; GFX10-NEXT: s_endpgm - %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - ret void -} - -define amdgpu_cs void @global_atomic_fmax_f64_noret(ptr addrspace(1) %ptr, double %data) { -; GFX10-LABEL: global_atomic_fmax_f64_noret: -; GFX10: ; %bb.0: -; GFX10-NEXT: global_atomic_fmax_x2 v[0:1], v[2:3], off -; GFX10-NEXT: s_endpgm - %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - ret void -} - -define amdgpu_cs void @global_atomic_fmin_f64_rtn(ptr addrspace(1) %ptr, double %data, ptr addrspace(1) %out) { -; GFX10-LABEL: global_atomic_fmin_f64_rtn: -; GFX10: ; %bb.0: -; GFX10-NEXT: global_atomic_fmin_x2 v[0:1], v[0:1], v[2:3], off glc -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dwordx2 v[4:5], v[0:1], off -; 
GFX10-NEXT: s_endpgm - %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - store double %ret, ptr addrspace(1) %out - ret void -} - -define amdgpu_cs void @global_atomic_fmax_f64_rtn(ptr addrspace(1) %ptr, double %data, ptr addrspace(1) %out) { -; GFX10-LABEL: global_atomic_fmax_f64_rtn: -; GFX10: ; %bb.0: -; GFX10-NEXT: global_atomic_fmax_x2 v[0:1], v[0:1], v[2:3], off glc -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dwordx2 v[4:5], v[0:1], off -; GFX10-NEXT: s_endpgm - %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - store double %ret, ptr addrspace(1) %out - ret void -} -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX10-GISEL: {{.*}} -; GFX10-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics.ll deleted file mode 100644 index 699bb8b41b69d4..00000000000000 --- a/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics.ll +++ /dev/null @@ -1,87 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG -; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG -; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL -; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL - -declare float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) %ptr, float %data) -declare float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) %ptr, float %data) - -define amdgpu_cs void @global_atomic_fmin_f32_noret(ptr addrspace(1) %ptr, 
float %data) { -; GFX10-LABEL: global_atomic_fmin_f32_noret: -; GFX10: ; %bb.0: -; GFX10-NEXT: global_atomic_fmin v[0:1], v2, off -; GFX10-NEXT: s_endpgm -; -; GFX11-LABEL: global_atomic_fmin_f32_noret: -; GFX11: ; %bb.0: -; GFX11-NEXT: global_atomic_min_f32 v[0:1], v2, off -; GFX11-NEXT: s_nop 0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX11-NEXT: s_endpgm - %ret = call float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) %ptr, float %data) - ret void -} - -define amdgpu_cs void @global_atomic_fmax_f32_noret(ptr addrspace(1) %ptr, float %data) { -; GFX10-LABEL: global_atomic_fmax_f32_noret: -; GFX10: ; %bb.0: -; GFX10-NEXT: global_atomic_fmax v[0:1], v2, off -; GFX10-NEXT: s_endpgm -; -; GFX11-LABEL: global_atomic_fmax_f32_noret: -; GFX11: ; %bb.0: -; GFX11-NEXT: global_atomic_max_f32 v[0:1], v2, off -; GFX11-NEXT: s_nop 0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX11-NEXT: s_endpgm - %ret = call float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) %ptr, float %data) - ret void -} - -define amdgpu_cs void @global_atomic_fmax_f32_rtn(ptr addrspace(1) %ptr, float %data, ptr addrspace(1) %out) { -; GFX10-LABEL: global_atomic_fmax_f32_rtn: -; GFX10: ; %bb.0: -; GFX10-NEXT: global_atomic_fmax v0, v[0:1], v2, off glc -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dword v[3:4], v0, off -; GFX10-NEXT: s_endpgm -; -; GFX11-LABEL: global_atomic_fmax_f32_rtn: -; GFX11: ; %bb.0: -; GFX11-NEXT: global_atomic_max_f32 v0, v[0:1], v2, off glc -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_b32 v[3:4], v0, off -; GFX11-NEXT: s_nop 0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX11-NEXT: s_endpgm - %ret = call float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) %ptr, float %data) - store float %ret, ptr addrspace(1) %out - ret void -} - -define amdgpu_cs void @global_atomic_fmin_f32_rtn(ptr addrspace(1) %ptr, float %data, ptr addrspace(1) %out) { -; GFX10-LABEL: 
global_atomic_fmin_f32_rtn: -; GFX10: ; %bb.0: -; GFX10-NEXT: global_atomic_fmin v0, v[0:1], v2, off glc -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dword v[3:4], v0, off -; GFX10-NEXT: s_endpgm -; -; GFX11-LABEL: global_atomic_fmin_f32_rtn: -; GFX11: ; %bb.0: -; GFX11-NEXT: global_atomic_min_f32 v0, v[0:1], v2, off glc -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_b32 v[3:4], v0, off -; GFX11-NEXT: s_nop 0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX11-NEXT: s_endpgm - %ret = call float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) %ptr, float %data) - store float %ret, ptr addrspace(1) %out - ret void -} -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX10-GISEL: {{.*}} -; GFX10-SDAG: {{.*}} -; GFX11-GISEL: {{.*}} -; GFX11-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll index 957c10ddf85e5d..e45b5cb30ab894 100644 --- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll @@ -14,10 +14,6 @@ declare double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double, <4 x i32>, i32 declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg) declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg) declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32 immarg) -declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) -declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) declare double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) nocapture, double, i32, i32, i1) define 
amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) { @@ -1016,56 +1012,6 @@ main_body: ret void } -define amdgpu_kernel void @global_atomic_fmin_f64_noret(ptr addrspace(1) %ptr, double %data) { -; GFX90A-LABEL: global_atomic_fmin_f64_noret: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v0, s6 -; GFX90A-NEXT: v_mov_b32_e32 v1, s7 -; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5] -; GFX90A-NEXT: s_endpgm -; -; GFX940-LABEL: global_atomic_fmin_f64_noret: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, s6 -; GFX940-NEXT: v_mov_b32_e32 v1, s7 -; GFX940-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5] -; GFX940-NEXT: s_endpgm -main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - ret void -} - -define amdgpu_kernel void @global_atomic_fmax_f64_noret(ptr addrspace(1) %ptr, double %data) { -; GFX90A-LABEL: global_atomic_fmax_f64_noret: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v0, s6 -; GFX90A-NEXT: v_mov_b32_e32 v1, s7 -; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5] -; GFX90A-NEXT: s_endpgm -; -; GFX940-LABEL: global_atomic_fmax_f64_noret: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, s6 -; GFX940-NEXT: v_mov_b32_e32 v1, s7 -; GFX940-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5] -; GFX940-NEXT: s_endpgm -main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr 
addrspace(1) %ptr, double %data) - ret void -} - define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %ptr) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body @@ -1265,44 +1211,6 @@ main_body: ret double %ret } -define double @global_atomic_fmax_f64_rtn(ptr addrspace(1) %ptr, double %data) { -; GFX90A-LABEL: global_atomic_fmax_f64_rtn: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off glc -; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: global_atomic_fmax_f64_rtn: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] -main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - ret double %ret -} - -define double @global_atomic_fmin_f64_rtn(ptr addrspace(1) %ptr, double %data) { -; GFX90A-LABEL: global_atomic_fmin_f64_rtn: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off glc -; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: global_atomic_fmin_f64_rtn: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] -main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - ret double %ret -} - define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrspace(1) %ptr) { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: ; GFX90A: ; %bb.0: ; 
%main_body @@ -1313,7 +1221,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1] -; GFX90A-NEXT: .LBB47_1: ; %atomicrmw.start +; GFX90A-NEXT: .LBB43_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc @@ -1323,7 +1231,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX90A-NEXT: s_cbranch_execnz .LBB47_1 +; GFX90A-NEXT: s_cbranch_execnz .LBB43_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm ; @@ -1524,7 +1432,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) { ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] ; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1] ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[4:5], s[4:5] op_sel:[0,1] -; GFX90A-NEXT: .LBB54_1: ; %atomicrmw.start +; GFX90A-NEXT: .LBB50_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 @@ -1535,7 +1443,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) { ; GFX90A-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX90A-NEXT: s_cbranch_execnz .LBB54_1 +; GFX90A-NEXT: s_cbranch_execnz .LBB50_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm ; @@ -1555,98 +1463,6 @@ main_body: ret void } -define amdgpu_kernel void @flat_atomic_fmin_f64_noret(ptr %ptr, double %data) { -; GFX90A-LABEL: 
flat_atomic_fmin_f64_noret: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v0, s4 -; GFX90A-NEXT: v_mov_b32_e32 v1, s5 -; GFX90A-NEXT: v_mov_b32_e32 v2, s6 -; GFX90A-NEXT: v_mov_b32_e32 v3, s7 -; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] -; GFX90A-NEXT: s_endpgm -; -; GFX940-LABEL: flat_atomic_fmin_f64_noret: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, s4 -; GFX940-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-NEXT: v_mov_b32_e32 v2, s6 -; GFX940-NEXT: v_mov_b32_e32 v3, s7 -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_endpgm -main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) - ret void -} - -define double @flat_atomic_fmin_f64_rtn(ptr %ptr, double %data) { -; GFX90A-LABEL: flat_atomic_fmin_f64_rtn: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: flat_atomic_fmin_f64_rtn: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] -main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) - ret double %ret -} - -define amdgpu_kernel void @flat_atomic_fmax_f64_noret(ptr %ptr, double %data) { -; GFX90A-LABEL: flat_atomic_fmax_f64_noret: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v0, s4 -; GFX90A-NEXT: v_mov_b32_e32 v1, s5 -; GFX90A-NEXT: v_mov_b32_e32 v2, s6 -; 
GFX90A-NEXT: v_mov_b32_e32 v3, s7 -; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] -; GFX90A-NEXT: s_endpgm -; -; GFX940-LABEL: flat_atomic_fmax_f64_noret: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, s4 -; GFX940-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-NEXT: v_mov_b32_e32 v2, s6 -; GFX940-NEXT: v_mov_b32_e32 v3, s7 -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_endpgm -main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) - ret void -} - -define double @flat_atomic_fmax_f64_rtn(ptr %ptr, double %data) { -; GFX90A-LABEL: flat_atomic_fmax_f64_rtn: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: flat_atomic_fmax_f64_rtn: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] -main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) - ret double %ret -} - define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, double %data) { ; GFX90A-LABEL: local_atomic_fadd_f64_noret: ; GFX90A: ; %bb.0: ; %main_body @@ -1843,178 +1659,6 @@ main_body: ret double %ret } -define double @flat_atomic_fmin_f64_intrinsic_rtn__posoffset(ptr %ptr, double %data) #1 { -; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__posoffset: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: 
flat_atomic_fmin_f64_intrinsic_rtn__posoffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 511 - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) - ret double %ret -} - -define double @flat_atomic_fmin_f64_intrinsic_rtn__negoffset(ptr %ptr, double %data) #1 { -; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__negoffset: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0 -; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__negoffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 -511 - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %gep, double %data) - ret double %ret -} - -define void @flat_atomic_fmin_f64_intrinsic_noret__posoffset(ptr %ptr, double %data) #1 { -; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_noret__posoffset: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_noret__posoffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 511 - %unused = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) - ret void -} - -define void @flat_atomic_fmin_f64_intrinsic_noret__negoffset(ptr %ptr, double %data) #1 { -; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_noret__negoffset: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0 -; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_noret__negoffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 -511 - %unused = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %gep, double %data) - ret void -} - -define double @flat_atomic_fmax_f64_intrinsic_rtn__posoffset(ptr %ptr, double %data) #1 { -; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__posoffset: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__posoffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; 
GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 511 - %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) - ret double %ret -} - -define double @flat_atomic_fmax_f64_intrinsic_rtn__negoffset(ptr %ptr, double %data) #1 { -; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__negoffset: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0 -; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__negoffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 -511 - %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %gep, double %data) - ret double %ret -} - -define void @flat_atomic_fmax_f64_intrinsic_noret__posoffset(ptr %ptr, double %data) #1 { -; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_noret__posoffset: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_noret__posoffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 511 - %unused = call double 
@llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) - ret void -} - -define void @flat_atomic_fmax_f64_intrinsic_noret__negoffset(ptr %ptr, double %data) #1 { -; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_noret__negoffset: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0 -; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_noret__negoffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 -511 - %unused = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %gep, double %data) - ret void -} - attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" } attributes #1 = { nounwind } attributes #2 = { "denormal-fp-math"="ieee,ieee" } diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll deleted file mode 100644 index 3d529a2c6ef69a..00000000000000 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll +++ /dev/null @@ -1,224 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s - -declare float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %ptr, float %data) -declare float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %ptr, float %data) - -define 
amdgpu_kernel void @flat_atomic_fadd_f32_p1(ptr addrspace(1) %ptr, float %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p1 -; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]], float [[DATA:%.*]]) { -; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p1.f32(ptr addrspace(1) [[PTR]], float [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p1.f32(ptr addrspace(1) [[PTR]], float [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(1) %ptr to ptr - %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) - %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f32_p2(ptr addrspace(2) %ptr, float %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p2 -; CHECK-SAME: (ptr addrspace(2) [[PTR:%.*]], float [[DATA:%.*]]) { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr -; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr [[CAST]], float [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr [[CAST]], float [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(2) %ptr to ptr - %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) - %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f32_p3(ptr addrspace(3) %ptr, float %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p3 -; CHECK-SAME: (ptr addrspace(3) [[PTR:%.*]], float [[DATA:%.*]]) { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr -; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr [[CAST]], float [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call float 
@llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr [[CAST]], float [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(3) %ptr to ptr - %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) - %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f32_p4(ptr addrspace(4) %ptr, float %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p4 -; CHECK-SAME: (ptr addrspace(4) [[PTR:%.*]], float [[DATA:%.*]]) { -; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p4.f32(ptr addrspace(4) [[PTR]], float [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p4.f32(ptr addrspace(4) [[PTR]], float [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(4) %ptr to ptr - %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) - %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f32_p5(ptr addrspace(5) %ptr, float %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p5 -; CHECK-SAME: (ptr addrspace(5) [[PTR:%.*]], float [[DATA:%.*]]) { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr -; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr [[CAST]], float [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr [[CAST]], float [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(5) %ptr to ptr - %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) - %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f32_p6(ptr addrspace(6) %ptr, float %data) { -; CHECK-LABEL: define amdgpu_kernel void 
@flat_atomic_fadd_f32_p6 -; CHECK-SAME: (ptr addrspace(6) [[PTR:%.*]], float [[DATA:%.*]]) { -; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p6.f32(ptr addrspace(6) [[PTR]], float [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p6.f32(ptr addrspace(6) [[PTR]], float [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(6) %ptr to ptr - %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) - %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f32_p7(ptr addrspace(7) %ptr, float %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p7 -; CHECK-SAME: (ptr addrspace(7) [[PTR:%.*]], float [[DATA:%.*]]) { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(7) [[PTR]] to ptr -; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr [[CAST]], float [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr [[CAST]], float [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(7) %ptr to ptr - %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) - %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f32_p99(ptr addrspace(99) %ptr, float %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p99 -; CHECK-SAME: (ptr addrspace(99) [[PTR:%.*]], float [[DATA:%.*]]) { -; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p99.f32(ptr addrspace(99) [[PTR]], float [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p99.f32(ptr addrspace(99) [[PTR]], float [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(99) %ptr to ptr - %max = call float 
@llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) - %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) - ret void -} - -declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) - -define amdgpu_kernel void @flat_atomic_fadd_f64_p1(ptr addrspace(1) %ptr, double %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p1 -; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]], double [[DATA:%.*]]) { -; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p1.f64(ptr addrspace(1) [[PTR]], double [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p1.f64(ptr addrspace(1) [[PTR]], double [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(1) %ptr to ptr - %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) - %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f64_p2(ptr addrspace(2) %ptr, double %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p2 -; CHECK-SAME: (ptr addrspace(2) [[PTR:%.*]], double [[DATA:%.*]]) { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr -; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr [[CAST]], double [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr [[CAST]], double [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(2) %ptr to ptr - %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) - %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f64_p3(ptr addrspace(3) %ptr, double %data) { -; CHECK-LABEL: define 
amdgpu_kernel void @flat_atomic_fadd_f64_p3 -; CHECK-SAME: (ptr addrspace(3) [[PTR:%.*]], double [[DATA:%.*]]) { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr -; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr [[CAST]], double [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr [[CAST]], double [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(3) %ptr to ptr - %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) - %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f64_p4(ptr addrspace(4) %ptr, double %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p4 -; CHECK-SAME: (ptr addrspace(4) [[PTR:%.*]], double [[DATA:%.*]]) { -; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p4.f64(ptr addrspace(4) [[PTR]], double [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p4.f64(ptr addrspace(4) [[PTR]], double [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(4) %ptr to ptr - %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) - %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f64_p5(ptr addrspace(5) %ptr, double %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p5 -; CHECK-SAME: (ptr addrspace(5) [[PTR:%.*]], double [[DATA:%.*]]) { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr -; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr [[CAST]], double [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr [[CAST]], double [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = 
addrspacecast ptr addrspace(5) %ptr to ptr - %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) - %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f64_p6(ptr addrspace(6) %ptr, double %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p6 -; CHECK-SAME: (ptr addrspace(6) [[PTR:%.*]], double [[DATA:%.*]]) { -; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p6.f64(ptr addrspace(6) [[PTR]], double [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p6.f64(ptr addrspace(6) [[PTR]], double [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(6) %ptr to ptr - %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) - %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f64_p7(ptr addrspace(7) %ptr, double %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p7 -; CHECK-SAME: (ptr addrspace(7) [[PTR:%.*]], double [[DATA:%.*]]) { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(7) [[PTR]] to ptr -; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr [[CAST]], double [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr [[CAST]], double [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(7) %ptr to ptr - %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) - %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f64_p99(ptr addrspace(99) %ptr, double %data) { -; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p99 -; CHECK-SAME: (ptr addrspace(99) [[PTR:%.*]], double 
[[DATA:%.*]]) { -; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p99.f64(ptr addrspace(99) [[PTR]], double [[DATA]]) -; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p99.f64(ptr addrspace(99) [[PTR]], double [[DATA]]) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(99) %ptr to ptr - %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) - %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) - ret void -} diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll index 57e6fdb35113e6..a0856ac9127e65 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll @@ -1,9 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s -declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr nocapture, double) #0 -declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr nocapture, double) #0 - define protected amdgpu_kernel void @InferNothing(i32 %a, ptr %b, double %c) { ; CHECK-LABEL: InferNothing: ; CHECK: ; %bb.0: ; %entry @@ -66,54 +63,6 @@ entry: ret void } -define protected amdgpu_kernel void @InferFmax(i32 %a, ptr addrspace(1) %b, double %c) { -; CHECK-LABEL: InferFmax: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dword s0, s[2:3], 0x24 -; CHECK-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x2c -; CHECK-NEXT: v_mov_b32_e32 v2, 0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_ashr_i32 s1, s0, 31 -; CHECK-NEXT: s_lshl_b64 s[0:1], s[0:1], 3 -; CHECK-NEXT: s_add_u32 s0, s4, s0 -; CHECK-NEXT: v_mov_b32_e32 v0, s6 -; CHECK-NEXT: v_mov_b32_e32 v1, s7 -; CHECK-NEXT: s_addc_u32 s1, s5, s1 -; CHECK-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1] offset:-8 -; CHECK-NEXT: s_endpgm -entry: - %i = add 
nsw i32 %a, -1 - %i.2 = sext i32 %i to i64 - %i.3 = getelementptr inbounds double, ptr addrspace(1) %b, i64 %i.2 - %i.4 = addrspacecast ptr addrspace(1) %i.3 to ptr - %i.5 = tail call contract double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %i.4, double %c) #1 - ret void -} - -define protected amdgpu_kernel void @InferFmin(i32 %a, ptr addrspace(1) %b, double %c) { -; CHECK-LABEL: InferFmin: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dword s0, s[2:3], 0x24 -; CHECK-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x2c -; CHECK-NEXT: v_mov_b32_e32 v2, 0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_ashr_i32 s1, s0, 31 -; CHECK-NEXT: s_lshl_b64 s[0:1], s[0:1], 3 -; CHECK-NEXT: s_add_u32 s0, s4, s0 -; CHECK-NEXT: v_mov_b32_e32 v0, s6 -; CHECK-NEXT: v_mov_b32_e32 v1, s7 -; CHECK-NEXT: s_addc_u32 s1, s5, s1 -; CHECK-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1] offset:-8 -; CHECK-NEXT: s_endpgm -entry: - %i = add nsw i32 %a, -1 - %i.2 = sext i32 %i to i64 - %i.3 = getelementptr inbounds double, ptr addrspace(1) %b, i64 %i.2 - %i.4 = addrspacecast ptr addrspace(1) %i.3 to ptr - %i.5 = tail call contract double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %i.4, double %c) #1 - ret void -} - define protected amdgpu_kernel void @InferMixed(i32 %a, ptr addrspace(1) %b, double %c, ptr %d) { ; CHECK-LABEL: InferMixed: ; CHECK: ; %bb.0: ; %entry @@ -131,7 +80,7 @@ define protected amdgpu_kernel void @InferMixed(i32 %a, ptr addrspace(1) %b, dou ; CHECK-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; CHECK-NEXT: s_and_saveexec_b64 s[8:9], vcc -; CHECK-NEXT: s_cbranch_execz .LBB4_2 +; CHECK-NEXT: s_cbranch_execz .LBB2_2 ; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: s_load_dword s2, s[2:3], 0x24 ; CHECK-NEXT: v_mov_b32_e32 v2, 0 @@ -146,7 +95,7 @@ define protected amdgpu_kernel void @InferMixed(i32 %a, ptr addrspace(1) %b, dou ; CHECK-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] offset:-7 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: 
buffer_wbinvl1_vol -; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: s_endpgm entry: %i = add nsw i32 %a, -1 @@ -180,17 +129,17 @@ define protected amdgpu_kernel void @InferPHI(i32 %a, ptr addrspace(1) %b, doubl ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0 -; CHECK-NEXT: .LBB5_1: ; %bb0 +; CHECK-NEXT: .LBB3_1: ; %bb0 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1] -; CHECK-NEXT: s_cbranch_vccnz .LBB5_1 +; CHECK-NEXT: s_cbranch_vccnz .LBB3_1 ; CHECK-NEXT: ; %bb.2: ; %bb1 ; CHECK-NEXT: s_mov_b64 s[0:1], exec ; CHECK-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; CHECK-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CHECK-NEXT: s_cbranch_execz .LBB5_4 +; CHECK-NEXT: s_cbranch_execz .LBB3_4 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: s_bcnt1_i32_b64 s0, s[0:1] ; CHECK-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 @@ -199,7 +148,7 @@ define protected amdgpu_kernel void @InferPHI(i32 %a, ptr addrspace(1) %b, doubl ; CHECK-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_wbinvl1_vol -; CHECK-NEXT: .LBB5_4: +; CHECK-NEXT: .LBB3_4: ; CHECK-NEXT: s_endpgm entry: %i = add nsw i32 %a, -1 From 3dba4ca155e0b460ca82917b25d3624eb5825940 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 9 Oct 2024 14:22:24 +1100 Subject: [PATCH 13/55] [ORC][MachO] Remove the ExecutionSession& argument to MachOPlatform constructor. We can get a reference to the ExecutionSession from the ObjectLinkingLayer argument, so there's no need to pass it in separately. 
--- llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h | 3 +-- llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h index 565f5aef0f4924..19f935d6658234 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h @@ -292,8 +292,7 @@ class MachOPlatform : public Platform { static MachOExecutorSymbolFlags flagsForSymbol(jitlink::Symbol &Sym); - MachOPlatform(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer, - JITDylib &PlatformJD, + MachOPlatform(ObjectLinkingLayer &ObjLinkingLayer, JITDylib &PlatformJD, std::unique_ptr OrcRuntimeGenerator, HeaderOptions PlatformJDOpts, MachOHeaderMUBuilder BuildMachOHeaderMU, Error &Err); diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index f728323d460eb9..e5609053c74d7b 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -329,7 +329,7 @@ MachOPlatform::Create(ObjectLinkingLayer &ObjLinkingLayer, JITDylib &PlatformJD, // Create the instance. 
Error Err = Error::success(); auto P = std::unique_ptr(new MachOPlatform( - ES, ObjLinkingLayer, PlatformJD, std::move(OrcRuntime), + ObjLinkingLayer, PlatformJD, std::move(OrcRuntime), std::move(PlatformJDOpts), std::move(BuildMachOHeaderMU), Err)); if (Err) return std::move(Err); @@ -473,12 +473,12 @@ MachOPlatform::flagsForSymbol(jitlink::Symbol &Sym) { } MachOPlatform::MachOPlatform( - ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer, - JITDylib &PlatformJD, + ObjectLinkingLayer &ObjLinkingLayer, JITDylib &PlatformJD, std::unique_ptr OrcRuntimeGenerator, HeaderOptions PlatformJDOpts, MachOHeaderMUBuilder BuildMachOHeaderMU, Error &Err) - : ES(ES), PlatformJD(PlatformJD), ObjLinkingLayer(ObjLinkingLayer), + : ES(ObjLinkingLayer.getExecutionSession()), PlatformJD(PlatformJD), + ObjLinkingLayer(ObjLinkingLayer), BuildMachOHeaderMU(std::move(BuildMachOHeaderMU)) { ErrorAsOutParameter _(&Err); ObjLinkingLayer.addPlugin(std::make_unique(*this)); From 55dd29c61d1bd5509504ef0c6014b3879567eb17 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 8 Oct 2024 23:02:04 -0700 Subject: [PATCH 14/55] [llvm-profdata] Avoid repeated hash lookups (NFC) (#111629) --- llvm/tools/llvm-profdata/llvm-profdata.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 8a36726769f221..17933e4500ead6 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -1220,11 +1220,9 @@ adjustInstrProfile(std::unique_ptr &WC, } } - if (!StaticFuncMap.contains(NewName)) { - StaticFuncMap[NewName] = Name; - } else { - StaticFuncMap[NewName] = DuplicateNameStr; - } + auto [It, Inserted] = StaticFuncMap.try_emplace(NewName, Name); + if (!Inserted) + It->second = DuplicateNameStr; }; // We need to flatten the SampleFDO profile as the InstrFDO From b26aac5a440d03791a367a1ee19d0341b68a28bc Mon Sep 17 00:00:00 2001 From: Vitaly Buka 
Date: Tue, 8 Oct 2024 22:44:11 -0700 Subject: [PATCH 15/55] [sanitizer] Report -> VReport for ThreadLister failure --- .../sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp index 73ba884052a3a3..d9f803a276dadc 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp @@ -217,7 +217,7 @@ bool ThreadSuspender::SuspendAllThreads() { switch (thread_lister.ListThreads(&threads)) { case ThreadLister::Error: ResumeAllThreads(); - Report("Failed to list threads\n"); + VReport(1, "Failed to list threads\n"); return false; case ThreadLister::Incomplete: retry = true; From a06591b4d4fb270b587fc5ef67b5a03dad752b40 Mon Sep 17 00:00:00 2001 From: Hristo Hristov Date: Wed, 9 Oct 2024 09:19:14 +0300 Subject: [PATCH 16/55] [libc++][type_traits] P2674R1: A trait for implicit lifetime types (#106870) Implements P2674R1: https://wg21.link/P2674R1 - https://eel.is/c++draft/type.traits - https://eel.is/c++draft/meta.type.synop - https://eel.is/c++draft/meta.unary.prop - https://eel.is/c++draft/support.limits - https://eel.is/c++draft/version.syn Implementation details: - Uses compiler intrinsic `__builtin_is_implicit_lifetime`: - https://github.com/llvm/llvm-project/pull/101807 - Tests based on: - https://github.com/llvm/llvm-project/blob/d213981c80626698a07b11ce872acba098a863d4/clang/test/SemaCXX/type-traits.cpp#L1989 References: - Implicit-lifetime - Implicit-lifetime types [basic.types.general]/9: https://eel.is/c++draft/basic.types.general - Implicit-lifetime class [class.prop]/9: https://eel.is/c++draft/class.prop - P0593R6 Implicit creation of objects for low-level object manipulation: https://wg21.link/P0593R6 - P1010R1 Container support for implicit 
lifetime types: https://wg21.link/P1010R1 - P0593R6 Implicit creation of objects for low-level object manipulation: https://wg21.link/P0593R6 Closes: #105259 --------- Co-authored-by: Hristo Hristov --- libcxx/docs/FeatureTestMacroTable.rst | 2 + libcxx/docs/ReleaseNotes/20.rst | 1 + libcxx/docs/Status/Cxx23Papers.csv | 2 +- libcxx/include/CMakeLists.txt | 1 + .../__type_traits/is_implicit_lifetime.h | 35 +++ libcxx/include/module.modulemap | 4 + libcxx/include/type_traits | 8 + libcxx/include/version | 4 + libcxx/modules/std/type_traits.inc | 8 +- .../type_traits.version.compile.pass.cpp | 43 ++++ .../version.version.compile.pass.cpp | 43 ++++ .../is_implicit_lifetime.pass.cpp | 237 ++++++++++++++++++ .../is_implicit_lifetime.verify.cpp | 26 ++ .../generate_feature_test_macro_components.py | 7 + 14 files changed, 418 insertions(+), 3 deletions(-) create mode 100644 libcxx/include/__type_traits/is_implicit_lifetime.h create mode 100644 libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.pass.cpp create mode 100644 libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.verify.cpp diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index c909a4300db1a6..05b08da5215350 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -336,6 +336,8 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_ios_noreplace`` ``202207L`` ---------------------------------------------------------- ----------------- + ``__cpp_lib_is_implicit_lifetime`` ``202302L`` + ---------------------------------------------------------- ----------------- ``__cpp_lib_is_scoped_enum`` ``202011L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_mdspan`` ``202207L`` diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index 82c8286b69e23c..dcb1102d81d641 100644 --- 
a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -42,6 +42,7 @@ Implemented Papers - P2609R3: Relaxing Ranges Just A Smidge (`Github `__) - P2985R0: A type trait for detecting virtual base classes (`Github `__) - ``std::jthread`` and ```` are not guarded behind ``-fexperimental-library`` anymore +- P2674R1: A trait for implicit lifetime types (`Github `__) Improvements and New Features ----------------------------- diff --git a/libcxx/docs/Status/Cxx23Papers.csv b/libcxx/docs/Status/Cxx23Papers.csv index e1d7bb8f37863e..da7b5881877135 100644 --- a/libcxx/docs/Status/Cxx23Papers.csv +++ b/libcxx/docs/Status/Cxx23Papers.csv @@ -113,7 +113,7 @@ "`P2572R1 `__","``std::format`` fill character allowances","2023-02 (Issaquah)","|Complete|","17.0","" "`P2693R1 `__","Formatting ``thread::id`` and ``stacktrace``","2023-02 (Issaquah)","|Partial|","","The formatter for ``stacktrace`` is not implemented, since ``stacktrace`` is not implemented yet" "`P2679R2 `__","Fixing ``std::start_lifetime_as`` for arrays","2023-02 (Issaquah)","","","" -"`P2674R1 `__","A trait for implicit lifetime types","2023-02 (Issaquah)","","","" +"`P2674R1 `__","A trait for implicit lifetime types","2023-02 (Issaquah)","|Complete|","20.0","" "`P2655R3 `__","``common_reference_t`` of ``reference_wrapper`` Should Be a Reference Type","2023-02 (Issaquah)","","","" "`P2652R2 `__","Disallow User Specialization of ``allocator_traits``","2023-02 (Issaquah)","|Complete|","19.0","" "`P2787R1 `__","``pmr::generator`` - Promise Types are not Values","2023-02 (Issaquah)","","","" diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 9bd1b41b8bfac4..c2a597f49e317f 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -787,6 +787,7 @@ set(files __type_traits/is_floating_point.h __type_traits/is_function.h __type_traits/is_fundamental.h + __type_traits/is_implicit_lifetime.h __type_traits/is_implicitly_default_constructible.h 
__type_traits/is_integral.h __type_traits/is_literal_type.h diff --git a/libcxx/include/__type_traits/is_implicit_lifetime.h b/libcxx/include/__type_traits/is_implicit_lifetime.h new file mode 100644 index 00000000000000..2aba420bd2b59d --- /dev/null +++ b/libcxx/include/__type_traits/is_implicit_lifetime.h @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_IMPLICIT_LIFETIME_H +#define _LIBCPP___TYPE_TRAITS_IS_IMPLICIT_LIFETIME_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 23 +# if __has_builtin(__builtin_is_implicit_lifetime) + +template +struct _LIBCPP_TEMPLATE_VIS is_implicit_lifetime : public bool_constant<__builtin_is_implicit_lifetime(_Tp)> {}; + +template +inline constexpr bool is_implicit_lifetime_v = __builtin_is_implicit_lifetime(_Tp); + +# endif +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_IMPLICIT_LIFETIME_H diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index dee9b0b88b7948..22a1313498e73e 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -200,6 +200,10 @@ module std_core [system] { header "__type_traits/is_fundamental.h" export std_core.type_traits.integral_constant } + module is_implicit_lifetime { + header "__type_traits/is_implicit_lifetime.h" + export std_core.type_traits.integral_constant + } module is_implicitly_default_constructible { header "__type_traits/is_implicitly_default_constructible.h" export 
std_core.type_traits.integral_constant diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index 26c85f2284e2fd..baeed35ca8508b 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -137,6 +137,8 @@ namespace std template struct is_nothrow_swappable; // C++17 template struct is_nothrow_destructible; + template struct is_implicit_lifetime; // Since C++23 + template struct has_virtual_destructor; template struct has_unique_object_representations; // C++17 @@ -374,6 +376,8 @@ namespace std = is_nothrow_swappable::value; // C++17 template inline constexpr bool is_nothrow_destructible_v = is_nothrow_destructible::value; // C++17 + template + constexpr bool is_implicit_lifetime_v = is_implicit_lifetime::value; // Since C++23 template inline constexpr bool has_virtual_destructor_v = has_virtual_destructor::value; // C++17 template inline constexpr bool has_unique_object_representations_v // C++17 @@ -516,6 +520,10 @@ namespace std # include <__type_traits/unwrap_ref.h> #endif +#if _LIBCPP_STD_VER >= 23 +# include <__type_traits/is_implicit_lifetime.h> +#endif + #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/version b/libcxx/include/version index 5d679caac0b3b7..88387e311636c2 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -138,6 +138,7 @@ __cpp_lib_ios_noreplace 202207L __cpp_lib_is_aggregate 201703L __cpp_lib_is_constant_evaluated 201811L __cpp_lib_is_final 201402L +__cpp_lib_is_implicit_lifetime 202302L __cpp_lib_is_invocable 201703L __cpp_lib_is_layout_compatible 201907L __cpp_lib_is_nothrow_convertible 201806L @@ -473,6 +474,9 @@ __cpp_lib_void_t 201411L # define __cpp_lib_forward_like 202207L # define __cpp_lib_invoke_r 202106L # define __cpp_lib_ios_noreplace 202207L +# if __has_builtin(__builtin_is_implicit_lifetime) +# define __cpp_lib_is_implicit_lifetime 202302L +# endif # define __cpp_lib_is_scoped_enum 202011L # define __cpp_lib_mdspan 202207L # define 
__cpp_lib_modules 202207L diff --git a/libcxx/modules/std/type_traits.inc b/libcxx/modules/std/type_traits.inc index 485a5ddf63aed0..f544f95c7aaaae 100644 --- a/libcxx/modules/std/type_traits.inc +++ b/libcxx/modules/std/type_traits.inc @@ -98,7 +98,9 @@ export namespace std { using std::is_nothrow_destructible; - // using std::is_implicit_lifetime; +#if _LIBCPP_STD_VER >= 23 && __has_builtin(__builtin_is_implicit_lifetime) + using std::is_implicit_lifetime; +#endif using std::has_virtual_destructor; @@ -246,7 +248,9 @@ export namespace std { using std::is_destructible_v; using std::is_empty_v; using std::is_final_v; - // using std::is_implicit_lifetime_v; +#if _LIBCPP_STD_VER >= 23 && __has_builtin(__builtin_is_implicit_lifetime) + using std::is_implicit_lifetime_v; +#endif using std::is_move_assignable_v; using std::is_move_constructible_v; using std::is_nothrow_assignable_v; diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp index 1cbf2699a95bcc..d9d698ace2b653 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp @@ -23,6 +23,7 @@ __cpp_lib_is_aggregate 201703L [C++17] __cpp_lib_is_constant_evaluated 201811L [C++20] __cpp_lib_is_final 201402L [C++14] + __cpp_lib_is_implicit_lifetime 202302L [C++23] __cpp_lib_is_invocable 201703L [C++17] __cpp_lib_is_layout_compatible 201907L [C++20] __cpp_lib_is_nothrow_convertible 201806L [C++20] @@ -75,6 +76,10 @@ # error "__cpp_lib_is_final should not be defined before c++14" # endif +# ifdef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should not be defined before c++23" +# endif + # ifdef __cpp_lib_is_invocable # error "__cpp_lib_is_invocable 
should not be defined before c++17" # endif @@ -179,6 +184,10 @@ # error "__cpp_lib_is_final should have the value 201402L in c++14" # endif +# ifdef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should not be defined before c++23" +# endif + # ifdef __cpp_lib_is_invocable # error "__cpp_lib_is_invocable should not be defined before c++17" # endif @@ -301,6 +310,10 @@ # error "__cpp_lib_is_final should have the value 201402L in c++17" # endif +# ifdef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should not be defined before c++23" +# endif + # ifndef __cpp_lib_is_invocable # error "__cpp_lib_is_invocable should be defined in c++17" # endif @@ -444,6 +457,10 @@ # error "__cpp_lib_is_final should have the value 201402L in c++20" # endif +# ifdef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should not be defined before c++23" +# endif + # ifndef __cpp_lib_is_invocable # error "__cpp_lib_is_invocable should be defined in c++20" # endif @@ -614,6 +631,19 @@ # error "__cpp_lib_is_final should have the value 201402L in c++23" # endif +# if __has_builtin(__builtin_is_implicit_lifetime) +# ifndef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should be defined in c++23" +# endif +# if __cpp_lib_is_implicit_lifetime != 202302L +# error "__cpp_lib_is_implicit_lifetime should have the value 202302L in c++23" +# endif +# else +# ifdef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should not be defined when the requirement '__has_builtin(__builtin_is_implicit_lifetime)' is not met!" 
+# endif +# endif + # ifndef __cpp_lib_is_invocable # error "__cpp_lib_is_invocable should be defined in c++23" # endif @@ -796,6 +826,19 @@ # error "__cpp_lib_is_final should have the value 201402L in c++26" # endif +# if __has_builtin(__builtin_is_implicit_lifetime) +# ifndef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should be defined in c++26" +# endif +# if __cpp_lib_is_implicit_lifetime != 202302L +# error "__cpp_lib_is_implicit_lifetime should have the value 202302L in c++26" +# endif +# else +# ifdef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should not be defined when the requirement '__has_builtin(__builtin_is_implicit_lifetime)' is not met!" +# endif +# endif + # ifndef __cpp_lib_is_invocable # error "__cpp_lib_is_invocable should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index 985ffeffab96db..0614f64a2ef04d 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -125,6 +125,7 @@ __cpp_lib_is_aggregate 201703L [C++17] __cpp_lib_is_constant_evaluated 201811L [C++20] __cpp_lib_is_final 201402L [C++14] + __cpp_lib_is_implicit_lifetime 202302L [C++23] __cpp_lib_is_invocable 201703L [C++17] __cpp_lib_is_layout_compatible 201907L [C++20] __cpp_lib_is_nothrow_convertible 201806L [C++20] @@ -671,6 +672,10 @@ # error "__cpp_lib_is_final should not be defined before c++14" # endif +# ifdef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should not be defined before c++23" +# endif + # ifdef __cpp_lib_is_invocable # error "__cpp_lib_is_invocable should not be defined before c++17" # endif @@ -1550,6 +1555,10 
@@ # error "__cpp_lib_is_final should have the value 201402L in c++14" # endif +# ifdef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should not be defined before c++23" +# endif + # ifdef __cpp_lib_is_invocable # error "__cpp_lib_is_invocable should not be defined before c++17" # endif @@ -2564,6 +2573,10 @@ # error "__cpp_lib_is_final should have the value 201402L in c++17" # endif +# ifdef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should not be defined before c++23" +# endif + # ifndef __cpp_lib_is_invocable # error "__cpp_lib_is_invocable should be defined in c++17" # endif @@ -3842,6 +3855,10 @@ # error "__cpp_lib_is_final should have the value 201402L in c++20" # endif +# ifdef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should not be defined before c++23" +# endif + # ifndef __cpp_lib_is_invocable # error "__cpp_lib_is_invocable should be defined in c++20" # endif @@ -5306,6 +5323,19 @@ # error "__cpp_lib_is_final should have the value 201402L in c++23" # endif +# if __has_builtin(__builtin_is_implicit_lifetime) +# ifndef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should be defined in c++23" +# endif +# if __cpp_lib_is_implicit_lifetime != 202302L +# error "__cpp_lib_is_implicit_lifetime should have the value 202302L in c++23" +# endif +# else +# ifdef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should not be defined when the requirement '__has_builtin(__builtin_is_implicit_lifetime)' is not met!" 
+# endif +# endif + # ifndef __cpp_lib_is_invocable # error "__cpp_lib_is_invocable should be defined in c++23" # endif @@ -7112,6 +7142,19 @@ # error "__cpp_lib_is_final should have the value 201402L in c++26" # endif +# if __has_builtin(__builtin_is_implicit_lifetime) +# ifndef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should be defined in c++26" +# endif +# if __cpp_lib_is_implicit_lifetime != 202302L +# error "__cpp_lib_is_implicit_lifetime should have the value 202302L in c++26" +# endif +# else +# ifdef __cpp_lib_is_implicit_lifetime +# error "__cpp_lib_is_implicit_lifetime should not be defined when the requirement '__has_builtin(__builtin_is_implicit_lifetime)' is not met!" +# endif +# endif + # ifndef __cpp_lib_is_invocable # error "__cpp_lib_is_invocable should be defined in c++26" # endif diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.pass.cpp new file mode 100644 index 00000000000000..a6ab77158aae1d --- /dev/null +++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.pass.cpp @@ -0,0 +1,237 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// These compilers don't support __builtin_is_implicit_lifetime yet. 
+// UNSUPPORTED: clang-17, clang-18, clang-19, gcc-14, apple-clang-15, apple-clang-16 + +// + +// template struct is_implicit_lifetime; + +#include +#include +#include +#include +#include + +#include "test_macros.h" +#include "type_algorithms.h" + +enum Enum { EV }; +enum SignedEnum : signed int {}; +enum UnsignedEnum : unsigned int {}; + +enum class EnumClass { EV }; +enum class SignedEnumClass : signed int {}; +enum class UnsignedEnumClass : unsigned int {}; + +struct EmptyStruct {}; +struct IncompleteStruct; + +struct NoEligibleTrivialContructor { + NoEligibleTrivialContructor() {}; + NoEligibleTrivialContructor(const NoEligibleTrivialContructor&) {} + NoEligibleTrivialContructor(NoEligibleTrivialContructor&&) {} +}; + +struct OnlyDefaultConstructorIsTrivial { + OnlyDefaultConstructorIsTrivial() = default; + OnlyDefaultConstructorIsTrivial(const OnlyDefaultConstructorIsTrivial&) {} + OnlyDefaultConstructorIsTrivial(OnlyDefaultConstructorIsTrivial&&) {} +}; + +struct AllContstructorsAreTrivial { + AllContstructorsAreTrivial() = default; + AllContstructorsAreTrivial(const AllContstructorsAreTrivial&) = default; + AllContstructorsAreTrivial(AllContstructorsAreTrivial&&) = default; +}; + +struct InheritedNoEligibleTrivialConstructor : NoEligibleTrivialContructor { + using NoEligibleTrivialContructor::NoEligibleTrivialContructor; +}; + +struct InheritedOnlyDefaultConstructorIsTrivial : OnlyDefaultConstructorIsTrivial { + using OnlyDefaultConstructorIsTrivial::OnlyDefaultConstructorIsTrivial; +}; + +struct InheritedAllContstructorsAreTrivial : AllContstructorsAreTrivial { + using AllContstructorsAreTrivial::AllContstructorsAreTrivial; +}; + +struct UserDeclaredDestructor { + ~UserDeclaredDestructor() = default; +}; + +struct UserProvidedDestructor { + ~UserProvidedDestructor() {} +}; + +struct UserDeletedDestructorInAggregate { + ~UserDeletedDestructorInAggregate() = delete; +}; + +struct UserDeletedDestructorInNonAggregate { + virtual void NonAggregate(); + 
~UserDeletedDestructorInNonAggregate() = delete; +}; + +struct DeletedDestructorViaBaseInAggregate : UserDeletedDestructorInAggregate {}; +struct DeletedDestructorViaBaseInNonAggregate : UserDeletedDestructorInNonAggregate {}; + +template +struct ConstrainedUserDeclaredDefaultConstructor { + ConstrainedUserDeclaredDefaultConstructor() + requires B + = default; + ConstrainedUserDeclaredDefaultConstructor(const ConstrainedUserDeclaredDefaultConstructor&) {} +}; + +template +struct ConstrainedUserProvidedDestructor { + ~ConstrainedUserProvidedDestructor() = default; + ~ConstrainedUserProvidedDestructor() + requires B + {} +}; + +struct StructWithFlexibleArrayMember { + int arr[]; +}; + +struct StructWithZeroSizedArray { + int arr[0]; +}; + +// Test implicit-lifetime type +template +constexpr void test_is_implicit_lifetime() { + assert(std::is_implicit_lifetime::value == Expected); + assert(std::is_implicit_lifetime_v == Expected); +} + +// Test pointer, reference, array, etc. types +template +constexpr void test_is_implicit_lifetime() { + test_is_implicit_lifetime(); + + // cv-qualified + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + + // Pointer types + test_is_implicit_lifetime(); + + // Arrays + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); +} + +struct AritmeticTypesTest { + template + constexpr void operator()() { + test_is_implicit_lifetime(); + } +}; + +constexpr bool test() { + // Standard fundamental C++ types + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + + types::for_each(types::arithmetic_types(), AritmeticTypesTest{}); + + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + 
test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + + // Implicit-lifetime class types + + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); // Pointer-to-member + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime, true>(); + test_is_implicit_lifetime, false>(); + + test_is_implicit_lifetime, false>(); + test_is_implicit_lifetime, true>(); + + test_is_implicit_lifetime(); + + test_is_implicit_lifetime(); + + // C++ standard library types + + test_is_implicit_lifetime>(); + test_is_implicit_lifetime>(); + + // Standard C23 types + +#ifdef TEST_COMPILER_CLANG + test_is_implicit_lifetime<_BitInt(8)>(); + test_is_implicit_lifetime<_BitInt(128)>(); +#endif + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.verify.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.verify.cpp new file mode 100644 index 00000000000000..25bba30da612e6 --- /dev/null +++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.verify.cpp @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// These compilers don't support __builtin_is_implicit_lifetime yet. +// UNSUPPORTED: clang-17, clang-18, clang-19, gcc-14, apple-clang-15, apple-clang-16 + +// + +// template struct is_implicit_lifetime; + +#include + +struct IncompleteStruct; + +// expected-error@*:* {{incomplete type 'IncompleteStruct' used in type trait expression}} +static_assert(!std::is_implicit_lifetime::value); + +// expected-error@*:* {{incomplete type 'IncompleteStruct' used in type trait expression}} +static_assert(!std::is_implicit_lifetime_v); diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 3b8a52362ede68..db14c1781dc35a 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -742,6 +742,13 @@ def add_version_header(tc): "values": {"c++14": 201402}, "headers": ["type_traits"], }, + { + "name": "__cpp_lib_is_implicit_lifetime", + "values": {"c++23": 202302}, + "headers": ["type_traits"], + "test_suite_guard": "__has_builtin(__builtin_is_implicit_lifetime)", + "libcxx_guard": "__has_builtin(__builtin_is_implicit_lifetime)", + }, { "name": "__cpp_lib_is_invocable", "values": {"c++17": 201703}, From 3c1d9b8ec7474f076ddd842d2b6c562728e9b90a Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 9 Oct 2024 06:19:45 +0000 Subject: [PATCH 17/55] [gn build] Port a06591b4d4fb --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index d850a7f20952d9..5f2e0dd90da072 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -859,6 +859,7 
@@ if (current_toolchain == default_toolchain) { "__type_traits/is_floating_point.h", "__type_traits/is_function.h", "__type_traits/is_fundamental.h", + "__type_traits/is_implicit_lifetime.h", "__type_traits/is_implicitly_default_constructible.h", "__type_traits/is_integral.h", "__type_traits/is_literal_type.h", From fb2960aad93f6c02e0ea8de0568c0aef8896eee8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 9 Oct 2024 09:30:32 +0300 Subject: [PATCH 18/55] [compiler-rt] [profile] Add missing (void) to prototypes, for C sources (#110642) If built as part of the main llvm build, via LLVM_ENABLE_PROJECTS=compiler-rt, the code gets built with more warning options than if built standalone. Some of these trigger warnings like: warning: a function declaration without a prototype is deprecated in all versions of C [-Wstrict-prototypes] --- compiler-rt/lib/profile/InstrProfiling.h | 16 ++++++++-------- compiler-rt/lib/profile/InstrProfilingInternal.h | 6 +++--- compiler-rt/lib/profile/InstrProfilingPort.h | 2 +- compiler-rt/lib/profile/InstrProfilingUtil.h | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h index 6906d52eacaf1b..9e43fd7c4789d8 100644 --- a/compiler-rt/lib/profile/InstrProfiling.h +++ b/compiler-rt/lib/profile/InstrProfiling.h @@ -114,11 +114,11 @@ char *__llvm_profile_begin_counters(void); char *__llvm_profile_end_counters(void); char *__llvm_profile_begin_bitmap(void); char *__llvm_profile_end_bitmap(void); -ValueProfNode *__llvm_profile_begin_vnodes(); -ValueProfNode *__llvm_profile_end_vnodes(); -const VTableProfData *__llvm_profile_begin_vtables(); -const VTableProfData *__llvm_profile_end_vtables(); -uint32_t *__llvm_profile_begin_orderfile(); +ValueProfNode *__llvm_profile_begin_vnodes(void); +ValueProfNode *__llvm_profile_end_vnodes(void); +const VTableProfData *__llvm_profile_begin_vtables(void); +const VTableProfData 
*__llvm_profile_end_vtables(void); +uint32_t *__llvm_profile_begin_orderfile(void); /*! * \brief Merge profile data from buffer. @@ -216,7 +216,7 @@ void __llvm_profile_initialize(void); * merge mode is turned on for instrumented programs with shared libs). * Side-effect: this API call will invoke malloc with dynamic memory allocation. */ -const char *__llvm_profile_get_path_prefix(); +const char *__llvm_profile_get_path_prefix(void); /*! * \brief Return filename (including path) of the profile data. Note that if the @@ -229,7 +229,7 @@ const char *__llvm_profile_get_path_prefix(); * instrumented image/DSO). This API only retrieves the filename from the copy * of the runtime available to the calling image. */ -const char *__llvm_profile_get_filename(); +const char *__llvm_profile_get_filename(void); /*! \brief Get the magic token for the file format. */ uint64_t __llvm_profile_get_magic(void); @@ -293,7 +293,7 @@ int __llvm_profile_get_padding_sizes_for_counters( * certain processes in case the processes don't have permission to write to * the disks, and trying to do so would result in side effects such as crashes. */ -void __llvm_profile_set_dumped(); +void __llvm_profile_set_dumped(void); /*! * This variable is defined in InstrProfilingRuntime.cpp as a hidden diff --git a/compiler-rt/lib/profile/InstrProfilingInternal.h b/compiler-rt/lib/profile/InstrProfilingInternal.h index d5bd0e41fb1291..b100343ca04f9e 100644 --- a/compiler-rt/lib/profile/InstrProfilingInternal.h +++ b/compiler-rt/lib/profile/InstrProfilingInternal.h @@ -167,20 +167,20 @@ int lprofWriteDataImpl(ProfDataWriter *Writer, void lprofMergeValueProfData(struct ValueProfData *SrcValueProfData, __llvm_profile_data *DstData); -VPDataReaderType *lprofGetVPDataReader(); +VPDataReaderType *lprofGetVPDataReader(void); /* Internal interface used by test to reset the max number of * tracked values per value site to be \p MaxVals. 
*/ void lprofSetMaxValsPerSite(uint32_t MaxVals); -void lprofSetupValueProfiler(); +void lprofSetupValueProfiler(void); /* Return the profile header 'signature' value associated with the current * executable or shared library. The signature value can be used to for * a profile name that is unique to this load module so that it does not * collide with profiles from other binaries. It also allows shared libraries * to dump merged profile data into its own profile file. */ -uint64_t lprofGetLoadModuleSignature(); +uint64_t lprofGetLoadModuleSignature(void); /* * Return non zero value if the profile data has already been diff --git a/compiler-rt/lib/profile/InstrProfilingPort.h b/compiler-rt/lib/profile/InstrProfilingPort.h index ed0905cc5f2022..f77699ee8d59cf 100644 --- a/compiler-rt/lib/profile/InstrProfilingPort.h +++ b/compiler-rt/lib/profile/InstrProfilingPort.h @@ -111,7 +111,7 @@ #if defined(_WIN32) #include -static inline size_t getpagesize() { +static inline size_t getpagesize(void) { SYSTEM_INFO S; GetNativeSystemInfo(&S); return S.dwPageSize; diff --git a/compiler-rt/lib/profile/InstrProfilingUtil.h b/compiler-rt/lib/profile/InstrProfilingUtil.h index 841204b6ea8a38..227c2aa0a7caea 100644 --- a/compiler-rt/lib/profile/InstrProfilingUtil.h +++ b/compiler-rt/lib/profile/InstrProfilingUtil.h @@ -69,10 +69,10 @@ void *lprofPtrFetchAdd(void **Mem, long ByteIncr); /* Temporarily suspend SIGKILL. Return value of 1 means a restore is needed. * Other return values mean no restore is needed. */ -int lprofSuspendSigKill(); +int lprofSuspendSigKill(void); /* Restore previously suspended SIGKILL. 
*/ -void lprofRestoreSigKill(); +void lprofRestoreSigKill(void); static inline size_t lprofRoundUpTo(size_t x, size_t boundary) { return (x + boundary - 1) & ~(boundary - 1); From 3be691651a2143f23bcf8f2704e55b01bbaa2550 Mon Sep 17 00:00:00 2001 From: Thomas Fransham Date: Wed, 9 Oct 2024 07:41:28 +0100 Subject: [PATCH 19/55] Add symbol visibility macros to abi-breaking.h.cmake (#110898) Annotating these symbols will fix missing symbols errors for Bugpoint when the default symbol visibility is set to hidden for LLVM. This is part of the work to enable LLVM_BUILD_LLVM_DYLIB and plugins on Windows. Co-authored-by: Tom Stellard --- llvm/include/llvm/Config/abi-breaking.h.cmake | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Config/abi-breaking.h.cmake b/llvm/include/llvm/Config/abi-breaking.h.cmake index 2d27e02b1d5457..81495f0569752c 100644 --- a/llvm/include/llvm/Config/abi-breaking.h.cmake +++ b/llvm/include/llvm/Config/abi-breaking.h.cmake @@ -12,6 +12,8 @@ #ifndef LLVM_ABI_BREAKING_CHECKS_H #define LLVM_ABI_BREAKING_CHECKS_H +#include "llvm/Support/Compiler.h" + /* Define to enable checks that alter the LLVM C++ ABI */ #cmakedefine01 LLVM_ENABLE_ABI_BREAKING_CHECKS @@ -43,12 +45,12 @@ #endif namespace llvm { #if LLVM_ENABLE_ABI_BREAKING_CHECKS -extern int EnableABIBreakingChecks; +LLVM_ABI extern int EnableABIBreakingChecks; LLVM_HIDDEN_VISIBILITY __attribute__((weak)) int *VerifyEnableABIBreakingChecks = &EnableABIBreakingChecks; #else -extern int DisableABIBreakingChecks; +LLVM_ABI extern int DisableABIBreakingChecks; LLVM_HIDDEN_VISIBILITY __attribute__((weak)) int *VerifyDisableABIBreakingChecks = &DisableABIBreakingChecks; From ada6372e52547ba0090f52a2e9e9d95d7eca28d3 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 7 Oct 2024 15:33:30 +0200 Subject: [PATCH 20/55] Revert "[clang] Finish implementation of P0522 (#96023)" This caused Clang to reject valid code, see discussion on the PR 
https://github.com/llvm/llvm-project/pull/96023#issuecomment-2393228464 and https://github.com/llvm/llvm-project/issues/111363 This reverts commit 6afe56732a172d3f2cbd0330b1fcb34bbfd002a9 and follow-up commit 9abb97f9663a27fe5b8e346ed557b3435aa9ec2f. --- clang/docs/ReleaseNotes.rst | 10 - .../clang/Basic/DiagnosticSemaKinds.td | 7 - clang/include/clang/Sema/Sema.h | 14 +- clang/lib/Frontend/FrontendActions.cpp | 2 - clang/lib/Sema/SemaTemplate.cpp | 115 +++--- clang/lib/Sema/SemaTemplateDeduction.cpp | 362 +++++------------- clang/lib/Sema/SemaTemplateInstantiate.cpp | 15 - .../temp/temp.arg/temp.arg.template/p3-0x.cpp | 31 +- .../temp.deduct/temp.deduct.type/p9-0x.cpp | 4 - clang/test/CXX/temp/temp.param/p12.cpp | 21 +- clang/test/Modules/cxx-templates.cpp | 15 +- clang/test/SemaCXX/make_integer_seq.cpp | 5 +- clang/test/SemaTemplate/cwg2398.cpp | 166 +------- clang/test/SemaTemplate/temp_arg_nontype.cpp | 26 +- clang/test/SemaTemplate/temp_arg_template.cpp | 38 +- .../SemaTemplate/temp_arg_template_p0522.cpp | 80 ++-- .../Templight/templight-empty-entries-fix.cpp | 12 - .../templight-prior-template-arg.cpp | 33 +- .../type_traits/is_specialization.verify.cpp | 2 +- 19 files changed, 277 insertions(+), 681 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8d02cc3eae9fd9..36e8126bcda6ad 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -174,10 +174,6 @@ C++23 Feature Support C++20 Feature Support ^^^^^^^^^^^^^^^^^^^^^ -C++17 Feature Support -^^^^^^^^^^^^^^^^^^^^^ -- The implementation of the relaxed template template argument matching rules is - more complete and reliable, and should provide more accurate diagnostics. Resolutions to C++ Defect Reports ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -335,10 +331,6 @@ Improvements to Clang's diagnostics - Clang now diagnoses when the result of a [[nodiscard]] function is discarded after being cast in C. Fixes #GH104391. 
-- Clang now properly explains the reason a template template argument failed to - match a template template parameter, in terms of the C++17 relaxed matching rules - instead of the old ones. - - Don't emit duplicated dangling diagnostics. (#GH93386). - Improved diagnostic when trying to befriend a concept. (#GH45182). @@ -444,8 +436,6 @@ Bug Fixes to C++ Support - Correctly check constraints of explicit instantiations of member functions. (#GH46029) - When performing partial ordering of function templates, clang now checks that the deduction was consistent. Fixes (#GH18291). -- Fixes to several issues in partial ordering of template template parameters, which - were documented in the test suite. - Fixed an assertion failure about a constraint of a friend function template references to a value with greater template depth than the friend function template. (#GH98258) - Clang now rebuilds the template parameters of out-of-line declarations and specializations in the context diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 583475327c5227..057c3e6861a5fb 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5262,13 +5262,6 @@ def note_template_arg_refers_here_func : Note< def err_template_arg_template_params_mismatch : Error< "template template argument has different template parameters than its " "corresponding template template parameter">; -def note_template_arg_template_params_mismatch : Note< - "template template argument has different template parameters than its " - "corresponding template template parameter">; -def err_non_deduced_mismatch : Error< - "could not match %diff{$ against $|types}0,1">; -def err_inconsistent_deduction : Error< - "conflicting deduction %diff{$ against $|types}0,1 for parameter">; def err_template_arg_not_integral_or_enumeral : Error< "non-type template argument of type %0 must have an integral or 
enumeration" " type">; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 043456438b6d03..86053bd7da1725 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12417,9 +12417,8 @@ class Sema final : public SemaBase { sema::TemplateDeductionInfo &Info); bool isTemplateTemplateParameterAtLeastAsSpecializedAs( - TemplateParameterList *PParam, TemplateDecl *PArg, TemplateDecl *AArg, - const DefaultArguments &DefaultArgs, SourceLocation ArgLoc, - bool IsDeduced); + TemplateParameterList *PParam, TemplateDecl *AArg, + const DefaultArguments &DefaultArgs, SourceLocation Loc, bool IsDeduced); /// Mark which template parameters are used in a given expression. /// @@ -12728,9 +12727,6 @@ class Sema final : public SemaBase { /// We are instantiating a type alias template declaration. TypeAliasTemplateInstantiation, - - /// We are performing partial ordering for template template parameters. - PartialOrderingTTP, } Kind; /// Was the enclosing context a non-instantiation SFINAE context? @@ -12952,12 +12948,6 @@ class Sema final : public SemaBase { TemplateDecl *Entity, BuildingDeductionGuidesTag, SourceRange InstantiationRange = SourceRange()); - struct PartialOrderingTTP {}; - /// \brief Note that we are partial ordering template template parameters. - InstantiatingTemplate(Sema &SemaRef, SourceLocation ArgLoc, - PartialOrderingTTP, TemplateDecl *PArg, - SourceRange InstantiationRange = SourceRange()); - /// Note that we have finished instantiating this template. 
void Clear(); diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index e4b462b9b0fd81..64f90c493c1055 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -457,8 +457,6 @@ class DefaultTemplateInstCallback : public TemplateInstantiationCallback { return "BuildingDeductionGuides"; case CodeSynthesisContext::TypeAliasTemplateInstantiation: return "TypeAliasTemplateInstantiation"; - case CodeSynthesisContext::PartialOrderingTTP: - return "PartialOrderingTTP"; } return ""; } diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index b1a34edba9150b..dfd56debc75e99 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -5498,7 +5498,8 @@ bool Sema::CheckTemplateArgumentList( DefaultArgs && ParamIdx >= DefaultArgs.StartPos) { // All written arguments should have been consumed by this point. assert(ArgIdx == NumArgs && "bad default argument deduction"); - if (ParamIdx == DefaultArgs.StartPos) { + // FIXME: Don't ignore parameter packs. + if (ParamIdx == DefaultArgs.StartPos && !(*Param)->isParameterPack()) { assert(Param + DefaultArgs.Args.size() <= ParamEnd); // Default arguments from a DeducedTemplateName are already converted. for (const TemplateArgument &DefArg : DefaultArgs.Args) { @@ -5575,6 +5576,9 @@ bool Sema::CheckTemplateArgumentList( return true; } + // We're now done with this argument. + ++ArgIdx; + if ((*Param)->isTemplateParameterPack()) { // The template parameter was a template parameter pack, so take the // deduced argument and place it on the argument pack. Note that we @@ -5585,19 +5589,8 @@ bool Sema::CheckTemplateArgumentList( } else { // Move to the next template parameter. ++Param; - if (PartialOrderingTTP && PackExpansionIntoNonPack) { - // Keep converting the pattern in the argument against - // subsequent parameters. The argument is converted - // in place and will be added back when we are done. 
- SugaredConverted.pop_back(); - CanonicalConverted.pop_back(); - continue; - } } - // We're now done with this argument. - ++ArgIdx; - // If we just saw a pack expansion into a non-pack, then directly convert // the remaining arguments, because we don't know what parameters they'll // match up with. @@ -5731,10 +5724,14 @@ bool Sema::CheckTemplateArgumentList( // pack expansions; they might be empty. This can happen even if // PartialTemplateArgs is false (the list of arguments is complete but // still dependent). - while (ArgIdx < NumArgs && NewArgs[ArgIdx].getArgument().isPackExpansion()) { - const TemplateArgument &Arg = NewArgs[ArgIdx++].getArgument(); - SugaredConverted.push_back(Arg); - CanonicalConverted.push_back(Context.getCanonicalTemplateArgument(Arg)); + if (ArgIdx < NumArgs && CurrentInstantiationScope && + CurrentInstantiationScope->getPartiallySubstitutedPack()) { + while (ArgIdx < NumArgs && + NewArgs[ArgIdx].getArgument().isPackExpansion()) { + const TemplateArgument &Arg = NewArgs[ArgIdx++].getArgument(); + SugaredConverted.push_back(Arg); + CanonicalConverted.push_back(Context.getCanonicalTemplateArgument(Arg)); + } } // If we have any leftover arguments, then there were too many arguments. @@ -7324,46 +7321,64 @@ bool Sema::CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param, << Template; } - if (!getLangOpts().RelaxedTemplateTemplateArgs) - return !TemplateParameterListsAreEqual( - Template->getTemplateParameters(), Params, /*Complain=*/true, - TPL_TemplateTemplateArgumentMatch, Arg.getLocation()); - // C++1z [temp.arg.template]p3: (DR 150) // A template-argument matches a template template-parameter P when P // is at least as specialized as the template-argument A. - if (!isTemplateTemplateParameterAtLeastAsSpecializedAs( - Params, Param, Template, DefaultArgs, Arg.getLocation(), IsDeduced)) - return true; - // P2113 - // C++20[temp.func.order]p2 - // [...] 
If both deductions succeed, the partial ordering selects the - // more constrained template (if one exists) as determined below. - SmallVector ParamsAC, TemplateAC; - Params->getAssociatedConstraints(ParamsAC); - // C++20[temp.arg.template]p3 - // [...] In this comparison, if P is unconstrained, the constraints on A - // are not considered. - if (ParamsAC.empty()) - return false; + if (getLangOpts().RelaxedTemplateTemplateArgs) { + // Quick check for the common case: + // If P contains a parameter pack, then A [...] matches P if each of A's + // template parameters matches the corresponding template parameter in + // the template-parameter-list of P. + if (TemplateParameterListsAreEqual( + Template->getTemplateParameters(), Params, false, + TPL_TemplateTemplateArgumentMatch, Arg.getLocation()) && + // If the argument has no associated constraints, then the parameter is + // definitely at least as specialized as the argument. + // Otherwise - we need a more thorough check. + !Template->hasAssociatedConstraints()) + return false; - Template->getAssociatedConstraints(TemplateAC); + if (isTemplateTemplateParameterAtLeastAsSpecializedAs( + Params, Template, DefaultArgs, Arg.getLocation(), IsDeduced)) { + // P2113 + // C++20[temp.func.order]p2 + // [...] If both deductions succeed, the partial ordering selects the + // more constrained template (if one exists) as determined below. + SmallVector ParamsAC, TemplateAC; + Params->getAssociatedConstraints(ParamsAC); + // C++2a[temp.arg.template]p3 + // [...] In this comparison, if P is unconstrained, the constraints on A + // are not considered. 
+ if (ParamsAC.empty()) + return false; - bool IsParamAtLeastAsConstrained; - if (IsAtLeastAsConstrained(Param, ParamsAC, Template, TemplateAC, - IsParamAtLeastAsConstrained)) - return true; - if (!IsParamAtLeastAsConstrained) { - Diag(Arg.getLocation(), - diag::err_template_template_parameter_not_at_least_as_constrained) - << Template << Param << Arg.getSourceRange(); - Diag(Param->getLocation(), diag::note_entity_declared_at) << Param; - Diag(Template->getLocation(), diag::note_entity_declared_at) << Template; - MaybeEmitAmbiguousAtomicConstraintsDiagnostic(Param, ParamsAC, Template, - TemplateAC); - return true; + Template->getAssociatedConstraints(TemplateAC); + + bool IsParamAtLeastAsConstrained; + if (IsAtLeastAsConstrained(Param, ParamsAC, Template, TemplateAC, + IsParamAtLeastAsConstrained)) + return true; + if (!IsParamAtLeastAsConstrained) { + Diag(Arg.getLocation(), + diag::err_template_template_parameter_not_at_least_as_constrained) + << Template << Param << Arg.getSourceRange(); + Diag(Param->getLocation(), diag::note_entity_declared_at) << Param; + Diag(Template->getLocation(), diag::note_entity_declared_at) + << Template; + MaybeEmitAmbiguousAtomicConstraintsDiagnostic(Param, ParamsAC, Template, + TemplateAC); + return true; + } + return false; + } + // FIXME: Produce better diagnostics for deduction failures. 
} - return false; + + return !TemplateParameterListsAreEqual(Template->getTemplateParameters(), + Params, + true, + TPL_TemplateTemplateArgumentMatch, + Arg.getLocation()); } static Sema::SemaDiagnosticBuilder noteLocation(Sema &S, const NamedDecl &Decl, diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index f9a8d2d9ff0b1d..dfae0d6cda0d9b 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -145,9 +145,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch( PartialOrderingKind POK, bool DeducedFromArrayBound, bool *HasDeducedAnyParam); -/// What directions packs are allowed to match non-packs. -enum class PackFold { ParameterToArgument, ArgumentToParameter, Both }; - +enum class PackFold { ParameterToArgument, ArgumentToParameter }; static TemplateDeductionResult DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, ArrayRef Ps, @@ -1713,21 +1711,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch( DeducedTemplateArgument Result = checkDeducedTemplateArguments(S.Context, Deduced[Index], NewDeduced); if (Result.isNull()) { - // We can also get inconsistencies when matching NTTP type. 
- switch (NamedDecl *Param = TemplateParams->getParam(Index); - Param->getKind()) { - case Decl::TemplateTypeParm: - Info.Param = cast(Param); - break; - case Decl::NonTypeTemplateParm: - Info.Param = cast(Param); - break; - case Decl::TemplateTemplateParm: - Info.Param = cast(Param); - break; - default: - llvm_unreachable("unexpected kind"); - } + Info.Param = cast(TemplateParams->getParam(Index)); Info.FirstArg = Deduced[Index]; Info.SecondArg = NewDeduced; return TemplateDeductionResult::Inconsistent; @@ -2565,31 +2549,8 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, if (const NonTypeTemplateParmDecl *NTTP = getDeducedParameterFromExpr(Info, P.getAsExpr())) { switch (A.getKind()) { - case TemplateArgument::Expression: { - const Expr *E = A.getAsExpr(); - // When checking NTTP, if either the parameter or the argument is - // dependent, as there would be otherwise nothing to deduce, we force - // the argument to the parameter type using this dependent implicit - // cast, in order to maintain invariants. Now we can deduce the - // resulting type from the original type, and deduce the original type - // against the parameter we are checking. - if (const auto *ICE = dyn_cast(E); - ICE && ICE->getCastKind() == clang::CK_Dependent) { - E = ICE->getSubExpr(); - if (auto Result = DeduceTemplateArgumentsByTypeMatch( - S, TemplateParams, ICE->getType(), E->getType(), Info, - Deduced, TDF_SkipNonDependent, - PartialOrdering ? 
PartialOrderingKind::NonCall - : PartialOrderingKind::None, - /*DeducedFromArrayBound=*/false, HasDeducedAnyParam); - Result != TemplateDeductionResult::Success) - return Result; - } - return DeduceNonTypeTemplateArgument( - S, TemplateParams, NTTP, DeducedTemplateArgument(A), E->getType(), - Info, PartialOrdering, Deduced, HasDeducedAnyParam); - } case TemplateArgument::Integral: + case TemplateArgument::Expression: case TemplateArgument::StructuralValue: return DeduceNonTypeTemplateArgument( S, TemplateParams, NTTP, DeducedTemplateArgument(A), @@ -2678,75 +2639,50 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, SmallVectorImpl &Deduced, bool NumberOfArgumentsMustMatch, bool PartialOrdering, PackFold PackFold, bool *HasDeducedAnyParam) { - bool FoldPackParameter = PackFold == PackFold::ParameterToArgument || - PackFold == PackFold::Both, - FoldPackArgument = PackFold == PackFold::ArgumentToParameter || - PackFold == PackFold::Both; - + if (PackFold == PackFold::ArgumentToParameter) + std::swap(Ps, As); // C++0x [temp.deduct.type]p9: // If the template argument list of P contains a pack expansion that is not // the last template argument, the entire template argument list is a // non-deduced context. - if (FoldPackParameter && hasPackExpansionBeforeEnd(Ps)) - return TemplateDeductionResult::Success; - - if (FoldPackArgument && hasPackExpansionBeforeEnd(As)) + if (hasPackExpansionBeforeEnd(Ps)) return TemplateDeductionResult::Success; // C++0x [temp.deduct.type]p9: // If P has a form that contains or , then each argument Pi of the // respective template argument list P is compared with the corresponding // argument Ai of the corresponding template argument list of A. - for (unsigned ArgIdx = 0, ParamIdx = 0; /**/; /**/) { - if (!hasTemplateArgumentForDeduction(Ps, ParamIdx)) - return !FoldPackParameter && hasTemplateArgumentForDeduction(As, ArgIdx) - ? 
TemplateDeductionResult::MiscellaneousDeductionFailure - : TemplateDeductionResult::Success; - - if (!Ps[ParamIdx].isPackExpansion()) { + unsigned ArgIdx = 0, ParamIdx = 0; + for (; hasTemplateArgumentForDeduction(Ps, ParamIdx); ++ParamIdx) { + const TemplateArgument &P = Ps[ParamIdx]; + if (!P.isPackExpansion()) { // The simple case: deduce template arguments by matching Pi and Ai. // Check whether we have enough arguments. if (!hasTemplateArgumentForDeduction(As, ArgIdx)) - return !FoldPackArgument && NumberOfArgumentsMustMatch + return NumberOfArgumentsMustMatch ? TemplateDeductionResult::MiscellaneousDeductionFailure : TemplateDeductionResult::Success; - if (As[ArgIdx].isPackExpansion()) { - // C++1z [temp.deduct.type]p9: - // During partial ordering, if Ai was originally a pack expansion - // [and] Pi is not a pack expansion, template argument deduction - // fails. - if (!FoldPackArgument) - return TemplateDeductionResult::MiscellaneousDeductionFailure; - - TemplateArgument Pattern = As[ArgIdx].getPackExpansionPattern(); - for (;;) { - // Deduce template parameters from the pattern. - if (auto Result = DeduceTemplateArguments( - S, TemplateParams, Ps[ParamIdx], Pattern, Info, - PartialOrdering, Deduced, HasDeducedAnyParam); - Result != TemplateDeductionResult::Success) - return Result; + // C++1z [temp.deduct.type]p9: + // During partial ordering, if Ai was originally a pack expansion [and] + // Pi is not a pack expansion, template argument deduction fails. + if (As[ArgIdx].isPackExpansion()) + return TemplateDeductionResult::MiscellaneousDeductionFailure; - ++ParamIdx; - if (!hasTemplateArgumentForDeduction(Ps, ParamIdx)) - return TemplateDeductionResult::Success; - if (Ps[ParamIdx].isPackExpansion()) - break; - } - } else { - // Perform deduction for this Pi/Ai pair. 
- if (auto Result = DeduceTemplateArguments( - S, TemplateParams, Ps[ParamIdx], As[ArgIdx], Info, - PartialOrdering, Deduced, HasDeducedAnyParam); - Result != TemplateDeductionResult::Success) - return Result; + // Perform deduction for this Pi/Ai pair. + TemplateArgument Pi = P, Ai = As[ArgIdx]; + if (PackFold == PackFold::ArgumentToParameter) + std::swap(Pi, Ai); + if (auto Result = DeduceTemplateArguments(S, TemplateParams, Pi, Ai, Info, + PartialOrdering, Deduced, + HasDeducedAnyParam); + Result != TemplateDeductionResult::Success) + return Result; - ++ArgIdx; - ++ParamIdx; - continue; - } + // Move to the next argument. + ++ArgIdx; + continue; } // The parameter is a pack expansion. @@ -2756,7 +2692,7 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, // each remaining argument in the template argument list of A. Each // comparison deduces template arguments for subsequent positions in the // template parameter packs expanded by Pi. - TemplateArgument Pattern = Ps[ParamIdx].getPackExpansionPattern(); + TemplateArgument Pattern = P.getPackExpansionPattern(); // Prepare to deduce the packs within the pattern. PackDeductionScope PackScope(S, TemplateParams, Deduced, Info, Pattern); @@ -2767,12 +2703,13 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, for (; hasTemplateArgumentForDeduction(As, ArgIdx) && PackScope.hasNextElement(); ++ArgIdx) { - if (!FoldPackParameter && !As[ArgIdx].isPackExpansion()) - return TemplateDeductionResult::MiscellaneousDeductionFailure; + TemplateArgument Pi = Pattern, Ai = As[ArgIdx]; + if (PackFold == PackFold::ArgumentToParameter) + std::swap(Pi, Ai); // Deduce template arguments from the pattern. 
- if (auto Result = DeduceTemplateArguments( - S, TemplateParams, Pattern, As[ArgIdx], Info, PartialOrdering, - Deduced, HasDeducedAnyParam); + if (auto Result = DeduceTemplateArguments(S, TemplateParams, Pi, Ai, Info, + PartialOrdering, Deduced, + HasDeducedAnyParam); Result != TemplateDeductionResult::Success) return Result; @@ -2781,8 +2718,12 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, // Build argument packs for each of the parameter packs expanded by this // pack expansion. - return PackScope.finish(); + if (auto Result = PackScope.finish(); + Result != TemplateDeductionResult::Success) + return Result; } + + return TemplateDeductionResult::Success; } TemplateDeductionResult Sema::DeduceTemplateArguments( @@ -3334,6 +3275,7 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction( // Unevaluated SFINAE context. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::Unevaluated); + Sema::SFINAETrap Trap(S); Sema::ContextRAII SavedContext(S, getAsDeclContextOrEnclosing(Template)); @@ -3350,42 +3292,21 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction( return Result; // Check that we produced the correct argument list. 
- for (ArrayRef Ps = TemplateArgs, As = CanonicalBuilder; - !Ps.empty() && !As.empty(); - /**/) { - TemplateArgument P = Ps.front(), A = As.front(); - if (P.getKind() == TemplateArgument::Pack) { - assert(Ps.size() == 1 && "Pack not last element?"); - Ps = P.getPackAsArray(); - continue; - } - if (A.getKind() == TemplateArgument::Pack) { - assert(As.size() == 1 && "Pack not last element?"); - As = A.getPackAsArray(); - continue; + TemplateParameterList *TemplateParams = Template->getTemplateParameters(); + for (unsigned I = 0, E = TemplateParams->size(); I != E; ++I) { + TemplateArgument InstArg = CanonicalBuilder[I]; + if (!isSameTemplateArg(S.Context, TemplateArgs[I], InstArg, PartialOrdering, + /*PackExpansionMatchesPack=*/true)) { + Info.Param = makeTemplateParameter(TemplateParams->getParam(I)); + Info.FirstArg = TemplateArgs[I]; + Info.SecondArg = InstArg; + return TemplateDeductionResult::NonDeducedMismatch; } - - if (P.isPackExpansion()) - P = P.getPackExpansionPattern(); - else - Ps = Ps.drop_front(); - if (A.isPackExpansion()) - A = A.getPackExpansionPattern(); - else - As = As.drop_front(); - - if (isSameTemplateArg(S.Context, P, A, PartialOrdering)) - continue; - unsigned I = As.end() == CanonicalBuilder.end() - ? As.begin() - CanonicalBuilder.begin() - : CanonicalBuilder.size() - 1; - Info.Param = - makeTemplateParameter(Template->getTemplateParameters()->getParam(I)); - Info.FirstArg = P; - Info.SecondArg = A; - return TemplateDeductionResult::NonDeducedMismatch; } + if (Trap.hasErrorOccurred()) + return TemplateDeductionResult::SubstitutionFailure; + if (!PartialOrdering) { if (auto Result = CheckDeducedArgumentConstraints( S, Template, SugaredBuilder, CanonicalBuilder, Info); @@ -3406,6 +3327,7 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction( // Unevaluated SFINAE context. 
EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::Unevaluated); + Sema::SFINAETrap Trap(S); Sema::ContextRAII SavedContext(S, getAsDeclContextOrEnclosing(TD)); @@ -3414,13 +3336,20 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction( // explicitly specified, template argument deduction fails. SmallVector SugaredBuilder, CanonicalBuilder; if (auto Result = ConvertDeducedTemplateArguments( - S, TD, /*IsDeduced=*/false, Deduced, Info, SugaredBuilder, + S, TD, /*IsPartialOrdering=*/false, Deduced, Info, SugaredBuilder, CanonicalBuilder); Result != TemplateDeductionResult::Success) return Result; - return ::CheckDeducedArgumentConstraints(S, TD, SugaredBuilder, - CanonicalBuilder, Info); + if (Trap.hasErrorOccurred()) + return TemplateDeductionResult::SubstitutionFailure; + + if (auto Result = CheckDeducedArgumentConstraints(S, TD, SugaredBuilder, + CanonicalBuilder, Info); + Result != TemplateDeductionResult::Success) + return Result; + + return TemplateDeductionResult::Success; } /// Perform template argument deduction to determine whether the given template @@ -3467,20 +3396,16 @@ DeduceTemplateArguments(Sema &S, T *Partial, if (Inst.isInvalid()) return TemplateDeductionResult::InstantiationDepth; + if (Trap.hasErrorOccurred()) + return TemplateDeductionResult::SubstitutionFailure; + TemplateDeductionResult Result; S.runWithSufficientStackSpace(Info.getLocation(), [&] { Result = ::FinishTemplateArgumentDeduction(S, Partial, /*IsPartialOrdering=*/false, TemplateArgs, Deduced, Info); }); - - if (Result != TemplateDeductionResult::Success) - return Result; - - if (Trap.hasErrorOccurred()) - return TemplateDeductionResult::SubstitutionFailure; - - return TemplateDeductionResult::Success; + return Result; } TemplateDeductionResult @@ -3536,18 +3461,14 @@ Sema::DeduceTemplateArgumentsFromType(TemplateDecl *TD, QualType FromType, if (Inst.isInvalid()) return TemplateDeductionResult::InstantiationDepth; + if 
(Trap.hasErrorOccurred()) + return TemplateDeductionResult::SubstitutionFailure; + TemplateDeductionResult Result; runWithSufficientStackSpace(Info.getLocation(), [&] { Result = ::FinishTemplateArgumentDeduction(*this, TD, Deduced, Info); }); - - if (Result != TemplateDeductionResult::Success) - return Result; - - if (Trap.hasErrorOccurred()) - return TemplateDeductionResult::SubstitutionFailure; - - return TemplateDeductionResult::Success; + return Result; } /// Determine whether the given type T is a simple-template-id type. @@ -6173,23 +6094,14 @@ static bool isAtLeastAsSpecializedAs(Sema &S, QualType T1, QualType T2, return false; const auto *TST1 = cast(T1); - - Sema::SFINAETrap Trap(S); - - TemplateDeductionResult Result; + bool AtLeastAsSpecialized; S.runWithSufficientStackSpace(Info.getLocation(), [&] { - Result = ::FinishTemplateArgumentDeduction( - S, P2, /*IsPartialOrdering=*/true, TST1->template_arguments(), Deduced, - Info); + AtLeastAsSpecialized = + FinishTemplateArgumentDeduction( + S, P2, /*IsPartialOrdering=*/true, TST1->template_arguments(), + Deduced, Info) == TemplateDeductionResult::Success; }); - - if (Result != TemplateDeductionResult::Success) - return false; - - if (Trap.hasErrorOccurred()) - return false; - - return true; + return AtLeastAsSpecialized; } namespace { @@ -6427,9 +6339,8 @@ bool Sema::isMoreSpecializedThanPrimary( } bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs( - TemplateParameterList *P, TemplateDecl *PArg, TemplateDecl *AArg, - const DefaultArguments &DefaultArgs, SourceLocation ArgLoc, - bool IsDeduced) { + TemplateParameterList *P, TemplateDecl *AArg, + const DefaultArguments &DefaultArgs, SourceLocation Loc, bool IsDeduced) { // C++1z [temp.arg.template]p4: (DR 150) // A template template-parameter P is at least as specialized as a // template template-argument A if, given the following rewrite to two @@ -6441,12 +6352,6 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs( // 
TemplateParameterList *A = AArg->getTemplateParameters(); - Sema::InstantiatingTemplate Inst( - *this, ArgLoc, Sema::InstantiatingTemplate::PartialOrderingTTP(), PArg, - SourceRange(P->getTemplateLoc(), P->getRAngleLoc())); - if (Inst.isInvalid()) - return false; - // Given an invented class template X with the template parameter list of // A (including default arguments): // - Each function template has a single function parameter whose type is @@ -6461,6 +6366,8 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs( // templates. SmallVector PArgs; { + SFINAETrap Trap(*this); + Context.getInjectedTemplateArgs(P, PArgs); TemplateArgumentListInfo PArgList(P->getLAngleLoc(), P->getRAngleLoc()); @@ -6480,17 +6387,18 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs( // C++1z [temp.arg.template]p3: // If the rewrite produces an invalid type, then P is not at least as // specialized as A. - SmallVector CanonicalPArgs; - if (CheckTemplateArgumentList(AArg, ArgLoc, PArgList, DefaultArgs, false, - PArgs, CanonicalPArgs, + SmallVector SugaredPArgs; + if (CheckTemplateArgumentList(AArg, Loc, PArgList, DefaultArgs, false, + SugaredPArgs, PArgs, /*UpdateArgsWithConversions=*/true, /*ConstraintsNotSatisfied=*/nullptr, - /*PartialOrderingTTP=*/true)) + /*PartialOrderTTP=*/true) || + Trap.hasErrorOccurred()) return false; } // Determine whether P1 is at least as specialized as P2. - TemplateDeductionInfo Info(ArgLoc, A->getDepth()); + TemplateDeductionInfo Info(Loc, A->getDepth()); SmallVector Deduced; Deduced.resize(A->size()); @@ -6505,89 +6413,29 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs( // be inverted between Ps and As. On non-deduced context, matching needs to // happen both ways, according to [temp.arg.template]p3, but this is // currently implemented as a special case elsewhere. 
- switch (::DeduceTemplateArguments( - *this, A, AArgs, PArgs, Info, Deduced, - /*NumberOfArgumentsMustMatch=*/false, /*PartialOrdering=*/true, - IsDeduced ? PackFold::ArgumentToParameter : PackFold::Both, - /*HasDeducedAnyParam=*/nullptr)) { - case clang::TemplateDeductionResult::Success: - break; - - case TemplateDeductionResult::MiscellaneousDeductionFailure: - Diag(AArg->getLocation(), diag::err_template_param_list_different_arity) - << (A->size() > P->size()) << /*isTemplateTemplateParameter=*/true - << SourceRange(A->getTemplateLoc(), P->getRAngleLoc()); + if (::DeduceTemplateArguments(*this, A, AArgs, PArgs, Info, Deduced, + /*NumberOfArgumentsMustMatch=*/false, + /*PartialOrdering=*/true, + IsDeduced ? PackFold::ArgumentToParameter + : PackFold::ParameterToArgument, + /*HasDeducedAnyParam=*/nullptr) != + TemplateDeductionResult::Success) return false; - case TemplateDeductionResult::NonDeducedMismatch: - Diag(AArg->getLocation(), diag::err_non_deduced_mismatch) - << Info.FirstArg << Info.SecondArg; - return false; - case TemplateDeductionResult::Inconsistent: - Diag(getAsNamedDecl(Info.Param)->getLocation(), - diag::err_inconsistent_deduction) - << Info.FirstArg << Info.SecondArg; - return false; - case TemplateDeductionResult::AlreadyDiagnosed: - return false; - - // None of these should happen for a plain deduction. 
- case TemplateDeductionResult::Invalid: - case TemplateDeductionResult::InstantiationDepth: - case TemplateDeductionResult::Incomplete: - case TemplateDeductionResult::IncompletePack: - case TemplateDeductionResult::Underqualified: - case TemplateDeductionResult::SubstitutionFailure: - case TemplateDeductionResult::DeducedMismatch: - case TemplateDeductionResult::DeducedMismatchNested: - case TemplateDeductionResult::TooManyArguments: - case TemplateDeductionResult::TooFewArguments: - case TemplateDeductionResult::InvalidExplicitArguments: - case TemplateDeductionResult::NonDependentConversionFailure: - case TemplateDeductionResult::ConstraintsNotSatisfied: - case TemplateDeductionResult::CUDATargetMismatch: - llvm_unreachable("Unexpected Result"); - } SmallVector DeducedArgs(Deduced.begin(), Deduced.end()); + Sema::InstantiatingTemplate Inst(*this, Info.getLocation(), AArg, DeducedArgs, + Info); + if (Inst.isInvalid()) + return false; - TemplateDeductionResult TDK; + bool AtLeastAsSpecialized; runWithSufficientStackSpace(Info.getLocation(), [&] { - TDK = ::FinishTemplateArgumentDeduction( - *this, AArg, /*IsPartialOrdering=*/true, PArgs, Deduced, Info); + AtLeastAsSpecialized = + ::FinishTemplateArgumentDeduction( + *this, AArg, /*IsPartialOrdering=*/true, PArgs, Deduced, Info) == + TemplateDeductionResult::Success; }); - switch (TDK) { - case TemplateDeductionResult::Success: - return true; - - // It doesn't seem possible to get a non-deduced mismatch when partial - // ordering TTPs. - case TemplateDeductionResult::NonDeducedMismatch: - llvm_unreachable("Unexpected NonDeducedMismatch"); - - // Substitution failures should have already been diagnosed. - case TemplateDeductionResult::AlreadyDiagnosed: - case TemplateDeductionResult::SubstitutionFailure: - case TemplateDeductionResult::InstantiationDepth: - return false; - - // None of these should happen when just converting deduced arguments. 
- case TemplateDeductionResult::Invalid: - case TemplateDeductionResult::Incomplete: - case TemplateDeductionResult::IncompletePack: - case TemplateDeductionResult::Inconsistent: - case TemplateDeductionResult::Underqualified: - case TemplateDeductionResult::DeducedMismatch: - case TemplateDeductionResult::DeducedMismatchNested: - case TemplateDeductionResult::TooManyArguments: - case TemplateDeductionResult::TooFewArguments: - case TemplateDeductionResult::InvalidExplicitArguments: - case TemplateDeductionResult::NonDependentConversionFailure: - case TemplateDeductionResult::ConstraintsNotSatisfied: - case TemplateDeductionResult::MiscellaneousDeductionFailure: - case TemplateDeductionResult::CUDATargetMismatch: - llvm_unreachable("Unexpected Result"); - } - llvm_unreachable("Unexpected TDK"); + return AtLeastAsSpecialized; } namespace { diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 9c5b3e7c9066c7..261ef4edf17593 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -573,7 +573,6 @@ bool Sema::CodeSynthesisContext::isInstantiationRecord() const { case LambdaExpressionSubstitution: case BuildingDeductionGuides: case TypeAliasTemplateInstantiation: - case PartialOrderingTTP: return false; // This function should never be called when Kind's value is Memoization. 
@@ -806,11 +805,6 @@ Sema::InstantiatingTemplate::InstantiatingTemplate( SemaRef, CodeSynthesisContext::BuildingDeductionGuides, PointOfInstantiation, InstantiationRange, Entity) {} -Sema::InstantiatingTemplate::InstantiatingTemplate( - Sema &SemaRef, SourceLocation ArgLoc, PartialOrderingTTP, - TemplateDecl *PArg, SourceRange InstantiationRange) - : InstantiatingTemplate(SemaRef, CodeSynthesisContext::PartialOrderingTTP, - ArgLoc, InstantiationRange, PArg) {} void Sema::pushCodeSynthesisContext(CodeSynthesisContext Ctx) { Ctx.SavedInNonInstantiationSFINAEContext = InNonInstantiationSFINAEContext; @@ -1250,14 +1244,6 @@ void Sema::PrintInstantiationStack() { << cast(Active->Entity) << Active->InstantiationRange; break; - case CodeSynthesisContext::PartialOrderingTTP: - Diags.Report(Active->PointOfInstantiation, - diag::note_template_arg_template_params_mismatch); - if (SourceLocation ParamLoc = Active->Entity->getLocation(); - ParamLoc.isValid()) - Diags.Report(ParamLoc, diag::note_template_prev_declaration) - << /*isTemplateTemplateParam=*/true << Active->InstantiationRange; - break; } } } @@ -1300,7 +1286,6 @@ std::optional Sema::isSFINAEContext() const { case CodeSynthesisContext::PriorTemplateArgumentSubstitution: case CodeSynthesisContext::DefaultTemplateArgumentChecking: case CodeSynthesisContext::RewritingOperatorAsSpaceship: - case CodeSynthesisContext::PartialOrderingTTP: // A default template argument instantiation and substitution into // template parameters with arguments for prior parameters may or may // not be a SFINAE context; look further up the stack. diff --git a/clang/test/CXX/temp/temp.arg/temp.arg.template/p3-0x.cpp b/clang/test/CXX/temp/temp.arg/temp.arg.template/p3-0x.cpp index ce27e6aa83c3b9..19793fe8263726 100644 --- a/clang/test/CXX/temp/temp.arg/temp.arg.template/p3-0x.cpp +++ b/clang/test/CXX/temp/temp.arg/temp.arg.template/p3-0x.cpp @@ -2,13 +2,13 @@ template struct eval; // expected-note 3{{template is declared here}} -template