diff --git a/.github/workflows/new-prs.yml b/.github/workflows/new-prs.yml
index c1952ddab83f78..9ba55d59ff15b4 100644
--- a/.github/workflows/new-prs.yml
+++ b/.github/workflows/new-prs.yml
@@ -1,56 +1,36 @@
 name: "Labelling new pull requests"
+
+permissions:
+  contents: read
+
 on:
-  workflow_run:
-    workflows: ["PR Receive"]
+  # It's safe to use pull_request_target here, because we aren't checking out
+  # code from the pull request branch.
+  # See https://securitylab.github.com/research/github-actions-preventing-pwn-requests/
+  pull_request_target:
+    types:
+      - opened
+      - reopened
+      - ready_for_review
+      - synchronize

 jobs:
   automate-prs-labels:
     permissions:
-      contents: read
       pull-requests: write
     runs-on: ubuntu-latest
+    # Ignore PRs with more than 10 commits. Pull requests with that many
+    # commits are usually accidental, often the result of a mistake made while
+    # trying to rebase. We want to ignore these pull requests to avoid
+    # excessive notifications.
     if: >
       github.repository == 'llvm/llvm-project' &&
-      github.event.workflow_run.event == 'pull_request_target' &&
-      github.event.workflow_run.conclusion == 'success'
+      github.event.pull_request.draft == false &&
+      github.event.pull_request.commits < 10
     steps:
-      # From: https://securitylab.github.com/research/github-actions-preventing-pwn-requests/
-      # Updated version here: https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#using-data-from-the-triggering-workflow
-      - name: Debug
-        run: |
-          echo "Event: ${{ github.event.workflow_run.event }} Conclusion: ${{ github.event.workflow_run.conclusion }}"
-      - name: 'Download artifact'
-        uses: actions/github-script@v6
-        with:
-          script: |
-            const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              run_id: context.payload.workflow_run.id
-            });
-            const matchArtifact = artifacts.data.artifacts.find((artifact) =>
-              artifact.name === 'pr'
-            );
-            const download = await github.rest.actions.downloadArtifact({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              artifact_id: matchArtifact.id,
-              archive_format: 'zip'
-            });
-            const { writeFileSync } = require('node:fs');
-            writeFileSync('${{ github.workspace }}/pr.zip', Buffer.from(download.data));
-
-      - run: unzip pr.zip
-
-      - name: "Get PR Number"
-        id: vars
-        run:
-          echo "pr-number=$(cat NR)" >> "$GITHUB_OUTPUT"
-
       - uses: actions/labeler@v4
         with:
           configuration-path: .github/new-prs-labeler.yml
           # workaround for https://github.com/actions/labeler/issues/112
           sync-labels: ''
           repo-token: ${{ secrets.ISSUE_SUBSCRIBER_TOKEN }}
-          pr-number: ${{ steps.vars.outputs.pr-number }}
diff --git a/.github/workflows/pr-receive-label.yml b/.github/workflows/pr-receive-label.yml
deleted file mode 100644
index c9085c44b98361..00000000000000
--- a/.github/workflows/pr-receive-label.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-# See https://securitylab.github.com/research/github-actions-preventing-pwn-requests/
-
-name: PR Receive Label
-on:
-  pull_request:
-    types:
-      - labeled
-
-permissions:
-  contents: read
-
-jobs:
-  pr-subscriber:
-    runs-on: ubuntu-latest
-    if: github.repository == 'llvm/llvm-project'
-    steps:
-      - name: Store PR Information
-        run: |
-          mkdir -p ./pr
-          echo ${{ github.event.number }} > ./pr/NR
-          echo ${{ github.event.label.name }} > ./pr/LABEL
-
-      - uses: actions/upload-artifact@v3
-        with:
-          name: pr
-          path: pr/
diff --git a/.github/workflows/pr-receive.yml b/.github/workflows/pr-receive.yml
deleted file mode 100644
index 13f1a883cf8ff6..00000000000000
---
a/.github/workflows/pr-receive.yml +++ /dev/null @@ -1,34 +0,0 @@ -# See https://securitylab.github.com/research/github-actions-preventing-pwn-requests/ - -name: PR Receive -on: - pull_request_target: - types: - - opened - - reopened - - ready_for_review - - synchronize - -permissions: - contents: read - -jobs: - pr-target: - runs-on: ubuntu-latest - # Ignore PRs with more than 10 commits. Pull requests with a lot of - # commits tend to be accidents usually when someone made a mistake while trying - # to rebase. We want to ignore these pull requests to avoid excessive - # notifications. - if: github.repository == 'llvm/llvm-project' && - github.event.pull_request.draft == false && - github.event.pull_request.commits < 10 - steps: - - name: Store PR Information - run: | - mkdir -p ./pr - echo ${{ github.event.number }} > ./pr/NR - - - uses: actions/upload-artifact@v3 - with: - name: pr - path: pr/ diff --git a/.github/workflows/pr-subscriber-wait.py b/.github/workflows/pr-subscriber-wait.py deleted file mode 100644 index 633f78c147707f..00000000000000 --- a/.github/workflows/pr-subscriber-wait.py +++ /dev/null @@ -1,27 +0,0 @@ -import github -import os -import sys -import time - - -def needs_to_wait(repo): - workflow_name = os.environ.get("GITHUB_WORKFLOW") - run_number = os.environ.get("GITHUB_RUN_NUMBER") - print("Workflow Name:", workflow_name, "Run Number:", run_number) - for status in ["in_progress", "queued"]: - for workflow in repo.get_workflow_runs(status=status): - print("Looking at ", workflow.name, "#", workflow.run_number) - if workflow.name != workflow_name: - continue - if workflow.run_number < int(run_number): - print("Workflow {} still {} ".format(workflow.run_number, status)) - return True - return False - - -repo_name = os.environ.get("GITHUB_REPOSITORY") -token = os.environ.get("GITHUB_TOKEN") -gh = github.Github(token) -repo = gh.get_repo(repo_name) -while needs_to_wait(repo): - time.sleep(30) diff --git a/.github/workflows/pr-subscriber.yml b/.github/workflows/pr-subscriber.yml index 1fc3bfed3a66b1..ef2ef7b9e4a35c 100644 --- a/.github/workflows/pr-subscriber.yml +++ b/.github/workflows/pr-subscriber.yml @@ -1,22 +1,17 @@ name: PR Subscriber on: - workflow_run: - workflows: ["PR Receive Label"] + pull_request_target: types: - - completed + - labeled permissions: - actions: read contents: read jobs: auto-subscribe: runs-on: ubuntu-latest - if: > - github.repository == 'llvm/llvm-project' && - github.event.workflow_run.event == 'pull_request' && - github.event.workflow_run.conclusion == 'success' + if: github.repository == 'llvm/llvm-project' steps: - name: Setup Automation Script run: | @@ -26,47 +21,10 @@ jobs: chmod a+x github-automation.py pip install -r requirements.txt - - name: 'Wait for other actions' - # We can't use the concurrency tag for these jobs, because it will - # cancel pending jobs if another job is running. 
- env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - python3 pr-subscriber-wait.py - - - # From: https://securitylab.github.com/research/github-actions-preventing-pwn-requests/ - # Updated version here: https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#using-data-from-the-triggering-workflow - - name: 'Download artifact' - uses: actions/github-script@v6 - with: - script: | - const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: context.payload.workflow_run.id - }); - const matchArtifact = artifacts.data.artifacts.find((artifact) => - artifact.name === 'pr' - ); - const download = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: matchArtifact.id, - archive_format: 'zip' - }); - const { writeFileSync } = require('node:fs'); - writeFileSync('${{ github.workspace }}/pr.zip', Buffer.from(download.data)); - - - run: unzip pr.zip - - name: Update watchers - # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable run: | - PR_NUMBER=$(cat NR) - LABEL_NAME=$(cat LABEL) ./github-automation.py \ --token '${{ secrets.ISSUE_SUBSCRIBER_TOKEN }}' \ pr-subscriber \ - --issue-number "$PR_NUMBER" \ - --label-name "$LABEL_NAME" + --issue-number "${{ github.event.number }}" \ + --label-name "${{ github.event.label.name }}" diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 4469222c79f1e2..270f58f777ce84 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -12,9 +12,7 @@ on: # To guarantee Maintained check is occasionally updated. See # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained schedule: - - cron: '38 20 * * 4' - push: - branches: [ "main" ] + - cron: '38 20 * * *' # Declare default permissions as read only. permissions: diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5684287195fb80..6caa144ddd866d 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -723,11 +723,6 @@ Bug Fixes to C++ Support declaration definition. Fixes: (`#61763 `_) -- Fix a bug where implicit deduction guides are not correctly generated for nested template - classes. Fixes: - (`#46200 `_) - (`#57812 `_) - - Diagnose use of a variable-length array in a coroutine. The design of coroutines is such that it is not possible to support VLA use. 
  Fixes: (`#65858 <https://github.com/llvm/llvm-project/issues/65858>`_)
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 940cca67368492..564ca48cc32ac5 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -922,6 +922,11 @@ def err_header_import_semi_in_macro : Error<
 def err_header_import_not_header_unit : Error<
   "header file %0 (aka '%1') cannot be imported because "
   "it is not known to be a header unit">;
+def warn_pp_include_angled_in_module_purview : Warning<
+  "'#include <filename>' attaches the declarations to the named module '%0'"
+  ", which is not usually intended; consider moving that directive before "
+  "the module declaration">,
+  InGroup<DiagGroup<"include-angled-in-module-purview">>;

 def warn_header_guard : Warning<
   "%0 is used as a header guard here, followed by #define of a different macro">,
diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h
index 8291f6575a7114..a8de94163e8b0b 100644
--- a/clang/include/clang/Driver/Distro.h
+++ b/clang/include/clang/Driver/Distro.h
@@ -78,6 +78,7 @@ class Distro {
     UbuntuKinetic,
     UbuntuLunar,
     UbuntuMantic,
+    UbuntuNoble,
     UnknownDistro
   };

@@ -129,7 +130,7 @@ class Distro {
   }

   bool IsUbuntu() const {
-    return DistroVal >= UbuntuHardy && DistroVal <= UbuntuMantic;
+    return DistroVal >= UbuntuHardy && DistroVal <= UbuntuNoble;
   }

   bool IsAlpineLinux() const { return DistroVal == AlpineLinux; }
diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp
index cc2e5be98d32d3..e4ac1abf12a7f8 100644
--- a/clang/lib/Basic/Module.cpp
+++ b/clang/lib/Basic/Module.cpp
@@ -161,9 +161,10 @@ bool Module::isForBuilding(const LangOptions &LangOpts) const {
   StringRef TopLevelName = getTopLevelModuleName();
   StringRef CurrentModule = LangOpts.CurrentModule;

-  // When building framework Foo, we want to make sure that Foo *and*
-  // Foo_Private are textually included and no modules are built for both.
-  if (getTopLevelModule()->IsFramework &&
+  // When building the implementation of framework Foo, we want to make sure
+  // that Foo *and* Foo_Private are textually included and no modules are built
+  // for either.
+  if (!LangOpts.isCompilingModule() && getTopLevelModule()->IsFramework &&
       CurrentModule == LangOpts.ModuleName &&
       !CurrentModule.endswith("_Private") && TopLevelName.endswith("_Private"))
     TopLevelName = TopLevelName.drop_back(8);
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index 27f525ee35edbf..dbc4cffa8976d6 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -203,8 +203,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
   // Signature. Mandatory ObjC-style method descriptor @encode sequence.
   std::string typeAtEncoding =
       CGM.getContext().getObjCEncodingForBlock(blockInfo.getBlockExpr());
-  elements.add(llvm::ConstantExpr::getBitCast(
-      CGM.GetAddrOfConstantCString(typeAtEncoding).getPointer(), i8p));
+  elements.add(CGM.GetAddrOfConstantCString(typeAtEncoding).getPointer());

   // GC layout.
   if (C.getLangOpts().ObjC) {
@@ -809,7 +808,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
     llvm::Constant *blockISA = blockInfo.NoEscape
                                    ? CGM.getNSConcreteGlobalBlock()
                                    : CGM.getNSConcreteStackBlock();
-    isa = llvm::ConstantExpr::getBitCast(blockISA, VoidPtrTy);
+    isa = blockISA;

     // Build the block descriptor.
descriptor = buildBlockDescriptor(CGM, blockInfo); @@ -1869,7 +1868,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { CaptureStrKind::CopyHelper, CGM); if (llvm::GlobalValue *Func = CGM.getModule().getNamedValue(FuncName)) - return llvm::ConstantExpr::getBitCast(Func, VoidPtrTy); + return Func; ASTContext &C = getContext(); @@ -1990,7 +1989,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { FinishFunction(); - return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); + return Fn; } static BlockFieldFlags @@ -2056,7 +2055,7 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { CaptureStrKind::DisposeHelper, CGM); if (llvm::GlobalValue *Func = CGM.getModule().getNamedValue(FuncName)) - return llvm::ConstantExpr::getBitCast(Func, VoidPtrTy); + return Func; ASTContext &C = getContext(); @@ -2113,7 +2112,7 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { FinishFunction(); - return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); + return Fn; } namespace { @@ -2352,7 +2351,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, CGF.FinishFunction(); - return llvm::ConstantExpr::getBitCast(Fn, CGF.Int8PtrTy); + return Fn; } /// Build the copy helper for a __block variable. @@ -2408,7 +2407,7 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, CGF.FinishFunction(); - return llvm::ConstantExpr::getBitCast(Fn, CGF.Int8PtrTy); + return Fn; } /// Build the dispose helper for a __block variable. diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index 005c31bd38893c..36f828f8cae26d 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -94,6 +94,7 @@ static Distro::DistroType DetectLsbRelease(llvm::vfs::FileSystem &VFS) { .Case("kinetic", Distro::UbuntuKinetic) .Case("lunar", Distro::UbuntuLunar) .Case("mantic", Distro::UbuntuMantic) + .Case("noble", Distro::UbuntuNoble) .Default(Distro::UnknownDistro); return Version; } diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 6fd515a5f0c756..d97a103833c2fa 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -2537,6 +2537,10 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( return {ImportAction::None}; } + if (isAngled && isInNamedModule()) + Diag(FilenameTok, diag::warn_pp_include_angled_in_module_purview) + << getNamedModuleName(); + // Look up the file, create a File ID for it. SourceLocation IncludePos = FilenameTok.getLocation(); // If the filename string was the result of macro expansions, set the include diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index 38ac406b14adad..bee80db8d166a6 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -1965,9 +1965,15 @@ bool Sema::buildCoroutineParameterMoves(SourceLocation Loc) { if (PD->getType()->isDependentType()) continue; + // Preserve the referenced state for unused parameter diagnostics. + bool DeclReferenced = PD->isReferenced(); + ExprResult PDRefExpr = BuildDeclRefExpr(PD, PD->getType().getNonReferenceType(), ExprValueKind::VK_LValue, Loc); // FIXME: scope? 
+ + PD->setReferenced(DeclReferenced); + if (PDRefExpr.isInvalid()) return false; diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 59721c8dc664aa..9044400fbb1f3f 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -2253,7 +2253,6 @@ struct ConvertConstructorToDeductionGuideTransform { Sema &SemaRef; ClassTemplateDecl *Template; - ClassTemplateDecl *NestedPattern = nullptr; DeclContext *DC = Template->getDeclContext(); CXXRecordDecl *Primary = Template->getTemplatedDecl(); @@ -2333,9 +2332,6 @@ struct ConvertConstructorToDeductionGuideTransform { Args.addOuterRetainedLevel(); } - if (NestedPattern) - Args.addOuterRetainedLevels(NestedPattern->getTemplateDepth()); - FunctionProtoTypeLoc FPTL = CD->getTypeSourceInfo()->getTypeLoc() .getAsAdjusted(); assert(FPTL && "no prototype for constructor declaration"); @@ -2445,17 +2441,10 @@ struct ConvertConstructorToDeductionGuideTransform { SmallVector ParamTypes; const FunctionProtoType *T = TL.getTypePtr(); - MultiLevelTemplateArgumentList OuterInstantiationArgs; - if (NestedPattern) - OuterInstantiationArgs = SemaRef.getTemplateInstantiationArgs(Template); - // -- The types of the function parameters are those of the constructor. for (auto *OldParam : TL.getParams()) { ParmVarDecl *NewParam = transformFunctionTypeParam(OldParam, Args, MaterializedTypedefs); - if (NestedPattern && NewParam) - NewParam = transformFunctionTypeParam(NewParam, OuterInstantiationArgs, - MaterializedTypedefs); if (!NewParam) return QualType(); ParamTypes.push_back(NewParam->getType()); @@ -2661,24 +2650,13 @@ void Sema::DeclareImplicitDeductionGuides(TemplateDecl *Template, if (BuildingDeductionGuides.isInvalid()) return; - // If the template is nested, then we need to use the original - // pattern to iterate over the constructors. - ClassTemplateDecl *Pattern = Transform.Template; - while (Pattern->getInstantiatedFromMemberTemplate()) { - if (Pattern->isMemberSpecialization()) - break; - Pattern = Pattern->getInstantiatedFromMemberTemplate(); - Transform.NestedPattern = Pattern; - } - // Convert declared constructors into deduction guide templates. // FIXME: Skip constructors for which deduction must necessarily fail (those // for which some class template parameter without a default argument never // appears in a deduced context). 
-  ContextRAII SavedContext(*this, Pattern->getTemplatedDecl());
   llvm::SmallPtrSet<NamedDecl *, 8> ProcessedCtors;
   bool AddedAny = false;
-  for (NamedDecl *D : LookupConstructors(Pattern->getTemplatedDecl())) {
+  for (NamedDecl *D : LookupConstructors(Transform.Primary)) {
     D = D->getUnderlyingDecl();
     if (D->isInvalidDecl() || D->isImplicit())
       continue;
@@ -2724,8 +2702,6 @@ void Sema::DeclareImplicitDeductionGuides(TemplateDecl *Template,
           Transform.buildSimpleDeductionGuide(Transform.DeducedType))
           ->getTemplatedDecl())
           ->setDeductionCandidateKind(DeductionCandidate::Copy);
-
-  SavedContext.pop();
 }

 /// Diagnose the presence of a default template argument on a
diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index 4b7103c20557cc..898906977ba9bb 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -238,10 +238,10 @@ class StreamChecker : public Checker
   CallDescriptionMap<FnDescription> FnDescriptions = {
-      {{{"fopen"}}, {nullptr, &StreamChecker::evalFopen, ArgNone}},
+      {{{"fopen"}, 2}, {nullptr, &StreamChecker::evalFopen, ArgNone}},
       {{{"freopen"}, 3},
        {&StreamChecker::preFreopen, &StreamChecker::evalFreopen, 2}},
-      {{{"tmpfile"}}, {nullptr, &StreamChecker::evalFopen, ArgNone}},
+      {{{"tmpfile"}, 0}, {nullptr, &StreamChecker::evalFopen, ArgNone}},
       {{{"fclose"}, 1},
        {&StreamChecker::preDefault, &StreamChecker::evalFclose, 0}},
       {{{"fread"}, 4},
@@ -1037,7 +1037,7 @@ StreamChecker::ensureStreamNonNull(SVal StreamVal, const Expr *StreamE,
   ConstraintManager &CM = C.getConstraintManager();
   ProgramStateRef StateNotNull, StateNull;
-  std::tie(StateNotNull, StateNull) = CM.assumeDual(C.getState(), *Stream);
+  std::tie(StateNotNull, StateNull) = CM.assumeDual(State, *Stream);

   if (!StateNotNull && StateNull) {
     if (ExplodedNode *N = C.generateErrorNode(StateNull)) {
diff --git a/clang/test/Analysis/stream-non-posix-function.c b/clang/test/Analysis/stream-non-posix-function.c
new file mode 100644
index 00000000000000..70b3ab25d02653
--- /dev/null
+++ b/clang/test/Analysis/stream-non-posix-function.c
@@ -0,0 +1,16 @@
+// RUN: %clang_analyze_cc1 -fno-builtin -analyzer-checker=core,alpha.unix.Stream -verify %s
+// expected-no-diagnostics
+
+typedef struct _FILE FILE;
+
+// These functions are not standard C library functions.
+FILE *tmpfile(const char *restrict path); // Real 'tmpfile' should have exactly 0 formal parameters.
+FILE *fopen(const char *restrict path);   // Real 'fopen' should have exactly 2 formal parameters.
+
+void test_fopen_non_posix(void) {
+  FILE *fp = fopen("file"); // no-leak: This isn't the standard POSIX `fopen`; we don't know the semantics of this call.
+}
+
+void test_tmpfile_non_posix(void) {
+  FILE *fp = tmpfile("file"); // no-leak: This isn't the standard POSIX `tmpfile`; we don't know the semantics of this call.
+}
diff --git a/clang/test/ClangScanDeps/modules-priv-fw-from-pub.m b/clang/test/ClangScanDeps/modules-priv-fw-from-pub.m
new file mode 100644
index 00000000000000..cb82e0c366322c
--- /dev/null
+++ b/clang/test/ClangScanDeps/modules-priv-fw-from-pub.m
@@ -0,0 +1,121 @@
+// This test checks that importing private headers from the public headers of
+// a framework is consistent between the dependency scanner and the explicit build.
+
+ +// RUN: rm -rf %t +// RUN: split-file %s %t + +//--- frameworks/FW.framework/Modules/module.modulemap +framework module FW { header "FW.h" } +//--- frameworks/FW.framework/Modules/module.private.modulemap +framework module FW_Private { header "FW_Private.h"} +//--- frameworks/FW.framework/Headers/FW.h +#include +//--- frameworks/FW.framework/PrivateHeaders/FW_Private.h +@import Dependency; + +//--- modules/module.modulemap +module Dependency { header "dependency.h" } +//--- modules/dependency.h +// empty + +//--- tu.m +#include + +//--- cdb.json.in +[{ + "file": "DIR/tu.m", + "directory": "DIR", + "command": "clang -fmodules -fmodules-cache-path=DIR/cache -fimplicit-module-maps -I DIR/modules -F DIR/frameworks -Wno-framework-include-private-from-public -Wno-atimport-in-framework-header -c DIR/tu.m -o DIR/tu.o" +}] + +// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json +// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/deps.json + +// Check that FW is reported to depend on FW_Private. +// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/modules/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/modules/dependency.h", +// CHECK-NEXT: "[[PREFIX]]/modules/module.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "Dependency" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "module-name": "FW_Private" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/frameworks/FW.framework/Modules/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/frameworks/FW.framework/Headers/FW.h", +// CHECK-NEXT: "[[PREFIX]]/frameworks/FW.framework/Modules/module.modulemap", +// CHECK-NEXT: "[[PREFIX]]/frameworks/FW.framework/Modules/module.private.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "FW" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "module-name": "Dependency" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/frameworks/FW.framework/Modules/module.private.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/frameworks/FW.framework/Modules/module.private.modulemap", +// CHECK-NEXT: "[[PREFIX]]/frameworks/FW.framework/PrivateHeaders/FW_Private.h", +// CHECK-NEXT: "[[PREFIX]]/modules/module.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "FW_Private" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "translation-units": [ +// CHECK-NEXT: { +// CHECK-NEXT: "commands": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-context-hash": "{{.*}}", +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "module-name": "FW" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "command-line": [ +// CHECK: ], +// CHECK-NEXT: "executable": "clang", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/tu.m" +// 
CHECK-NEXT: ],
+// CHECK-NEXT: "input-file": "[[PREFIX]]/tu.m"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: }
+
+// Check that building FW succeeds. If FW_Private were treated textually,
+// building FW would fail due to Dependency not being present on the command line.
+// RUN: %deps-to-rsp %t/deps.json --module-name=Dependency > %t/Dependency.cc1.rsp
+// RUN: %deps-to-rsp %t/deps.json --module-name=FW_Private > %t/FW_Private.cc1.rsp
+// RUN: %deps-to-rsp %t/deps.json --module-name=FW > %t/FW.cc1.rsp
+// RUN: %deps-to-rsp %t/deps.json --tu-index=0 > %t/tu.rsp
+
+// RUN: %clang @%t/Dependency.cc1.rsp
+// RUN: %clang @%t/FW_Private.cc1.rsp
+// RUN: %clang @%t/FW.cc1.rsp
+// RUN: %clang @%t/tu.rsp
diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c
new file mode 100644
index 00000000000000..b0607425336e28
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - | \
+// RUN:   FileCheck --check-prefix=CHECK-C %s
+// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \
+// RUN:   FileCheck --check-prefix=CHECK-CXX %s
+
+#include <stdint.h>
+
+// CHECK-C: define{{.*}} void @test1()
+// CHECK-CXX: define{{.*}} i64 @_Z5test12u1(i64{{[^,]*}})
+union u1 { };
+union u1 test1(union u1 a) {
+  return a;
+}
+
+struct s1 {
+  union u1 u;
+  int i;
+  float f;
+};
+
+// CHECK-C: define{{.*}} { i32, float } @test2(i32{{[^,]*}}, float{{[^,]*}})
+/// FIXME: This doesn't match g++.
+// CHECK-CXX: define{{.*}} { i32, float } @_Z5test22s1(i32{{[^,]*}}, float{{[^,]*}})
+struct s1 test2(struct s1 a) {
+  return a;
+}
diff --git a/clang/test/CodeGen/arm64_32-vaarg.c b/clang/test/CodeGen/arm64_32-vaarg.c
index 9fbcf88ecfdcc3..3f1f4443436da1 100644
--- a/clang/test/CodeGen/arm64_32-vaarg.c
+++ b/clang/test/CodeGen/arm64_32-vaarg.c
@@ -29,7 +29,7 @@ long long test_longlong(OneLongLong input, va_list *mylist) {
   // CHECK-LABEL: define{{.*}} i64 @test_longlong(i64 %input
   // CHECK: [[STARTPTR:%.*]] = load ptr, ptr %mylist
   // CHECK: [[ALIGN_TMP:%.+]] = getelementptr inbounds i8, ptr [[STARTPTR]], i32 7
-  // CHECK: [[ALIGNED_ADDR:%.+]] = tail call ptr @llvm.ptrmask.p0.i32(ptr nonnull [[ALIGN_TMP]], i32 -8)
+  // CHECK: [[ALIGNED_ADDR:%.+]] = tail call align 8 ptr @llvm.ptrmask.p0.i32(ptr nonnull [[ALIGN_TMP]], i32 -8)
   // CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, ptr [[ALIGNED_ADDR]], i32 8
   // CHECK: store ptr [[NEXT]], ptr %mylist

diff --git a/clang/test/Preprocessor/include-in-module-purview.cppm b/clang/test/Preprocessor/include-in-module-purview.cppm
new file mode 100644
index 00000000000000..0a080112b43277
--- /dev/null
+++ b/clang/test/Preprocessor/include-in-module-purview.cppm
@@ -0,0 +1,60 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -E -P -I%t -o %t/tmp 2>&1 | FileCheck %t/a.cppm
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -E -P -I%t -o - 2>&1 \
+// RUN:   -Wno-include-angled-in-module-purview | FileCheck %t/a.cppm --check-prefix=CHECK-NO-WARN
+
+//--- a.h
+// left empty
+
+//--- b.h
+#include <stddef.h>
+// Headers that don't get included shouldn't be affected.
+#ifdef WHATEVER
+#include <stddef.h>
+#endif
+
+//--- a.cppm
+module;
+#include <stddef.h>
+#include <a.h>
+#include <b.h>
+#include "a.h"
+#include "b.h"
+export module a;
+
+#include <stddef.h>
+#include <a.h>
+#include <b.h>
+#include "a.h"
+#include "b.h"
+
+// CHECK: a.cppm:9:10: warning: '#include <filename>' attaches the declarations to the named module 'a'
+// CHECK: a.cppm:10:10: warning: '#include <filename>' attaches the declarations to the named module 'a'
+// CHECK: a.cppm:11:10: warning: '#include <filename>' attaches the declarations to the named module 'a'
+// CHECK: In file included from {{.*}}/a.cppm:11
+// CHECK-NEXT: b.h:1:10: warning: '#include <filename>' attaches the declarations to the named module 'a'
+// CHECK: In file included from {{.*}}/a.cppm:13
+// CHECK-NEXT: b.h:1:10: warning: '#include <filename>' attaches the declarations to the named module 'a'
+
+module :private;
+#include <stddef.h>
+#include <a.h>
+#include <b.h>
+#include "a.h"
+#include "b.h"
+
+// CHECK: a.cppm:24:10: warning: '#include <filename>' attaches the declarations to the named module 'a'
+// CHECK: a.cppm:25:10: warning: '#include <filename>' attaches the declarations to the named module 'a'
+// CHECK: a.cppm:26:10: warning: '#include <filename>' attaches the declarations to the named module 'a'
+// CHECK: In file included from {{.*}}/a.cppm:26
+// CHECK-NEXT: b.h:1:10: warning: '#include <filename>' attaches the declarations to the named module 'a'
+// CHECK: In file included from {{.*}}/a.cppm:28
+// CHECK-NEXT: b.h:1:10: warning: '#include <filename>' attaches the declarations to the named module 'a'
+
+// We should have caught all warnings.
+// CHECK: 10 warnings generated.
+
+// CHECK-NO-WARN-NOT: warning
diff --git a/clang/test/SemaCXX/warn-unused-parameters-coroutine.cpp b/clang/test/SemaCXX/warn-unused-parameters-coroutine.cpp
new file mode 100644
index 00000000000000..b4c01550f9f788
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unused-parameters-coroutine.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -fsyntax-only -Wunused-parameter -verify -std=c++20 %s
+
+#include "Inputs/std-coroutine.h"
+
+struct awaitable {
+  bool await_ready() noexcept;
+  void await_resume() noexcept;
+  void await_suspend(std::coroutine_handle<>) noexcept;
+};
+
+struct task : awaitable {
+  struct promise_type {
+    task get_return_object() noexcept;
+    awaitable initial_suspend() noexcept;
+    awaitable final_suspend() noexcept;
+    void unhandled_exception() noexcept;
+    void return_void() noexcept;
+  };
+};
+
+task foo(int a) { // expected-warning{{unused parameter 'a'}}
+  co_return;
+}
+
+task bar(int a, int b) { // expected-warning{{unused parameter 'b'}}
+  a = a + 1;
+  co_return;
+}
+
+void create_closure() {
+  auto closure = [](int c) -> task { // expected-warning{{unused parameter 'c'}}
+    co_return;
+  };
+}
diff --git a/clang/test/SemaTemplate/nested-deduction-guides.cpp b/clang/test/SemaTemplate/nested-deduction-guides.cpp
index 38410b93ead3b9..2c5dda456a138a 100644
--- a/clang/test/SemaTemplate/nested-deduction-guides.cpp
+++ b/clang/test/SemaTemplate/nested-deduction-guides.cpp
@@ -4,15 +4,10 @@
 template<typename T> struct A {
   template<typename U> struct B {
     B(...);
-    B(const B &) = default;
   };
   template<typename U> B(U) -> B<U>;
 };
 A<void>::B b = 123;
-A<void>::B copy = b;

 using T = decltype(b);
 using T = A<void>::B<int>;
-
-using Copy = decltype(copy);
-using Copy = A<void>::B<int>;
diff --git a/clang/test/SemaTemplate/nested-implicit-deduction-guides.cpp b/clang/test/SemaTemplate/nested-implicit-deduction-guides.cpp
deleted file mode 100644
index 10b70f9c8c4f35..00000000000000
--- a/clang/test/SemaTemplate/nested-implicit-deduction-guides.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-// RUN: %clang_cc1 -std=c++17 -verify %s
-// expected-no-diagnostics
-
-template <typename T> struct S {
-  template <typename U> struct N {
-    N(T) {}
-    N(T, U) {}
-    template <typename V> N(V, U) {}
-  };
-};
-
-S<int>::N x{"a", 1};
-
-using T = decltype(x);
-using T = S<int>::N<int>;
diff --git a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp
index 2640ea4a599893..141da4c27f8d0b 100644
--- a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp
+++ b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp
@@ -25,8 +25,9 @@ void roundTrip(SourceLocation::UIntTy Loc,
                std::optional<uint64_t> ExpectedEncoded = std::nullopt) {
   uint64_t ActualEncoded =
       SourceLocationEncoding::encode(SourceLocation::getFromRawEncoding(Loc));
-  if (ExpectedEncoded)
+  if (ExpectedEncoded) {
     ASSERT_EQ(ActualEncoded, *ExpectedEncoded) << "Encoding " << Loc;
+  }
   SourceLocation::UIntTy DecodedEncoded =
       SourceLocationEncoding::decode(ActualEncoded).getRawEncoding();
   ASSERT_EQ(DecodedEncoded, Loc) << "Decoding " << ActualEncoded;
@@ -41,9 +42,10 @@ void roundTrip(std::vector<SourceLocation::UIntTy> Locs,
     for (auto L : Locs)
       ActualEncoded.push_back(SourceLocationEncoding::encode(
           SourceLocation::getFromRawEncoding(L), Seq));
-    if (!ExpectedEncoded.empty())
+    if (!ExpectedEncoded.empty()) {
       ASSERT_EQ(ActualEncoded, ExpectedEncoded)
           << "Encoding " << testing::PrintToString(Locs);
+    }
   }
   std::vector<SourceLocation::UIntTy> DecodedEncoded;
   {
@@ -70,7 +72,7 @@ TEST(SourceLocationEncoding, Individual) {
   roundTrip(Big);
   roundTrip(Big + 1);
   roundTrip(MacroBit | Big);
-  roundTrip(MacroBit | Big + 1);
+  roundTrip(MacroBit | (Big + 1));
 }

 TEST(SourceLocationEncoding, Sequence) {
diff --git a/compiler-rt/include/sanitizer/common_interface_defs.h b/compiler-rt/include/sanitizer/common_interface_defs.h
index 0bbade454244a0..5a78bb1a23a29d 100644
--- a/compiler-rt/include/sanitizer/common_interface_defs.h
+++ b/compiler-rt/include/sanitizer/common_interface_defs.h
@@ -298,7 +298,7 @@ void SANITIZER_CDECL __sanitizer_symbolize_global(void *data_ptr,
 #define __sanitizer_return_address() \
   __builtin_extract_return_addr(__builtin_return_address(0))
 #else
-extern "C" void *SANITIZER_CDECL _ReturnAddress(void);
+void *SANITIZER_CDECL _ReturnAddress(void);
 #pragma intrinsic(_ReturnAddress)
 #define __sanitizer_return_address() _ReturnAddress()
 #endif
diff --git a/compiler-rt/lib/profile/InstrProfiling.c b/compiler-rt/lib/profile/InstrProfiling.c
index bc1f1d35bfe967..78930723d573a4 100644
--- a/compiler-rt/lib/profile/InstrProfiling.c
+++ b/compiler-rt/lib/profile/InstrProfiling.c
@@ -90,6 +90,6 @@ COMPILER_RT_VISIBILITY void __llvm_profile_reset_counters(void) {
   lprofSetProfileDumped(0);
 }

-inline int hasCorrelation() {
+int hasCorrelation() {
   return (__llvm_profile_get_version() & VARIANT_MASK_DBG_CORRELATE) != 0ULL;
 }
diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
index 6eb9fa7abb7e0c..520e0a631cb4a3 100755
--- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
+++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
@@ -1,6 +1,7 @@
 #!/usr/bin/env bash
 #
 # Run as: CLANG=bin/clang build_symbolizer.sh out.o
+# If you want to use a local copy of zlib, set ZLIB_SRC.
 # zlib can be downloaded from http://www.zlib.net.
 #
 # Script compiles self-contained object file with symbolization code.
@@ -20,6 +21,12 @@ set -u

 SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
 SRC_DIR=$(readlink -f $SCRIPT_DIR/..)
+ +if [[ $# -ne 1 ]]; then + echo "Missing output file" + exit 1 +fi + OUTPUT=$(readlink -f $1) COMPILER_RT_SRC=$(readlink -f ${SCRIPT_DIR}/../../../..) LLVM_SRC=${LLVM_SRC:-${COMPILER_RT_SRC}/../llvm} @@ -52,6 +59,7 @@ LLVM_BUILD=${BUILD_DIR}/llvm SYMBOLIZER_BUILD=${BUILD_DIR}/symbolizer FLAGS=${FLAGS:-} +ZLIB_SRC=${ZLIB_SRC:-} TARGET_TRIPLE=$($CC -print-target-triple $FLAGS) if [[ "$FLAGS" =~ "-m32" ]] ; then # Avoid new wrappers. @@ -63,7 +71,15 @@ FLAGS+=" -include ${SRC_DIR}/../sanitizer_redefine_builtins.h -DSANITIZER_COMMON LINKFLAGS="-fuse-ld=lld -target $TARGET_TRIPLE" # Build zlib. -[[ -d ${ZLIB_BUILD} ]] || git clone https://github.com/madler/zlib ${ZLIB_BUILD} +if [[ ! -d ${ZLIB_BUILD} ]]; then + if [[ -z "${ZLIB_SRC}" ]]; then + git clone https://github.com/madler/zlib ${ZLIB_BUILD} + else + ZLIB_SRC=$(readlink -f $ZLIB_SRC) + cp -r ${ZLIB_SRC}/* ${ZLIB_BUILD}/ + fi +fi + cd ${ZLIB_BUILD} AR="${AR}" CC="${CC}" CFLAGS="$FLAGS -Wno-deprecated-non-prototype" RANLIB=/bin/true ./configure --static make -j libz.a diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 8218af691b79c8..809fd3b3be7cfd 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -1592,9 +1592,9 @@ createLoopOp(Fortran::lower::AbstractConverter &converter, addOperand(operands, operandSegments, workerNum); addOperand(operands, operandSegments, vectorNum); addOperands(operands, operandSegments, tileOperands); + addOperands(operands, operandSegments, cacheOperands); addOperands(operands, operandSegments, privateOperands); addOperands(operands, operandSegments, reductionOperands); - addOperands(operands, operandSegments, cacheOperands); auto loopOp = createRegionOp( builder, currentLocation, eval, operands, operandSegments); @@ -3295,9 +3295,12 @@ void Fortran::lower::finalizeOpenACCRoutineAttachment( mlir::func::FuncOp funcOp = mod.lookupSymbol(mapping.first); if (!funcOp) - llvm::report_fatal_error( - "could not find function to attach OpenACC routine information."); - attachRoutineInfo(funcOp, mapping.second); + mlir::emitWarning(mod.getLoc(), + llvm::Twine("function '") + llvm::Twine(mapping.first) + + llvm::Twine("' in acc routine directive is not " + "found in this translation unit.")); + else + attachRoutineInfo(funcOp, mapping.second); } accRoutineInfos.clear(); } diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp index 323b589cc7f2eb..efaead599bee16 100644 --- a/flang/lib/Optimizer/Dialect/FIRType.cpp +++ b/flang/lib/Optimizer/Dialect/FIRType.cpp @@ -545,7 +545,9 @@ std::string getTypeAsString(mlir::Type ty, const fir::KindMapping &kindMap, break; } else if (auto charTy = mlir::dyn_cast_or_null(ty)) { name << 'c' << kindMap.getCharacterBitsize(charTy.getFKind()); - if (charTy.getLen() != fir::CharacterType::singleton()) + if (charTy.getLen() == fir::CharacterType::unknownLen()) + name << "xU"; + else if (charTy.getLen() != fir::CharacterType::singleton()) name << "x" << charTy.getLen(); break; } else if (auto seqTy = mlir::dyn_cast_or_null(ty)) { @@ -574,6 +576,9 @@ std::string getTypeAsString(mlir::Type ty, const fir::KindMapping &kindMap, } else if (auto boxTy = mlir::dyn_cast_or_null(ty)) { name << "box_"; ty = boxTy.getEleTy(); + } else if (auto boxcharTy = mlir::dyn_cast_or_null(ty)) { + name << "boxchar_"; + ty = boxcharTy.getEleTy(); } else if (auto recTy = mlir::dyn_cast_or_null(ty)) { name << "rec_" << recTy.getName(); break; diff --git a/flang/unittests/Optimizer/FIRTypesTest.cpp 
b/flang/unittests/Optimizer/FIRTypesTest.cpp index b66c1ff441bade..238aa6939d5df3 100644 --- a/flang/unittests/Optimizer/FIRTypesTest.cpp +++ b/flang/unittests/Optimizer/FIRTypesTest.cpp @@ -313,4 +313,6 @@ TEST_F(FIRTypesTest, getTypeAsString) { fir::getTypeAsString( fir::LLVMPointerType::get(mlir::IntegerType::get(&context, 32)), *kindMap)); + EXPECT_EQ("boxchar_c8xU", + fir::getTypeAsString(fir::BoxCharType::get(&context, 1), *kindMap)); } diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp index 2f99076a720e2a..b1b3aa6ce028ca 100644 --- a/libc/utils/gpu/loader/amdgpu/Loader.cpp +++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp @@ -487,21 +487,22 @@ int load(int argc, char **argv, char **envp, void *image, size_t size, handle_error(err); hsa_amd_agents_allow_access(1, &dev_agent, nullptr, host_clock_freq); - if (hsa_status_t err = - hsa_agent_get_info(dev_agent, - static_cast( - HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY), - host_clock_freq)) - handle_error(err); - - void *freq_addr; - if (hsa_status_t err = hsa_executable_symbol_get_info( - freq_sym, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &freq_addr)) - handle_error(err); - - if (hsa_status_t err = hsa_memcpy(freq_addr, dev_agent, host_clock_freq, - host_agent, sizeof(uint64_t))) - handle_error(err); + if (HSA_STATUS_SUCCESS == + hsa_agent_get_info(dev_agent, + static_cast( + HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY), + host_clock_freq)) { + + void *freq_addr; + if (hsa_status_t err = hsa_executable_symbol_get_info( + freq_sym, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, + &freq_addr)) + handle_error(err); + + if (hsa_status_t err = hsa_memcpy(freq_addr, dev_agent, host_clock_freq, + host_agent, sizeof(uint64_t))) + handle_error(err); + } } // Obtain a queue with the minimum (power of two) size, used to send commands diff --git a/libcxx/docs/ReleaseNotes/18.rst b/libcxx/docs/ReleaseNotes/18.rst index ac78563aa73848..41c3f7cc455f18 100644 --- a/libcxx/docs/ReleaseNotes/18.rst +++ b/libcxx/docs/ReleaseNotes/18.rst @@ -72,6 +72,11 @@ Improvements and New Features Deprecations and Removals ------------------------- +- Availability macros which will never trigger an error have been removed. This includes anything that has been + introduced before macOS 10.13, iOS 12, tvOS 12 and watchOS 4. This shouldn't affect anybody, since AppleClang 15 + doesn't support any older OSes. If you are a vendor and make use of these macros, please inform the libc++ team so we + can re-introduce them and consider upstreaming support for your platform. + - The non-conforming constructor ``std::future_error(std::error_code)`` has been removed. Please use the ``std::future_error(std::future_errc)`` constructor provided in C++17 instead. 
diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst index 72c80d7dc954a3..c4199dc69d7c26 100644 --- a/libcxx/docs/index.rst +++ b/libcxx/docs/index.rst @@ -127,7 +127,7 @@ Libc++ also supports common platforms and architectures: =============== ========================= ============================ Target platform Target architecture Notes =============== ========================= ============================ -macOS 10.9+ i386, x86_64, arm64 Building the shared library itself requires targetting macOS 10.13+ +macOS 10.13+ i386, x86_64, arm64 FreeBSD 12+ i386, x86_64, arm Linux i386, x86_64, arm, arm64 Only glibc-2.24 and later and no other libc is officially supported Android 5.0+ i386, x86_64, arm, arm64 diff --git a/libcxx/include/__availability b/libcxx/include/__availability index a6367945edc7c6..99a16c968de3c6 100644 --- a/libcxx/include/__availability +++ b/libcxx/include/__availability @@ -87,11 +87,6 @@ #if defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) - // This controls the availability of std::shared_mutex and std::shared_timed_mutex, - // which were added to the dylib later. -// # define _LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX -# define _LIBCPP_AVAILABILITY_SHARED_MUTEX - // These macros control the availability of std::bad_optional_access and // other exception types. These were put in the shared library to prevent // code bloat from every user program defining the vtable for these exception @@ -109,41 +104,6 @@ // # define _LIBCPP_AVAILABILITY_HAS_NO_BAD_ANY_CAST # define _LIBCPP_AVAILABILITY_BAD_ANY_CAST - // This controls the availability of std::uncaught_exceptions(). -// # define _LIBCPP_AVAILABILITY_HAS_NO_UNCAUGHT_EXCEPTIONS -# define _LIBCPP_AVAILABILITY_UNCAUGHT_EXCEPTIONS - - // This controls the availability of the sized version of ::operator delete, - // ::operator delete[], and their align_val_t variants, which were all added - // in C++17, and hence not present in early dylibs. -// # define _LIBCPP_AVAILABILITY_HAS_NO_SIZED_NEW_DELETE -# define _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE - - // This controls the availability of the std::future_error exception. - // - // Note that when exceptions are disabled, the methods that normally throw - // std::future_error can be used even on older deployment targets, but those - // methods will abort instead of throwing. -// # define _LIBCPP_AVAILABILITY_HAS_NO_FUTURE_ERROR -# define _LIBCPP_AVAILABILITY_FUTURE_ERROR - - // This controls the availability of std::type_info's vtable. - // I can't imagine how using std::type_info can work at all if - // this isn't supported. -// # define _LIBCPP_AVAILABILITY_HAS_NO_TYPEINFO_VTABLE -# define _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE - - // This controls the availability of std::locale::category members - // (e.g. std::locale::collate), which are defined in the dylib. -// # define _LIBCPP_AVAILABILITY_HAS_NO_LOCALE_CATEGORY -# define _LIBCPP_AVAILABILITY_LOCALE_CATEGORY - - // This controls the availability of atomic operations on std::shared_ptr - // (e.g. `std::atomic_store(std::shared_ptr)`), which require a shared - // lock table located in the dylib. -// # define _LIBCPP_AVAILABILITY_HAS_NO_ATOMIC_SHARED_PTR -# define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR - // These macros control the availability of all parts of that // depend on something in the dylib. 
// # define _LIBCPP_AVAILABILITY_HAS_NO_FILESYSTEM_LIBRARY @@ -192,100 +152,14 @@ #elif defined(__APPLE__) - // shared_mutex and shared_timed_mutex -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 100000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 100000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 30000) -# define _LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX -# endif -# define _LIBCPP_AVAILABILITY_SHARED_MUTEX \ - __attribute__((availability(macos,strict,introduced=10.12))) \ - __attribute__((availability(ios,strict,introduced=10.0))) \ - __attribute__((availability(tvos,strict,introduced=10.0))) \ - __attribute__((availability(watchos,strict,introduced=3.0))) - - // bad_optional_access, bad_variant_access and bad_any_cast - // Note: bad_optional_access & friends were not introduced in the matching - // macOS and iOS versions, so the version mismatch between macOS and others - // is intended. -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101300) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 120000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 120000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 50000) -# define _LIBCPP_AVAILABILITY_HAS_NO_BAD_OPTIONAL_ACCESS -# define _LIBCPP_AVAILABILITY_HAS_NO_BAD_VARIANT_ACCESS -# define _LIBCPP_AVAILABILITY_HAS_NO_BAD_ANY_CAST -# endif -# define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS \ - __attribute__((availability(macos,strict,introduced=10.13))) \ - __attribute__((availability(ios,strict,introduced=12.0))) \ - __attribute__((availability(tvos,strict,introduced=12.0))) \ - __attribute__((availability(watchos,strict,introduced=5.0))) -# define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS \ - _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCPP_AVAILABILITY_BAD_ANY_CAST \ - _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS - - // uncaught_exceptions -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 100000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 100000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 30000) -# define _LIBCPP_AVAILABILITY_HAS_NO_UNCAUGHT_EXCEPTIONS -# endif -# define _LIBCPP_AVAILABILITY_UNCAUGHT_EXCEPTIONS \ - __attribute__((availability(macos,strict,introduced=10.12))) \ - __attribute__((availability(ios,strict,introduced=10.0))) \ - __attribute__((availability(tvos,strict,introduced=10.0))) \ - __attribute__((availability(watchos,strict,introduced=3.0))) - - // sized operator new and sized operator delete -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 100000) || 
\ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 100000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 30000) -# define _LIBCPP_AVAILABILITY_HAS_NO_SIZED_NEW_DELETE -# endif -# define _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE \ - __attribute__((availability(macos,strict,introduced=10.12))) \ - __attribute__((availability(ios,strict,introduced=10.0))) \ - __attribute__((availability(tvos,strict,introduced=10.0))) \ - __attribute__((availability(watchos,strict,introduced=3.0))) - - // future_error -# if (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 60000) -# define _LIBCPP_AVAILABILITY_HAS_NO_FUTURE_ERROR -# endif -# define _LIBCPP_AVAILABILITY_FUTURE_ERROR \ - __attribute__((availability(ios,strict,introduced=6.0))) - - // type_info's vtable -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 100900) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 70000) -# define _LIBCPP_AVAILABILITY_HAS_NO_TYPEINFO_VTABLE -# endif -# define _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE \ - __attribute__((availability(macos,strict,introduced=10.9))) \ - __attribute__((availability(ios,strict,introduced=7.0))) - - // locale::category -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 100900) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 70000) -# define _LIBCPP_AVAILABILITY_HAS_NO_LOCALE_CATEGORY -# endif -# define _LIBCPP_AVAILABILITY_LOCALE_CATEGORY \ - __attribute__((availability(macos,strict,introduced=10.9))) \ - __attribute__((availability(ios,strict,introduced=7.0))) - - // atomic operations on shared_ptr -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 100900) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 70000) -# define _LIBCPP_AVAILABILITY_HAS_NO_ATOMIC_SHARED_PTR -# endif -# define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR \ - __attribute__((availability(macos,strict,introduced=10.9))) \ - __attribute__((availability(ios,strict,introduced=7.0))) +# if (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 50000) +# define _LIBCPP_AVAILABILITY_HAS_NO_BAD_OPTIONAL_ACCESS +# define _LIBCPP_AVAILABILITY_HAS_NO_BAD_VARIANT_ACCESS +# define _LIBCPP_AVAILABILITY_HAS_NO_BAD_ANY_CAST +# endif +# define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS __attribute__((availability(watchos,strict,introduced=5.0))) +# define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS +# define _LIBCPP_AVAILABILITY_BAD_ANY_CAST _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS // # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) || \ @@ -385,12 +259,10 @@ // Those are defined in terms of the availability attributes above, and // should not be vendor-specific. 
#if defined(_LIBCPP_HAS_NO_EXCEPTIONS) -# define _LIBCPP_AVAILABILITY_FUTURE # define _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST # define _LIBCPP_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS # define _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS #else -# define _LIBCPP_AVAILABILITY_FUTURE _LIBCPP_AVAILABILITY_FUTURE_ERROR # define _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST _LIBCPP_AVAILABILITY_BAD_ANY_CAST # define _LIBCPP_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS # define _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS diff --git a/libcxx/include/__exception/operations.h b/libcxx/include/__exception/operations.h index 78e7c7a5d0b51d..8f374c0ccee50b 100644 --- a/libcxx/include/__exception/operations.h +++ b/libcxx/include/__exception/operations.h @@ -31,7 +31,7 @@ _LIBCPP_EXPORTED_FROM_ABI terminate_handler set_terminate(terminate_handler) _NO _LIBCPP_EXPORTED_FROM_ABI terminate_handler get_terminate() _NOEXCEPT; _LIBCPP_EXPORTED_FROM_ABI bool uncaught_exception() _NOEXCEPT; -_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_AVAILABILITY_UNCAUGHT_EXCEPTIONS int uncaught_exceptions() _NOEXCEPT; +_LIBCPP_EXPORTED_FROM_ABI int uncaught_exceptions() _NOEXCEPT; class _LIBCPP_EXPORTED_FROM_ABI exception_ptr; diff --git a/libcxx/include/__locale b/libcxx/include/__locale index 90dcad3590c3d2..b1502dd71edadf 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -78,7 +78,7 @@ public: class _LIBCPP_EXPORTED_FROM_ABI id; typedef int category; - _LIBCPP_AVAILABILITY_LOCALE_CATEGORY + static const category // values assigned here are for exposition only none = 0, collate = LC_COLLATE_MASK, diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h index 96e1f80bdede5c..10e36bc87610bb 100644 --- a/libcxx/include/__memory/shared_ptr.h +++ b/libcxx/include/__memory/shared_ptr.h @@ -1985,8 +1985,7 @@ class _LIBCPP_EXPORTED_FROM_ABI __sp_mut friend _LIBCPP_EXPORTED_FROM_ABI __sp_mut& __get_sp_mut(const void*); }; -_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR -__sp_mut& __get_sp_mut(const void*); +_LIBCPP_EXPORTED_FROM_ABI __sp_mut& __get_sp_mut(const void*); template inline _LIBCPP_INLINE_VISIBILITY @@ -1997,7 +1996,6 @@ atomic_is_lock_free(const shared_ptr<_Tp>*) } template -_LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> atomic_load(const shared_ptr<_Tp>* __p) { @@ -2010,7 +2008,6 @@ atomic_load(const shared_ptr<_Tp>* __p) template inline _LIBCPP_INLINE_VISIBILITY -_LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR shared_ptr<_Tp> atomic_load_explicit(const shared_ptr<_Tp>* __p, memory_order) { @@ -2018,7 +2015,6 @@ atomic_load_explicit(const shared_ptr<_Tp>* __p, memory_order) } template -_LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR _LIBCPP_HIDE_FROM_ABI void atomic_store(shared_ptr<_Tp>* __p, shared_ptr<_Tp> __r) { @@ -2030,7 +2026,6 @@ atomic_store(shared_ptr<_Tp>* __p, shared_ptr<_Tp> __r) template inline _LIBCPP_INLINE_VISIBILITY -_LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR void atomic_store_explicit(shared_ptr<_Tp>* __p, shared_ptr<_Tp> __r, memory_order) { @@ -2038,7 +2033,6 @@ atomic_store_explicit(shared_ptr<_Tp>* __p, shared_ptr<_Tp> __r, memory_order) } template -_LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> atomic_exchange(shared_ptr<_Tp>* __p, shared_ptr<_Tp> __r) { @@ -2051,7 +2045,6 @@ atomic_exchange(shared_ptr<_Tp>* __p, shared_ptr<_Tp> __r) template inline _LIBCPP_INLINE_VISIBILITY -_LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR shared_ptr<_Tp> 
atomic_exchange_explicit(shared_ptr<_Tp>* __p, shared_ptr<_Tp> __r, memory_order) { @@ -2059,7 +2052,6 @@ atomic_exchange_explicit(shared_ptr<_Tp>* __p, shared_ptr<_Tp> __r, memory_order } template -_LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR _LIBCPP_HIDE_FROM_ABI bool atomic_compare_exchange_strong(shared_ptr<_Tp>* __p, shared_ptr<_Tp>* __v, shared_ptr<_Tp> __w) { @@ -2081,7 +2073,6 @@ atomic_compare_exchange_strong(shared_ptr<_Tp>* __p, shared_ptr<_Tp>* __v, share template inline _LIBCPP_INLINE_VISIBILITY -_LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR bool atomic_compare_exchange_weak(shared_ptr<_Tp>* __p, shared_ptr<_Tp>* __v, shared_ptr<_Tp> __w) { @@ -2090,7 +2081,6 @@ atomic_compare_exchange_weak(shared_ptr<_Tp>* __p, shared_ptr<_Tp>* __v, shared_ template inline _LIBCPP_INLINE_VISIBILITY -_LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR bool atomic_compare_exchange_strong_explicit(shared_ptr<_Tp>* __p, shared_ptr<_Tp>* __v, shared_ptr<_Tp> __w, memory_order, memory_order) @@ -2100,7 +2090,6 @@ atomic_compare_exchange_strong_explicit(shared_ptr<_Tp>* __p, shared_ptr<_Tp>* _ template inline _LIBCPP_INLINE_VISIBILITY -_LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR bool atomic_compare_exchange_weak_explicit(shared_ptr<_Tp>* __p, shared_ptr<_Tp>* __v, shared_ptr<_Tp> __w, memory_order, memory_order) diff --git a/libcxx/include/__utility/small_buffer.h b/libcxx/include/__utility/small_buffer.h index 0c6d4986a58514..b029ca96c2d1cd 100644 --- a/libcxx/include/__utility/small_buffer.h +++ b/libcxx/include/__utility/small_buffer.h @@ -62,7 +62,7 @@ class __small_buffer { } template - _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE _LIBCPP_HIDE_FROM_ABI _Stored* __alloc() { + _LIBCPP_HIDE_FROM_ABI _Stored* __alloc() { if constexpr (__fits_in_buffer<_Stored>) { return std::launder(reinterpret_cast<_Stored*>(__buffer_)); } else { @@ -73,13 +73,13 @@ class __small_buffer { } template - _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE _LIBCPP_HIDE_FROM_ABI void __dealloc() noexcept { + _LIBCPP_HIDE_FROM_ABI void __dealloc() noexcept { if constexpr (!__fits_in_buffer<_Stored>) ::operator delete[](*reinterpret_cast(__buffer_), sizeof(_Stored), align_val_t{alignof(_Stored)}); } template - _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE _LIBCPP_HIDE_FROM_ABI void __construct(_Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI void __construct(_Args&&... 
diff --git a/libcxx/include/__utility/small_buffer.h b/libcxx/include/__utility/small_buffer.h
index 0c6d4986a58514..b029ca96c2d1cd 100644
--- a/libcxx/include/__utility/small_buffer.h
+++ b/libcxx/include/__utility/small_buffer.h
@@ -62,7 +62,7 @@ class __small_buffer {
   }

   template <class _Stored>
-  _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE _LIBCPP_HIDE_FROM_ABI _Stored* __alloc() {
+  _LIBCPP_HIDE_FROM_ABI _Stored* __alloc() {
     if constexpr (__fits_in_buffer<_Stored>) {
       return std::launder(reinterpret_cast<_Stored*>(__buffer_));
     } else {
@@ -73,13 +73,13 @@ class __small_buffer {
   }

   template <class _Stored>
-  _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE _LIBCPP_HIDE_FROM_ABI void __dealloc() noexcept {
+  _LIBCPP_HIDE_FROM_ABI void __dealloc() noexcept {
     if constexpr (!__fits_in_buffer<_Stored>)
       ::operator delete[](*reinterpret_cast<void**>(__buffer_), sizeof(_Stored), align_val_t{alignof(_Stored)});
   }

   template <class _Stored, class... _Args>
-  _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE _LIBCPP_HIDE_FROM_ABI void __construct(_Args&&... __args) {
+  _LIBCPP_HIDE_FROM_ABI void __construct(_Args&&... __args) {
     _Stored* __buffer = __alloc<_Stored>();
     auto __guard = std::__make_exception_guard([&] { __dealloc<_Stored>(); });
     std::construct_at(__buffer, std::forward<_Args>(__args)...);
diff --git a/libcxx/include/future b/libcxx/include/future
index 97b22aa458bbf2..9836228c628c71 100644
--- a/libcxx/include/future
+++ b/libcxx/include/future
@@ -516,15 +516,9 @@ make_error_condition(future_errc __e) _NOEXCEPT
     return error_condition(static_cast<int>(__e), future_category());
 }

-_LIBCPP_NORETURN inline _LIBCPP_HIDE_FROM_ABI
-#ifndef _LIBCPP_HAS_NO_EXCEPTIONS
-_LIBCPP_AVAILABILITY_FUTURE_ERROR
-#endif
-void __throw_future_error(future_errc __ev);
+_LIBCPP_NORETURN inline _LIBCPP_HIDE_FROM_ABI void __throw_future_error(future_errc __ev);

-class _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_AVAILABILITY_FUTURE_ERROR future_error
-    : public logic_error
-{
+class _LIBCPP_EXPORTED_FROM_ABI future_error : public logic_error {
     error_code __ec_;

     future_error(error_code);
@@ -554,9 +548,7 @@ void __throw_future_error(future_errc __ev)
 #endif
 }

-class _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_AVAILABILITY_FUTURE __assoc_sub_state
-    : public __shared_count
-{
+class _LIBCPP_EXPORTED_FROM_ABI __assoc_sub_state : public __shared_count {
 protected:
     exception_ptr __exception_;
     mutable mutex __mut_;
@@ -642,8 +634,7 @@ __assoc_sub_state::wait_for(const chrono::duration<_Rep, _Period>& __rel_time) c
 }

 template <class _Rp>
-class _LIBCPP_AVAILABILITY_FUTURE _LIBCPP_HIDDEN __assoc_state
-    : public __assoc_sub_state
+class _LIBCPP_HIDDEN __assoc_state : public __assoc_sub_state
 {
     typedef __assoc_sub_state base;
     _LIBCPP_SUPPRESS_DEPRECATED_PUSH
@@ -676,7 +667,6 @@ __assoc_state<_Rp>::__on_zero_shared() _NOEXCEPT

 template <class _Rp>
 template <class _Arg>
-_LIBCPP_AVAILABILITY_FUTURE
 void
 __assoc_state<_Rp>::set_value(_Arg&& __arg)
 {
@@ -724,8 +714,7 @@ __assoc_state<_Rp>::copy()
 }

 template <class _Rp>
-class _LIBCPP_AVAILABILITY_FUTURE __assoc_state<_Rp&>
-    : public __assoc_sub_state
+class __assoc_state<_Rp&> : public __assoc_sub_state
 {
     typedef __assoc_sub_state base;
     typedef _Rp* _Up;
@@ -784,8 +773,7 @@ __assoc_state<_Rp&>::copy()
 }

 template <class _Rp, class _Alloc>
-class _LIBCPP_AVAILABILITY_FUTURE __assoc_state_alloc
-    : public __assoc_state<_Rp>
+class __assoc_state_alloc : public __assoc_state<_Rp>
 {
     typedef __assoc_state<_Rp> base;
     _Alloc __alloc_;
@@ -812,8 +800,7 @@ __assoc_state_alloc<_Rp, _Alloc>::__on_zero_shared() _NOEXCEPT
 }

 template <class _Rp, class _Alloc>
-class _LIBCPP_AVAILABILITY_FUTURE __assoc_state_alloc<_Rp&, _Alloc>
-    : public __assoc_state<_Rp&>
+class __assoc_state_alloc<_Rp&, _Alloc> : public __assoc_state<_Rp&>
 {
     typedef __assoc_state<_Rp&> base;
     _Alloc __alloc_;
@@ -838,8 +825,7 @@ __assoc_state_alloc<_Rp&, _Alloc>::__on_zero_shared() _NOEXCEPT
 }

 template <class _Alloc>
-class _LIBCPP_AVAILABILITY_FUTURE __assoc_sub_state_alloc
-    : public __assoc_sub_state
+class __assoc_sub_state_alloc : public __assoc_sub_state
 {
     typedef __assoc_sub_state base;
     _Alloc __alloc_;
@@ -864,8 +850,7 @@ __assoc_sub_state_alloc<_Alloc>::__on_zero_shared() _NOEXCEPT
 }

 template <class _Rp, class _Fp>
-class _LIBCPP_AVAILABILITY_FUTURE __deferred_assoc_state
-    : public __assoc_state<_Rp>
+class __deferred_assoc_state : public __assoc_state<_Rp>
 {
     typedef __assoc_state<_Rp> base;
@@ -905,8 +890,7 @@ __deferred_assoc_state<_Rp, _Fp>::__execute()
 }

 template <class _Fp>
-class _LIBCPP_AVAILABILITY_FUTURE __deferred_assoc_state<void, _Fp>
-    : public __assoc_sub_state
+class __deferred_assoc_state<void, _Fp> : public __assoc_sub_state
 {
     typedef __assoc_sub_state base;
@@ -947,8 +931,7 @@ __deferred_assoc_state<void, _Fp>::__execute()
 }

 template <class _Rp, class _Fp>
-class _LIBCPP_AVAILABILITY_FUTURE __async_assoc_state
-    : public __assoc_state<_Rp>
+class __async_assoc_state : public __assoc_state<_Rp>
 {
     typedef __assoc_state<_Rp> base;
@@ -996,8 +979,7 @@ __async_assoc_state<_Rp, _Fp>::__on_zero_shared() _NOEXCEPT
 }

 template <class _Fp>
-class _LIBCPP_AVAILABILITY_FUTURE __async_assoc_state<void, _Fp>
-    : public __assoc_sub_state
+class __async_assoc_state<void, _Fp> : public __assoc_sub_state
 {
     typedef __assoc_sub_state base;
@@ -1061,7 +1043,7 @@ _LIBCPP_INLINE_VISIBILITY
 future<_Rp>
 __make_async_assoc_state(_Fp&& __f);

 template <class _Rp>
-class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FUTURE future
+class _LIBCPP_TEMPLATE_VIS future
 {
     __assoc_state<_Rp>* __state_;
@@ -1148,7 +1130,7 @@ future<_Rp>::get()
 }

 template <class _Rp>
-class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FUTURE future<_Rp&>
+class _LIBCPP_TEMPLATE_VIS future<_Rp&>
 {
     __assoc_state<_Rp&>* __state_;
@@ -1230,7 +1212,7 @@ future<_Rp&>::get()
 }

 template <>
-class _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_AVAILABILITY_FUTURE future<void>
+class _LIBCPP_EXPORTED_FROM_ABI future<void>
 {
     __assoc_sub_state* __state_;
@@ -1300,7 +1282,7 @@ swap(future<_Rp>& __x, future<_Rp>& __y) _NOEXCEPT

 template <class _Callable> class packaged_task;

 template <class _Rp>
-class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FUTURE promise
+class _LIBCPP_TEMPLATE_VIS promise
 {
     __assoc_state<_Rp>* __state_;
@@ -1442,7 +1424,7 @@ promise<_Rp>::set_exception_at_thread_exit(exception_ptr __p)

 // promise<R&>

 template <class _Rp>
-class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FUTURE promise<_Rp&>
+class _LIBCPP_TEMPLATE_VIS promise<_Rp&>
 {
     __assoc_state<_Rp&>* __state_;
@@ -1565,7 +1547,7 @@ promise<_Rp&>::set_exception_at_thread_exit(exception_ptr __p)

 // promise<void>

 template <>
-class _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_AVAILABILITY_FUTURE promise<void>
+class _LIBCPP_EXPORTED_FROM_ABI promise<void>
 {
     __assoc_sub_state* __state_;
@@ -1638,7 +1620,7 @@ template
 template <class _Fp> class __packaged_task_base;

 template <class _Rp, class... _ArgTypes>
-class _LIBCPP_AVAILABILITY_FUTURE __packaged_task_base<_Rp(_ArgTypes...)>
+class __packaged_task_base<_Rp(_ArgTypes...)>
 {
     __packaged_task_base(const __packaged_task_base&);
     __packaged_task_base& operator=(const __packaged_task_base&);
@@ -1656,8 +1638,7 @@ public:

 template <class _Fp, class _Alloc, class _FB> class __packaged_task_func;

 template <class _Fp, class _Alloc, class _Rp, class... _ArgTypes>
-class _LIBCPP_AVAILABILITY_FUTURE __packaged_task_func<_Fp, _Alloc, _Rp(_ArgTypes...)>
-    : public __packaged_task_base<_Rp(_ArgTypes...)>
+class __packaged_task_func<_Fp, _Alloc, _Rp(_ArgTypes...)> : public __packaged_task_base<_Rp(_ArgTypes...)>
 {
     __compressed_pair<_Fp, _Alloc> __f_;
 public:
@@ -1714,7 +1695,7 @@ __packaged_task_func<_Fp, _Alloc, _Rp(_ArgTypes...)>::operator()(_ArgTypes&& ...

 template <class _Callable> class __packaged_task_function;

 template <class _Rp, class... _ArgTypes>
-class _LIBCPP_AVAILABILITY_FUTURE __packaged_task_function<_Rp(_ArgTypes...)>
+class __packaged_task_function<_Rp(_ArgTypes...)>
 {
     typedef __packaged_task_base<_Rp(_ArgTypes...)> __base;
@@ -1898,7 +1879,7 @@ __packaged_task_function<_Rp(_ArgTypes...)>::operator()(_ArgTypes... __arg) cons
 }

 template <class _Rp, class... _ArgTypes>
-class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FUTURE packaged_task<_Rp(_ArgTypes...)>
+class _LIBCPP_TEMPLATE_VIS packaged_task<_Rp(_ArgTypes...)>
 {
 public:
     typedef _Rp result_type; // extension
@@ -2013,7 +1994,7 @@ packaged_task<_Rp(_ArgTypes...)>::reset()
 }

 template <class... _ArgTypes>
-class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FUTURE packaged_task<void(_ArgTypes...)>
+class _LIBCPP_TEMPLATE_VIS packaged_task<void(_ArgTypes...)>
 {
 public:
     typedef void result_type; // extension
@@ -2384,7 +2365,7 @@ shared_future<_Rp&>::operator=(const shared_future& __rhs)
 }

 template <>
-class _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_AVAILABILITY_FUTURE shared_future<void>
+class _LIBCPP_EXPORTED_FROM_ABI shared_future<void>
 {
     __assoc_sub_state* __state_;
diff --git a/libcxx/include/new b/libcxx/include/new
index e95f2e46d0fe34..cb280743b6e186 100644
--- a/libcxx/include/new
+++ b/libcxx/include/new
@@ -216,7 +216,7 @@ _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(st
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p) _NOEXCEPT;
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, const std::nothrow_t&) _NOEXCEPT;
 #ifndef _LIBCPP_HAS_NO_LIBRARY_SIZED_DEALLOCATION
-_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE void operator delete(void* __p, std::size_t __sz) _NOEXCEPT;
+_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::size_t __sz) _NOEXCEPT;
 #endif

 _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz) _THROW_BAD_ALLOC;
@@ -224,7 +224,7 @@ _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p) _NOEXCEPT;
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, const std::nothrow_t&) _NOEXCEPT;
 #ifndef _LIBCPP_HAS_NO_LIBRARY_SIZED_DEALLOCATION
-_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE void operator delete[](void* __p, std::size_t __sz) _NOEXCEPT;
+_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::size_t __sz) _NOEXCEPT;
 #endif

 #ifndef _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION
@@ -233,7 +233,7 @@ _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(st
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t) _NOEXCEPT;
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT;
 #ifndef _LIBCPP_HAS_NO_LIBRARY_SIZED_DEALLOCATION
-_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE void operator delete(void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT;
+_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT;
 #endif

 _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC;
@@ -241,7 +241,7 @@ _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t) _NOEXCEPT;
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT;
 #ifndef _LIBCPP_HAS_NO_LIBRARY_SIZED_DEALLOCATION
-_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE void operator delete[](void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT;
+_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT;
 #endif

 #endif
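For context, the _LIBCPP_AVAILABILITY_* macros deleted throughout this patch expanded, on Apple platforms, to Clang availability attributes keyed to the deployment target. Roughly along these lines (illustration only, with an assumed macro name; the real definitions live in libcxx/include/__availability):

    // Illustration only: not the literal libc++ definition.
    #define DEMO_AVAILABILITY_SHARED_MUTEX \
      __attribute__((availability(macos, strict, introduced = 10.12)))

    // Using a declaration marked like this is a compile-time error when the
    // deployment target is older than the version named in the attribute.
    DEMO_AVAILABILITY_SHARED_MUTEX void demo_requires_dylib_support();

Dropping the markup therefore also drops the "is only available on ..." diagnostics, which is why the tests keyed to the availability-*-missing lit features disappear below.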
diff --git a/libcxx/include/shared_mutex b/libcxx/include/shared_mutex
index 350c841e11a0a7..3118676d4e6b95 100644
--- a/libcxx/include/shared_mutex
+++ b/libcxx/include/shared_mutex
@@ -153,7 +153,7 @@ _LIBCPP_PUSH_MACROS

 _LIBCPP_BEGIN_NAMESPACE_STD

-struct _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_AVAILABILITY_SHARED_MUTEX __shared_mutex_base {
+struct _LIBCPP_EXPORTED_FROM_ABI __shared_mutex_base {
   mutex __mut_;
   condition_variable __gate1_;
   condition_variable __gate2_;
@@ -183,8 +183,7 @@ struct _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_AVAILABILITY_SHARED_MUTEX __shared_mute
 };

 #  if _LIBCPP_STD_VER >= 17
-class _LIBCPP_EXPORTED_FROM_ABI
-    _LIBCPP_AVAILABILITY_SHARED_MUTEX _LIBCPP_THREAD_SAFETY_ANNOTATION(__capability__("shared_mutex")) shared_mutex {
+class _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_THREAD_SAFETY_ANNOTATION(__capability__("shared_mutex")) shared_mutex {
   __shared_mutex_base __base_;

 public:
@@ -222,8 +221,8 @@ public:
 };
 #  endif

-class _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_AVAILABILITY_SHARED_MUTEX _LIBCPP_THREAD_SAFETY_ANNOTATION(
-    __capability__("shared_timed_mutex")) shared_timed_mutex {
+class _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_THREAD_SAFETY_ANNOTATION(__capability__("shared_timed_mutex"))
+    shared_timed_mutex {
   __shared_mutex_base __base_;

 public:
diff --git a/libcxx/include/typeinfo b/libcxx/include/typeinfo
index 59bc291454c3d9..cf9c29dbad0036 100644
--- a/libcxx/include/typeinfo
+++ b/libcxx/include/typeinfo
@@ -92,7 +92,6 @@ class _LIBCPP_EXPORTED_FROM_ABI type_info
     int __compare(const type_info &__rhs) const _NOEXCEPT;

 public:
-    _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE
     virtual ~type_info();

     const char *name() const _NOEXCEPT;
@@ -314,7 +313,6 @@ class _LIBCPP_EXPORTED_FROM_ABI type_info
     : __type_name(__impl::__string_to_type_name(__n)) {}

 public:
-    _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE
     virtual ~type_info();

     _LIBCPP_INLINE_VISIBILITY
diff --git a/libcxx/include/version b/libcxx/include/version
index c41ce745b59304..3c8e1a132e87f8 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -248,7 +248,7 @@ __cpp_lib_within_lifetime 202306L
 # define __cpp_lib_quoted_string_io 201304L
 # define __cpp_lib_result_of_sfinae 201210L
 # define __cpp_lib_robust_nonmodifying_seq_ops 201304L
-# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)
+# if !defined(_LIBCPP_HAS_NO_THREADS)
 #   define __cpp_lib_shared_timed_mutex 201402L
 # endif
 # define __cpp_lib_string_udls 201304L
@@ -303,7 +303,7 @@ __cpp_lib_within_lifetime 202306L
 # define __cpp_lib_raw_memory_algorithms 201606L
 # define __cpp_lib_sample 201603L
 # define __cpp_lib_scoped_lock 201703L
-# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)
+# if !defined(_LIBCPP_HAS_NO_THREADS)
 #   define __cpp_lib_shared_mutex 201505L
 # endif
 # define __cpp_lib_shared_ptr_arrays 201611L
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.global/global_memory_resource_lifetime.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.global/global_memory_resource_lifetime.pass.cpp
index fc12e65bdc7204..87ffb28a07c78d 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.global/global_memory_resource_lifetime.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.global/global_memory_resource_lifetime.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // memory_resource * new_delete_resource()
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.global/new_delete_resource_lifetime.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.global/new_delete_resource_lifetime.pass.cpp
index 5e2e02fe18b489..bba56e7dc1dd5f 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.global/new_delete_resource_lifetime.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.global/new_delete_resource_lifetime.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // memory_resource * new_delete_resource()
diff --git a/libcxx/test/libcxx/language.support/support.dynamic/aligned_alloc_availability.verify.cpp b/libcxx/test/libcxx/language.support/support.dynamic/aligned_alloc_availability.verify.cpp
deleted file mode 100644
index 159ab9b9988666..00000000000000
--- a/libcxx/test/libcxx/language.support/support.dynamic/aligned_alloc_availability.verify.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// Make sure we get compile-time availability errors when trying to use aligned
-// allocation/deallocation on deployment targets that don't support it (before macosx10.13).
-
-// UNSUPPORTED: c++03, c++11, c++14
-// REQUIRES: availability-aligned_allocation-missing
-
-#include <new>
-#include <cstddef>
-
-#include "test_macros.h"
-
-constexpr auto OverAligned = __STDCPP_DEFAULT_NEW_ALIGNMENT__ * 2;
-
-struct alignas(OverAligned) A { };
-
-void f() {
-  // Normal versions
-  {
-    A *a1 = new A; // expected-error-re {{aligned allocation function of type {{.+}} is only available on}}
-    // `delete` is also required by the line above if construction fails
-    // expected-error-re@-2 {{aligned deallocation function of type {{.+}} is only available on}}
-
-    delete a1; // expected-error-re {{aligned deallocation function of type {{.+}} is only available on}}
-
-    A* a2 = new(std::nothrow) A; // expected-error-re {{aligned allocation function of type {{.+}} is only available on}}
-    // `delete` is also required above for the same reason
-    // expected-error-re@-2 {{aligned deallocation function of type {{.+}} is only available on}}
-  }
-
-  // Array versions
-  {
-    A *a1 = new A[2]; // expected-error-re {{aligned allocation function of type {{.+}} is only available on}}
-    // `delete` is also required by the line above if construction fails
-    // expected-error-re@-2 {{aligned deallocation function of type {{.+}} is only available on}}
-
-    delete[] a1; // expected-error-re {{aligned deallocation function of type {{.+}} is only available on}}
-
-    A* a2 = new(std::nothrow) A[2]; // expected-error-re {{aligned allocation function of type {{.+}} is only available on}}
-    // `delete` is also required above for the same reason
-    // expected-error-re@-2 {{aligned deallocation function of type {{.+}} is only available on}}
-  }
-}
diff --git a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
index 3b0849873ac914..fb56ce4518a718 100644
--- a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
+++ b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
@@ -9,10 +9,6 @@
 // test libc++'s implementation of align_val_t, and the relevant new/delete
 // overloads in all dialects when -faligned-allocation is present.

-// Some dylibs do not contain the aligned allocation functions, so trying to force
-// using those with -faligned-allocation results in a link error.
-// XFAIL: availability-aligned_allocation-missing
-
 // Libc++ when built for z/OS doesn't contain the aligned allocation functions,
 // nor does the dynamic library shipped with z/OS.
 // UNSUPPORTED: target={{.+}}-zos{{.*}}
diff --git a/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp b/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp
index b6d3b5c1eb803b..9cf1b275abc55c 100644
--- a/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp
@@ -9,10 +9,6 @@
 // test libc++'s implementation of align_val_t, and the relevant new/delete
 // overloads in all dialects when -faligned-allocation is present.

-// Some dylibs do not contain the aligned allocation functions, so trying to force
-// using those with -faligned-allocation results in a link error.
-// XFAIL: availability-aligned_allocation-missing
-
 // Libc++ when built for z/OS doesn't contain the aligned allocation functions,
 // nor does the dynamic library shipped with z/OS.
 // UNSUPPORTED: target={{.+}}-zos{{.*}}
diff --git a/libcxx/test/libcxx/memory/aligned_allocation_macro.compile.pass.cpp b/libcxx/test/libcxx/memory/aligned_allocation_macro.compile.pass.cpp
index d26339730be9ce..34a3146e26c958 100644
--- a/libcxx/test/libcxx/memory/aligned_allocation_macro.compile.pass.cpp
+++ b/libcxx/test/libcxx/memory/aligned_allocation_macro.compile.pass.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//

 // UNSUPPORTED: c++03, c++11, c++14
-// XFAIL: availability-aligned_allocation-missing

 #include <new>
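For context, the XFAILs removed above all concern C++17 aligned allocation. A short reminder of what pulls in those dylib functions (sketch with an assumed type name; not part of the patch):

    #include <new>

    // An over-aligned type: its alignment exceeds what plain operator new guarantees.
    struct alignas(2 * __STDCPP_DEFAULT_NEW_ALIGNMENT__) OverAligned {
      char data[2 * __STDCPP_DEFAULT_NEW_ALIGNMENT__];
    };

    void demo_aligned_allocation() {
      OverAligned* p = new OverAligned; // calls operator new(size_t, align_val_t)
      delete p;                         // calls the matching align_val_t operator delete
    }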
diff --git a/libcxx/test/libcxx/selftest/convenience_substitutions/verify.sh.cpp b/libcxx/test/libcxx/selftest/convenience_substitutions/verify.sh.cpp
new file mode 100644
index 00000000000000..d1713b44a086d2
--- /dev/null
+++ b/libcxx/test/libcxx/selftest/convenience_substitutions/verify.sh.cpp
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: verify-support
+
+// Make sure that we provide the %{verify} convenience substitution.
+
+// RUN: %{verify}
+
+struct Foo {};
+typedef Foo::x x; // expected-error {{no type named 'x' in 'Foo'}}
diff --git a/libcxx/test/libcxx/thread/thread.shared_mutex/thread_safety.verify.cpp b/libcxx/test/libcxx/thread/thread.shared_mutex/thread_safety.verify.cpp
index 19d8aab292b87a..fee940fff847ba 100644
--- a/libcxx/test/libcxx/thread/thread.shared_mutex/thread_safety.verify.cpp
+++ b/libcxx/test/libcxx/thread/thread.shared_mutex/thread_safety.verify.cpp
@@ -8,7 +8,6 @@

 // UNSUPPORTED: c++03, c++11, c++14
 // UNSUPPORTED: no-threads
-// UNSUPPORTED: availability-shared_mutex-missing
 // REQUIRES: thread-safety

 // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS
diff --git a/libcxx/test/libcxx/thread/thread.shared_timed_mutex/thread_safety.verify.cpp b/libcxx/test/libcxx/thread/thread.shared_timed_mutex/thread_safety.verify.cpp
index 9fa1ef7d9996e8..baa8bc614f1495 100644
--- a/libcxx/test/libcxx/thread/thread.shared_timed_mutex/thread_safety.verify.cpp
+++ b/libcxx/test/libcxx/thread/thread.shared_timed_mutex/thread_safety.verify.cpp
@@ -8,7 +8,6 @@

 // UNSUPPORTED: c++03, c++11
 // UNSUPPORTED: no-threads
-// UNSUPPORTED: availability-shared_mutex-missing
 // REQUIRES: thread-safety

 // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp
index 2c8161eedbc4de..e446bcf5cfc976 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp
@@ -39,15 +39,21 @@ struct TestFn {
   {
     A a(T(1));
     static_assert(noexcept(std::atomic_notify_all(&a)), "");
-    auto f = [&]() {
+
+    std::atomic<bool> is_ready[2] = {false, false};
+    auto f = [&](int index) {
       assert(std::atomic_load(&a) == T(1));
+      is_ready[index].store(true);
+
       std::atomic_wait(&a, T(1));
       assert(std::atomic_load(&a) == T(3));
     };
-    std::thread t1 = support::make_test_thread(f);
-    std::thread t2 = support::make_test_thread(f);
-    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    std::thread t1 = support::make_test_thread(f, /*index=*/0);
+    std::thread t2 = support::make_test_thread(f, /*index=*/1);
+    while (!is_ready[0] || !is_ready[1]) {
+      // Spin
+    }
     std::atomic_store(&a, T(3));
     std::atomic_notify_all(&a);
     t1.join();
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/default.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/default.pass.cpp
index 2f5a43d812064d..099c39bceea3ea 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/default.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/default.pass.cpp
@@ -11,8 +11,6 @@
 // test_memory_resource requires RTTI for dynamic_cast
 // UNSUPPORTED: no-rtti

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator
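For context, the atomic_notify_all test above replaces a 100 ms sleep with per-thread readiness flags plus a spin loop, so the notifying thread cannot race ahead of waiters on a slow machine (atomic_wait itself rechecks the value, so the remaining window is benign). The same handshake in isolation (sketch with assumed names; not part of the patch):

    #include <atomic>
    #include <thread>

    std::atomic<bool> ready{false};

    void demo_handshake() {
      std::thread worker([] {
        ready.store(true); // announce: the worker has reached its blocking point
        // ... the worker would now block on its real condition ...
      });
      while (!ready.load()) {
        // Spin until the worker has definitely started running.
      }
      worker.join();
    }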
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/equal.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/equal.pass.cpp
index 3153c562151f82..475c4cee4fa5aa 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/equal.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/equal.pass.cpp
@@ -11,8 +11,6 @@
 // test_memory_resource requires RTTI for dynamic_cast
 // UNSUPPORTED: no-rtti

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator;
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/not_equal.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/not_equal.pass.cpp
index 908e32cb553f5b..3376459b311e08 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/not_equal.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/not_equal.pass.cpp
@@ -11,8 +11,6 @@
 // test_memory_resource requires RTTI for dynamic_cast
 // UNSUPPORTED: no-rtti

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator;
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/allocate.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/allocate.pass.cpp
index 289d2ee6accd62..b2dc33737f1a8a 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/allocate.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/allocate.pass.cpp
@@ -11,8 +11,6 @@
 // test_memory_resource requires RTTI for dynamic_cast
 // UNSUPPORTED: no-rtti

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair.pass.cpp
index 17b9758b7f5284..4899587274923f 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair_const_lvalue_pair.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair_const_lvalue_pair.pass.cpp
index 0c09d4c72fa798..96a13b481460be 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair_const_lvalue_pair.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair_const_lvalue_pair.pass.cpp
@@ -11,8 +11,6 @@
 // test_memory_resource requires RTTI for dynamic_cast
 // UNSUPPORTED: no-rtti

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair_rvalue.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair_rvalue.pass.cpp
index e442e7e9a72036..c7be5efe914356 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair_rvalue.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair_rvalue.pass.cpp
@@ -11,8 +11,6 @@
 // test_memory_resource requires RTTI for dynamic_cast
 // UNSUPPORTED: no-rtti

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair_values.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair_values.pass.cpp
index d203864a4d99d2..03e466e3821a09 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair_values.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair_values.pass.cpp
@@ -11,8 +11,6 @@
 // test_memory_resource requires RTTI for dynamic_cast
 // UNSUPPORTED: no-rtti

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_piecewise_pair.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_piecewise_pair.pass.cpp
index c7f49fdb659572..7e3f92bed4421f 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_piecewise_pair.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_piecewise_pair.pass.cpp
@@ -11,8 +11,6 @@
 // test_memory_resource requires RTTI for dynamic_cast
 // UNSUPPORTED: no-rtti

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_piecewise_pair_evil.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_piecewise_pair_evil.pass.cpp
index 47fbb997ae9ca5..7b8ce2ccd451ec 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_piecewise_pair_evil.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_piecewise_pair_evil.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_types.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_types.pass.cpp
index 252044a8aa51da..2b7a46d1ef01dc 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_types.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_types.pass.cpp
@@ -11,8 +11,6 @@
 // test_memory_resource requires RTTI for dynamic_cast
 // UNSUPPORTED: no-rtti

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/deallocate.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/deallocate.pass.cpp
index bdcc5815ca7c0f..1911ebd378b9e2 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/deallocate.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/deallocate.pass.cpp
@@ -11,8 +11,6 @@
 // test_memory_resource requires RTTI for dynamic_cast
 // UNSUPPORTED: no-rtti

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/destroy.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/destroy.pass.cpp
index 4075404e069248..dc99d8aea63d42 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/destroy.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/destroy.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/resource.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/resource.pass.cpp
index afe614b52f8d56..e47784760bf02b 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/resource.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/resource.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/select_on_container_copy_construction.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/select_on_container_copy_construction.pass.cpp
index d80cb2f689b57b..698773a2c5f16b 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/select_on_container_copy_construction.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/select_on_container_copy_construction.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // template <class T> class polymorphic_allocator
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_deque_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_deque_synop.pass.cpp
index 919c867bd6ea9d..6dc013b7f7cc2f 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_deque_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_deque_synop.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/deque>

 // namespace std { namespace experimental { namespace pmr {
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_forward_list_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_forward_list_synop.pass.cpp
index 0a3c771c7b5425..fa726ed1f5a389 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_forward_list_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_forward_list_synop.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/forward_list>

 // namespace std { namespace experimental { namespace pmr {
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_list_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_list_synop.pass.cpp
index cd9150ca5d43be..5fee88359b857a 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_list_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_list_synop.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/list>

 // namespace std { namespace experimental { namespace pmr {
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_map_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_map_synop.pass.cpp
index b59b7f14412b68..2f7e9067a82b49 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_map_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_map_synop.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/map>

 // namespace std { namespace experimental { namespace pmr {
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_regex_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_regex_synop.pass.cpp
index c650c870db01dc..6d452bdfe35175 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_regex_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_regex_synop.pass.cpp
@@ -9,8 +9,6 @@
 // UNSUPPORTED: c++03
 // UNSUPPORTED: no-localization

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/regex>

 // namespace std { namespace experimental { namespace pmr {
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_set_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_set_synop.pass.cpp
index dcb829063fb5b5..b6f1251027342e 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_set_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_set_synop.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/set>

 // namespace std { namespace experimental { namespace pmr {
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_string_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_string_synop.pass.cpp
index 818c6691be91c2..ddd2783d17ce1b 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_string_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_string_synop.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/string>

 // namespace std { namespace experimental { namespace pmr {
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_map_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_map_synop.pass.cpp
index 5f55cc8d016d33..b7dcdf698d7a28 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_map_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_map_synop.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/unordered_map>

 // namespace std { namespace experimental { namespace pmr {
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_set_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_set_synop.pass.cpp
index 4c80a0ca60c435..30908dd7ee38eb 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_set_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_set_synop.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/unordered_set>

 // namespace std { namespace experimental { namespace pmr {
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_vector_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_vector_synop.pass.cpp
index 783843b7460431..f2dbcd323cbe3f 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_vector_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_vector_synop.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/vector>

 // namespace std { namespace experimental { namespace pmr {
diff --git a/libcxx/test/std/experimental/memory/memory.resource.global/default_resource.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.global/default_resource.pass.cpp
index 2378d37cc341cd..35f3f4a6eea0cb 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.global/default_resource.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.global/default_resource.pass.cpp
@@ -11,8 +11,6 @@
 // test_memory_resource requires RTTI for dynamic_cast
 // UNSUPPORTED: no-rtti

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
diff --git a/libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp
index f308752752cb7f..a6223123cd97be 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // memory_resource * new_delete_resource()
diff --git a/libcxx/test/std/experimental/memory/memory.resource.global/null_memory_resource.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.global/null_memory_resource.pass.cpp
index e9f57db3a0845b..e1bb4c4a7a00d2 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.global/null_memory_resource.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.global/null_memory_resource.pass.cpp
@@ -8,8 +8,6 @@

 // UNSUPPORTED: c++03

-// XFAIL: availability-aligned_allocation-missing
-
 // <experimental/memory_resource>

 // memory_resource * null_memory_resource()
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align.pass.cpp
index b7bfee9f58609a..c903c63f68c4ad 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align.pass.cpp
@@ -13,9 +13,6 @@
 // asan and msan will not call the new handler.
 // UNSUPPORTED: sanitizer-new-delete

-// We get availability markup errors when aligned allocation is missing
-// XFAIL: availability-aligned_allocation-missing
-
 // Libc++ when built for z/OS doesn't contain the aligned allocation functions,
 // nor does the dynamic library shipped with z/OS.
 // UNSUPPORTED: target={{.+}}-zos{{.*}}
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align.replace.indirect.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align.replace.indirect.pass.cpp
index 64d987ed691b29..dcaa76a650d37c 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align.replace.indirect.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align.replace.indirect.pass.cpp
@@ -15,9 +15,6 @@
 // XFAIL: LIBCXX-AIX-FIXME

-// We get availability markup errors when aligned allocation is missing
-// XFAIL: availability-aligned_allocation-missing
-
 // Libc++ when built for z/OS doesn't contain the aligned allocation functions,
 // nor does the dynamic library shipped with z/OS.
 // UNSUPPORTED: target={{.+}}-zos{{.*}}
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align_nothrow.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align_nothrow.pass.cpp
index ca1e529d6b74f4..0343d51f184d90 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align_nothrow.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align_nothrow.pass.cpp
@@ -13,9 +13,6 @@
 // asan and msan will not call the new handler.
 // UNSUPPORTED: sanitizer-new-delete

-// We get availability markup errors when aligned allocation is missing
-// XFAIL: availability-aligned_allocation-missing
-
 // Libc++ when built for z/OS doesn't contain the aligned allocation functions,
 // nor does the dynamic library shipped with z/OS.
// UNSUPPORTED: target={{.+}}-zos{{.*}} diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align_nothrow.replace.indirect.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align_nothrow.replace.indirect.pass.cpp index a62128942eaf63..eba8a9026fa459 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align_nothrow.replace.indirect.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align_nothrow.replace.indirect.pass.cpp @@ -16,9 +16,6 @@ // XFAIL: LIBCXX-AIX-FIXME -// We get availability markup errors when aligned allocation is missing -// XFAIL: availability-aligned_allocation-missing - // Libc++ when built for z/OS doesn't contain the aligned allocation functions, // nor does the dynamic library shipped with z/OS. // UNSUPPORTED: target={{.+}}-zos{{.*}} diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align_nothrow.replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align_nothrow.replace.pass.cpp index 9a830e0e82990a..62a040e297ae56 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align_nothrow.replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new.size_align_nothrow.replace.pass.cpp @@ -13,9 +13,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: sanitizer-new-delete -// We get availability markup errors when aligned allocation is missing -// XFAIL: availability-aligned_allocation-missing - // Libc++ when built for z/OS doesn't contain the aligned allocation functions, // nor does the dynamic library shipped with z/OS. // UNSUPPORTED: target={{.+}}-zos{{.*}} diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/nodiscard.verify.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/nodiscard.verify.cpp index cb5ea43fdef360..0f30cf0135a417 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/nodiscard.verify.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/nodiscard.verify.cpp @@ -17,9 +17,6 @@ // [[nodiscard]] enabled before C++20 in libc++ as an extension // UNSUPPORTED: (c++11 || c++14 || c++17) && !stdlib=libc++ -// We get availability markup errors when aligned allocation is missing -// XFAIL: availability-aligned_allocation-missing - // Libc++ when built for z/OS doesn't contain the aligned allocation functions, // nor does the dynamic library shipped with z/OS. // UNSUPPORTED: target={{.+}}-zos{{.*}} diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align.pass.cpp index 49b797f69d09ba..93edb32130c3e2 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align.pass.cpp @@ -10,9 +10,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// We get availability markup errors when aligned allocation is missing -// XFAIL: availability-aligned_allocation-missing - // asan and msan will not call the new handler. 
// UNSUPPORTED: sanitizer-new-delete diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align_nothrow.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align_nothrow.pass.cpp index 7149a377f983aa..4e5d36cd7c6dfc 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align_nothrow.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align_nothrow.pass.cpp @@ -10,9 +10,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// We get availability markup errors when aligned allocation is missing -// XFAIL: availability-aligned_allocation-missing - // asan and msan will not call the new handler. // UNSUPPORTED: sanitizer-new-delete diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align_nothrow.replace.indirect.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align_nothrow.replace.indirect.pass.cpp index 3cbb5aaf8f0be6..4a18ad2df8f2aa 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align_nothrow.replace.indirect.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align_nothrow.replace.indirect.pass.cpp @@ -15,9 +15,6 @@ // XFAIL: LIBCXX-AIX-FIXME -// We get availability markup errors when aligned allocation is missing -// XFAIL: availability-aligned_allocation-missing - // Libc++ when built for z/OS doesn't contain the aligned allocation functions, // nor does the dynamic library shipped with z/OS. // UNSUPPORTED: target={{.+}}-zos{{.*}} diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align_nothrow.replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align_nothrow.replace.pass.cpp index 25a3bf767307f4..3c9e17b0b02be5 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align_nothrow.replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new.size_align_nothrow.replace.pass.cpp @@ -13,9 +13,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: sanitizer-new-delete -// We get availability markup errors when aligned allocation is missing -// XFAIL: availability-aligned_allocation-missing - // Libc++ when built for z/OS doesn't contain the aligned allocation functions, // nor does the dynamic library shipped with z/OS. // UNSUPPORTED: target={{.+}}-zos{{.*}} diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/nodiscard.verify.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/nodiscard.verify.cpp index d7547ae5783d86..16d6a223eb5350 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/nodiscard.verify.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/nodiscard.verify.cpp @@ -17,9 +17,6 @@ // [[nodiscard]] enabled before C++20 in libc++ as an extension // UNSUPPORTED: (c++11 || c++14 || c++17) && !stdlib=libc++ -// We get availability markup errors when aligned allocation is missing -// XFAIL: availability-aligned_allocation-missing - // Libc++ when built for z/OS doesn't contain the aligned allocation functions, // nor does the dynamic library shipped with z/OS. 
// UNSUPPORTED: target={{.+}}-zos{{.*}} diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.compile.pass.cpp index be00ed79d0c497..6662de9c33f30e 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.compile.pass.cpp @@ -41,7 +41,7 @@ # error "__cpp_lib_shared_mutex should not be defined before c++17" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++14" # endif @@ -50,13 +50,13 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif #elif TEST_STD_VER == 17 -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++17" # endif @@ -65,11 +65,11 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++17" # endif @@ -78,13 +78,13 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif #elif TEST_STD_VER == 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++20" # endif @@ -93,11 +93,11 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" 
# endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++20" # endif @@ -106,13 +106,13 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif #elif TEST_STD_VER == 23 -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++23" # endif @@ -121,11 +121,11 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++23" # endif @@ -134,13 +134,13 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif #elif TEST_STD_VER > 23 -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++26" # endif @@ -149,11 +149,11 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++26" # endif @@ -162,7 +162,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" 
# endif # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index ed86c555221c54..3cc7ed5f6d2227 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -1545,7 +1545,7 @@ # error "__cpp_lib_shared_ptr_weak_type should not be defined before c++17" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++14" # endif @@ -1554,7 +1554,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif @@ -2471,7 +2471,7 @@ # error "__cpp_lib_semaphore should not be defined before c++20" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++17" # endif @@ -2480,7 +2480,7 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif @@ -2498,7 +2498,7 @@ # error "__cpp_lib_shared_ptr_weak_type should have the value 201606L in c++17" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++17" # endif @@ -2507,7 +2507,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif @@ -3682,7 +3682,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++20" # endif @@ -3691,7 +3691,7 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" 
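The hunks that follow delete `XFAIL: availability-shared_mutex-missing` from the `std::shared_lock` and `std::shared_mutex` tests, so they now run regardless of deployment target. For orientation, a short usage sketch of the entirely standard API those tests cover:

```cpp
// Usage sketch of the APIs exercised by the shared_lock tests below.
#include <cassert>
#include <mutex>         // std::defer_lock
#include <shared_mutex>  // std::shared_mutex (C++17), std::shared_lock (C++14)

int main() {
  std::shared_mutex m;
  {
    std::shared_lock<std::shared_mutex> lk(m, std::defer_lock);  // not yet locked
    assert(!lk.owns_lock());
    lk.lock();  // acquire shared (reader) ownership
    assert(lk.owns_lock());
  }             // released on scope exit
  m.lock();     // exclusive (writer) ownership
  m.unlock();
  return 0;
}
```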
# endif # endif @@ -3709,7 +3709,7 @@ # error "__cpp_lib_shared_ptr_weak_type should have the value 201606L in c++20" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++20" # endif @@ -3718,7 +3718,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif @@ -5127,7 +5127,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++23" # endif @@ -5136,7 +5136,7 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif @@ -5154,7 +5154,7 @@ # error "__cpp_lib_shared_ptr_weak_type should have the value 201606L in c++23" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++23" # endif @@ -5163,7 +5163,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif @@ -6674,7 +6674,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++26" # endif @@ -6683,7 +6683,7 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" +# error "__cpp_lib_shared_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif @@ -6701,7 +6701,7 @@ # error "__cpp_lib_shared_ptr_weak_type should have the value 201606L in c++26" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX) +# if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++26" # endif @@ -6710,7 +6710,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)' is not met!" 
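Likewise, the `shared_timed_mutex` tests further below lose their availability markup; they exercise the timed members, sketched here (the 50ms timeout and deadline are illustrative only):

```cpp
// Sketch of the timed members covered by the shared_timed_mutex tests
// later in this patch; all standard C++14 API, nothing patch-specific.
#include <chrono>
#include <shared_mutex>

int main() {
  std::shared_timed_mutex m;
  using namespace std::chrono_literals;

  if (m.try_lock_for(50ms)) {  // exclusive ownership, bounded wait
    m.unlock();
  }
  auto deadline = std::chrono::steady_clock::now() + 50ms;
  if (m.try_lock_shared_until(deadline)) {  // shared ownership, absolute deadline
    m.unlock_shared();
  }
  return 0;
}
```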
+# error "__cpp_lib_shared_timed_mutex should not be defined when the requirement '!defined(_LIBCPP_HAS_NO_THREADS)' is not met!" # endif # endif diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/implicit_ctad.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/implicit_ctad.pass.cpp index 23a946b331be9d..9a595f90ed4fff 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/implicit_ctad.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/implicit_ctad.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++98, c++03, c++11, c++14 -// XFAIL: availability-shared_mutex-missing - // // shared_lock diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp index 547e514f00a983..3304e7526e3ce7 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 -// XFAIL: availability-shared_mutex-missing - // // template class shared_lock; diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp index 4f6801efa7cee2..6d7838e8c6c95c 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 -// XFAIL: availability-shared_mutex-missing - // // template class shared_lock; diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp index c76b73a2fa0ea6..fa3c689398aaa4 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 -// XFAIL: availability-shared_mutex-missing - // // template class shared_lock; diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp index 3742becccf8ac4..4940041bcf9656 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp @@ -10,8 +10,6 @@ // UNSUPPORTED: c++03, c++11 // ALLOW_RETRIES: 2 -// XFAIL: availability-shared_mutex-missing - // // template class shared_lock; diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp index 6a2e2592155f81..9cb6dc173db350 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 -// XFAIL: availability-shared_mutex-missing - // // template class shared_lock; diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp index 949d6c2955cb61..e231c130330d68 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 -// XFAIL: availability-shared_mutex-missing - // // template class shared_lock; diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp index 508d1cb81b7638..07963a4ee1d519 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 -// XFAIL: availability-shared_mutex-missing - // // class timed_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp index 1aa437e6c6c1a7..1fd97b4173e01e 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 -// XFAIL: availability-shared_mutex-missing - // // class shared_timed_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp index 4f7b764197c2e2..9a969c01d2c41e 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp @@ -10,8 +10,6 @@ // UNSUPPORTED: c++03, c++11 // ALLOW_RETRIES: 2 -// XFAIL: availability-shared_mutex-missing - // // template class shared_lock; diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp index 
e7d24621e02254..edb7c42356acee 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp @@ -10,8 +10,6 @@ // UNSUPPORTED: c++03, c++11 // ALLOW_RETRIES: 2 -// XFAIL: availability-shared_mutex-missing - // // template class shared_lock; diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp index d5c88ce9f5a146..889641b14f6cf6 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 -// XFAIL: availability-shared_mutex-missing - // // template class shared_lock; diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp index 91e25a2d74f183..162bd989b823e9 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 -// XFAIL: availability-shared_mutex-missing - // // template class shared_lock; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/assign.verify.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/assign.verify.cpp index 0dcd48e750d090..34164aaa0413c0 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/assign.verify.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/assign.verify.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/copy.verify.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/copy.verify.cpp index ad3ffd8a745f1b..9b43198d0a37ba 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/copy.verify.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/copy.verify.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp index 84763cd831d791..91320a52b62aea 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp index 82ad573c54cc2c..122f2b0cddb791 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp @@ -11,8 +11,6 @@ // ALLOW_RETRIES: 2 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp index b7616750a5770f..9df0d57c9621c9 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp @@ -11,8 +11,6 @@ // ALLOW_RETRIES: 2 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp index 675369fb09bcc6..f39b1ee29f7db3 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp @@ -11,8 +11,6 @@ // ALLOW_RETRIES: 2 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp index 33bbf0a2e547cc..c091b06f474927 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp @@ -11,8 +11,6 @@ // ALLOW_RETRIES: 2 -// 
UNSUPPORTED: availability-shared_mutex-missing - // // class shared_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/assign.compile.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/assign.compile.fail.cpp index ca1d0ff0f76ff9..c2cd8931b16cd8 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/assign.compile.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/assign.compile.fail.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_timed_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/copy.compile.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/copy.compile.fail.cpp index 0458f9c93aff67..9b0a66160e0703 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/copy.compile.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/copy.compile.fail.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_timed_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp index e6633665c76577..b278419df6eedb 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_timed_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp index 606a377e72c548..acabbabb3896bf 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 -// UNSUPPORTED: availability-shared_mutex-missing - // ALLOW_RETRIES: 3 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp index 20d7347b21cdc9..36f5dbaa2040fa 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 -// UNSUPPORTED: availability-shared_mutex-missing - // ALLOW_RETRIES: 3 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp index 52d7e2245847d9..cc7091fed415dc 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp @@ -11,8 +11,6 @@ // ALLOW_RETRIES: 2 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_timed_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_for.pass.cpp index 7eb7f9001aa2a0..30fc3c510a237c 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_for.pass.cpp @@ -10,8 +10,6 @@ // UNSUPPORTED: c++03, c++11 // ALLOW_RETRIES: 2 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_timed_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp index 1c15acb62f477f..8523df08f81294 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp @@ -11,8 +11,6 @@ // ALLOW_RETRIES: 2 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_timed_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_for.pass.cpp index dae0a448d888b9..c7d02a3a72001f 100644 --- 
a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_for.pass.cpp @@ -11,8 +11,6 @@ // ALLOW_RETRIES: 3 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_timed_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp index ae7c26029868aa..a95ffab0aa60d0 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp @@ -11,8 +11,6 @@ // ALLOW_RETRIES: 2 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_timed_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until.pass.cpp index 7aab2353e4eb01..fb521efdb93f59 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until.pass.cpp @@ -11,8 +11,6 @@ // ALLOW_RETRIES: 2 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_timed_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp index a8b44efdab1e18..cdb0afcdf78463 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 -// UNSUPPORTED: availability-shared_mutex-missing - // // class shared_timed_mutex; diff --git a/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp index 0d4cab8b809c5b..a3c43de5c0b1f6 100644 --- a/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // any& operator=(const any&); diff --git a/libcxx/test/std/utilities/any/any.class/any.assign/move.pass.cpp 
b/libcxx/test/std/utilities/any/any.class/any.assign/move.pass.cpp index 08c9acd6068383..2cfc3f63aa769f 100644 --- a/libcxx/test/std/utilities/any/any.class/any.assign/move.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.assign/move.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // any& operator=(any &&); diff --git a/libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp index 7bd0ffafd95529..939c766777c0bf 100644 --- a/libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // template diff --git a/libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp index 6bb5f13030904d..c804887ff68f05 100644 --- a/libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // any(any const &); diff --git a/libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp index ec53bf13008512..3b96126d917516 100644 --- a/libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // template any(in_place_type_t, Args&&...); diff --git a/libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp index a8a362b5c45ccd..37d3e1aa8602e1 100644 --- a/libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // any(any &&) noexcept; diff --git a/libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp index ed7f4e94b39f4f..120dfc22f3fbc4 100644 --- a/libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // template any(Value &&) diff --git a/libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp index 00c4c50ae2f598..3d3ecf9b870ce7 100644 --- a/libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // template T& emplace(Args&&...); diff --git a/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp index eee478dd36ea9e..6b52036a6e716b 100644 --- 
a/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // any::reset() noexcept diff --git a/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp index 163dafca59a8ca..c13cb8b96a3afb 100644 --- a/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // any::swap(any &) noexcept diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_pointer.pass.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_pointer.pass.cpp index 4faa75fee6d648..5143befffce731 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_pointer.pass.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_pointer.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // template diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp index e2db999b7ae047..079be667719443 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // template diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_request_invalid_value_category.verify.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_request_invalid_value_category.verify.cpp index 3565df8ac8b1e7..34f785fdff6bcd 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_request_invalid_value_category.verify.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_request_invalid_value_category.verify.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: availability-bad_any_cast-missing - // // template diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.verify.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.verify.cpp index 2369febb5e6e56..1830626d7f4103 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.verify.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.verify.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: availability-bad_any_cast-missing - // // template diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.verify.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.verify.cpp index cd62f88b4f5008..3b9aac581a083b 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.verify.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.verify.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: availability-bad_any_cast-missing - // // template diff --git 
a/libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp b/libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp index 1a68488b01f152..bf98896f8b6760 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // template any make_any(Args&&...); diff --git a/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp b/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp index becd1982e35b29..0abed9998520ae 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_any_cast-missing && !no-exceptions - // // void swap(any &, any &) noexcept diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.array.bounded.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.array.bounded.pass.cpp index 79558cfc4c6c0c..c42063d53326a9 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.array.bounded.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.array.bounded.pass.cpp @@ -8,9 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// This test requires support for aligned allocation to test overaligned types. -// XFAIL: availability-aligned_allocation-missing - // // shared_ptr diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.array.unbounded.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.array.unbounded.pass.cpp index f6e0f4e2467155..f3802e32f75bc2 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.array.unbounded.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.array.unbounded.pass.cpp @@ -8,9 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// This test requires support for aligned allocation to test overaligned types. -// XFAIL: availability-aligned_allocation-missing - // // shared_ptr diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.array.bounded.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.array.bounded.pass.cpp index 60fb9bb25ff861..f75466bcdcf8cb 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.array.bounded.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.array.bounded.pass.cpp @@ -8,9 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// This test requires support for aligned allocation to test overaligned types. 
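The `allocate_shared`/`make_shared` array tests in this group delete the `availability-aligned_allocation-missing` XFAIL noted in the removed comment above. A sketch of the overaligned case they exercise, using C++20's array `make_shared`; the `Overaligned` type name is illustrative, not taken from the tests:

```cpp
#include <memory>

struct alignas(64) Overaligned {  // hypothetical overaligned element type
  float data[16];
};

int main() {
  // Bounded array form: shared_ptr<T[N]> (C++20).
  std::shared_ptr<Overaligned[4]> bounded = std::make_shared<Overaligned[4]>();
  // Unbounded array form: shared_ptr<T[]> with a runtime length (C++20).
  std::shared_ptr<Overaligned[]> unbounded = std::make_shared<Overaligned[]>(4);
  // Both allocations must honor the 64-byte alignment, which is what the
  // removed XFAIL used to gate on older deployment targets.
  (void)bounded;
  (void)unbounded;
  return 0;
}
```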
-// XFAIL: availability-aligned_allocation-missing - // // shared_ptr diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.array.unbounded.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.array.unbounded.pass.cpp index 01681cdf3a675b..cd6c548010692d 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.array.unbounded.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.array.unbounded.pass.cpp @@ -8,9 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// This test requires support for aligned allocation to test overaligned types. -// XFAIL: availability-aligned_allocation-missing - // // shared_ptr diff --git a/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp b/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp index ab01a74c9a9679..45e6e642037775 100644 --- a/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_optional_access-missing // diff --git a/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp b/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp index 954f399ac5c585..9cf3b089301210 100644 --- a/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_optional_access-missing // diff --git a/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp b/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp index 375820a9033d9c..97305d976e0664 100644 --- a/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 -// XFAIL: availability-bad_optional_access-missing && !no-exceptions - // // template constexpr auto and_then(F&&) &; diff --git a/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp b/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp index 79a8cfc5e73c33..0a151517b101c8 100644 --- a/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 -// XFAIL: availability-bad_optional_access-missing && !no-exceptions - // // template constexpr auto transform(F&&) &; diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/U.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/U.pass.cpp index 60e589ba46d1bf..a5ee602ab7bce6 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/U.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/U.pass.cpp @@ -8,8 +8,6 @@ // // 
UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_optional_access-missing && !no-exceptions - // // template diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/const_T.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/const_T.pass.cpp index 32334a0bdc2807..91a2323eebbf46 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/const_T.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/const_T.pass.cpp @@ -8,8 +8,6 @@ // // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_optional_access-missing && !no-exceptions - // // constexpr optional(const T& v); diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp index 41b563357eac29..f856c1d41d05a7 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_optional_access-missing && !no-exceptions - // // constexpr optional(optional&& rhs); diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/rvalue_T.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/rvalue_T.pass.cpp index d301212eaa73b6..12425955f5a868 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/rvalue_T.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/rvalue_T.pass.cpp @@ -8,8 +8,6 @@ // // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_optional_access-missing && !no-exceptions - // // constexpr optional(T&& v); diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp index d4c0e8fc62da3a..781784c6806a4b 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_optional_access-missing && !no-exceptions - // // constexpr T& optional::value() &; diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const.pass.cpp index 9349f1de0d53ca..ea62f76d4082a8 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_optional_access-missing && !no-exceptions - // // constexpr const T& optional::value() const &; diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const_rvalue.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const_rvalue.pass.cpp index 2831816cf5b109..49dbe4e58b57d2 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const_rvalue.pass.cpp +++ 
b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const_rvalue.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_optional_access-missing && !no-exceptions - // // constexpr const T& optional::value() const &&; diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_rvalue.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_rvalue.pass.cpp index dabeced527500a..5de0187b495727 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_rvalue.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_rvalue.pass.cpp @@ -9,8 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // -// XFAIL: availability-bad_optional_access-missing && !no-exceptions - // constexpr T& optional::value() &&; #include diff --git a/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp b/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp index d6c770fe18e9c8..e325a7af558ebc 100644 --- a/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_optional_access-missing && !no-exceptions - // // // template diff --git a/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp b/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp index aa9c9251e6ff67..57c9d4bf1b0460 100644 --- a/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing // diff --git a/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp b/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp index 47aed9a8e5e659..97c7ff0ed09579 100644 --- a/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template diff --git a/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp b/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp index 734fe1b480fbf2..d5e54d41e2f7b4 100644 --- a/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template constexpr T& get(variant& v); diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/T.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/T.pass.cpp index 759685594c5c38..0cab5689114360 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/T.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/T.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git 
a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp index a0b6684bddb1dd..096d365d2d7525 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp index f62b5be1135968..84094347aed3a7 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp index 108f92ded4e88b..5d3e8d884ee1ae 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp index 3a487c7e1f0868..d1e5768f58d2b1 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/default.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/default.pass.cpp index fd490a3c59ccf4..cc1a3fe8ff78af 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/default.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/default.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_args.pass.cpp index 56d85858c545a2..00b3d70984ca3a 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_args.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_init_list_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_init_list_args.pass.cpp index 8b4c0cd575fec0..c93f456eb03812 100644 --- 
a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_init_list_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_init_list_args.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_args.pass.cpp index dbee67fc8a6dab..417845792675d3 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_args.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_init_list_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_init_list_args.pass.cpp index ea05d906f097d1..11f5e1f9f94e6f 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_init_list_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_init_list_args.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp index 308461e93f78d1..e2518fe29caf7e 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp index 21a02c24f7db47..96fcd7e7bee481 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp index efbe6bd2b4c78e..9068aacc435928 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_args.pass.cpp index 45c891caf4b75a..24305aa0a35dae 
100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_args.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_init_list_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_init_list_args.pass.cpp index 9f961664fa8d11..74d834b9b34575 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_init_list_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_init_list_args.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp index 1f388b44f95463..1802bc4670bba9 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template class variant; diff --git a/libcxx/test/std/utilities/variant/variant.visit/robust_against_adl.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit/robust_against_adl.pass.cpp index f4b191a24623ff..6f17fa32648d41 100644 --- a/libcxx/test/std/utilities/variant/variant.visit/robust_against_adl.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.visit/robust_against_adl.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template // constexpr see below visit(Visitor&& vis, Variants&&... vars); diff --git a/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp index 26a44b364b24c3..097b784f2bf2ce 100644 --- a/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template // constexpr see below visit(Visitor&& vis, Variants&&... vars); diff --git a/libcxx/test/std/utilities/variant/variant.visit/visit_return_type.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit/visit_return_type.pass.cpp index 3a848caaaba12d..eb425c07f93222 100644 --- a/libcxx/test/std/utilities/variant/variant.visit/visit_return_type.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.visit/visit_return_type.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: availability-bad_variant_access-missing && !no-exceptions - // // template // constexpr R visit(Visitor&& vis, Variants&&... 
vars); diff --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt index cf6dca7cd7d3c6..a81b27ea688655 100644 --- a/libcxx/utils/data/ignore_format.txt +++ b/libcxx/utils/data/ignore_format.txt @@ -827,7 +827,6 @@ libcxx/test/libcxx/iterators/predef.iterators/__unconstrained_reverse_iterator/t libcxx/test/libcxx/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/failed.pass.cpp libcxx/test/libcxx/iterators/unwrap_iter.pass.cpp libcxx/test/libcxx/language.support/cxa_deleted_virtual.pass.cpp -libcxx/test/libcxx/language.support/support.dynamic/aligned_alloc_availability.verify.cpp libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp libcxx/test/libcxx/language.support/support.rtti/type.info/type_info.comparison.apple.compile.pass.cpp diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 76faa275d2d7e2..774fb6155398f9 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -911,8 +911,8 @@ def add_version_header(tc): "name": "__cpp_lib_shared_mutex", "values": {"c++17": 201505}, "headers": ["shared_mutex"], - "test_suite_guard": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)", - "libcxx_guard": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)", + "test_suite_guard": "!defined(_LIBCPP_HAS_NO_THREADS)", + "libcxx_guard": "!defined(_LIBCPP_HAS_NO_THREADS)", }, { "name": "__cpp_lib_shared_ptr_arrays", @@ -928,8 +928,8 @@ def add_version_header(tc): "name": "__cpp_lib_shared_timed_mutex", "values": {"c++14": 201402}, "headers": ["shared_mutex"], - "test_suite_guard": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)", - "libcxx_guard": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_HAS_NO_SHARED_MUTEX)", + "test_suite_guard": "!defined(_LIBCPP_HAS_NO_THREADS)", + "libcxx_guard": "!defined(_LIBCPP_HAS_NO_THREADS)", }, { "name": "__cpp_lib_shift", diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index 11f60a9620546f..de7d3e140dd06f 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -497,7 +497,7 @@ def check_gdb(cfg): Feature( name="availability-fp_to_chars-missing", when=lambda cfg: BooleanExpression.evaluate( - "stdlib=apple-libc++ && target={{.+}}-apple-macosx{{(10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0|12.0|13.0)(.0)?}}", + "stdlib=apple-libc++ && target={{.+}}-apple-macosx{{(10.13|10.14|10.15|11.0|12.0|13.0)(.0)?}}", cfg.available_features, ), ), @@ -505,7 +505,7 @@ def check_gdb(cfg): Feature( name="availability-char8_t_support-missing", when=lambda cfg: BooleanExpression.evaluate( - "stdlib=apple-libc++ && target={{.+}}-apple-macosx{{(10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0)(.0)?}}", + "stdlib=apple-libc++ && target={{.+}}-apple-macosx{{(10.13|10.14|10.15|11.0)(.0)?}}", cfg.available_features, ), ), @@ -513,31 +513,7 @@ def check_gdb(cfg): Feature( name="availability-verbose_abort-missing", when=lambda cfg: BooleanExpression.evaluate( - "stdlib=apple-libc++ && target={{.+}}-apple-macosx{{(10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0|12.0|13.0)(.0)?}}", - cfg.available_features, - ), - ), - # Tests that require std::bad_variant_access in the built library - 
Feature( - name="availability-bad_variant_access-missing", - when=lambda cfg: BooleanExpression.evaluate( - "stdlib=apple-libc++ && target={{.+}}-apple-macosx10.{{(9|10|11|12)(.0)?}}", - cfg.available_features, - ), - ), - # Tests that require std::bad_optional_access in the built library - Feature( - name="availability-bad_optional_access-missing", - when=lambda cfg: BooleanExpression.evaluate( - "stdlib=apple-libc++ && target={{.+}}-apple-macosx10.{{(9|10|11|12)(.0)?}}", - cfg.available_features, - ), - ), - # Tests that require std::bad_any_cast in the built library - Feature( - name="availability-bad_any_cast-missing", - when=lambda cfg: BooleanExpression.evaluate( - "stdlib=apple-libc++ && target={{.+}}-apple-macosx10.{{(9|10|11|12)(.0)?}}", + "stdlib=apple-libc++ && target={{.+}}-apple-macosx{{(10.13|10.14|10.15|11.0|12.0|13.0)(.0)?}}", cfg.available_features, ), ), @@ -545,7 +521,7 @@ def check_gdb(cfg): Feature( name="availability-pmr-missing", when=lambda cfg: BooleanExpression.evaluate( - "stdlib=apple-libc++ && target={{.+}}-apple-macosx{{(10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0|12.0|13.0)(.0)?}}", + "stdlib=apple-libc++ && target={{.+}}-apple-macosx{{(10.13|10.14|10.15|11.0|12.0|13.0)(.0)?}}", cfg.available_features, ), ), @@ -553,7 +529,7 @@ def check_gdb(cfg): Feature( name="availability-filesystem-missing", when=lambda cfg: BooleanExpression.evaluate( - "stdlib=apple-libc++ && target={{.+}}-apple-macosx10.{{(9|10|11|12|13|14)(.0)?}}", + "stdlib=apple-libc++ && target={{.+}}-apple-macosx10.{{(13|14)(.0)?}}", cfg.available_features, ), ), @@ -561,24 +537,7 @@ def check_gdb(cfg): Feature( name="availability-synchronization_library-missing", when=lambda cfg: BooleanExpression.evaluate( - "stdlib=apple-libc++ && target={{.+}}-apple-macosx10.{{(9|10|11|12|13|14|15)(.0)?}}", - cfg.available_features, - ), - ), - # Tests that require support for std::shared_mutex and std::shared_timed_mutex in the built library - Feature( - name="availability-shared_mutex-missing", - when=lambda cfg: BooleanExpression.evaluate( - "stdlib=apple-libc++ && target={{.+}}-apple-macosx10.{{(9|10|11)(.0)?}}", - cfg.available_features, - ), - ), - # Tests that require support for aligned allocation in the built library. This is about `operator new(..., std::align_val_t, ...)` specifically, - # not other forms of aligned allocation. - Feature( - name="availability-aligned_allocation-missing", - when=lambda cfg: BooleanExpression.evaluate( - "stdlib=apple-libc++ && target={{.+}}-apple-macosx10.{{(9|10|11|12)(.0)?}}", + "stdlib=apple-libc++ && target={{.+}}-apple-macosx10.{{(13|14|15)(.0)?}}", cfg.available_features, ), ), @@ -587,7 +546,7 @@ def check_gdb(cfg): name="availability-tzdb-missing", when=lambda cfg: BooleanExpression.evaluate( # TODO(ldionne) Please provide the correct value. 
- "(stdlib=apple-libc++ && target={{.+}}-apple-macosx{{(10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0|12.0|13.0)(.0)?}})", + "(stdlib=apple-libc++ && target={{.+}}-apple-macosx{{(10.13|10.14|10.15|11.0|12.0|13.0)(.0)?}})", cfg.available_features, ), ), diff --git a/libcxx/utils/libcxx/test/format.py b/libcxx/utils/libcxx/test/format.py index 4106cd83db34b7..52c6f9cd8f2ef2 100644 --- a/libcxx/utils/libcxx/test/format.py +++ b/libcxx/utils/libcxx/test/format.py @@ -75,11 +75,21 @@ def parseScript(test, preamble): tmpDir, tmpBase = _getTempPaths(test) substitutions = lit.TestRunner.getDefaultSubstitutions(test, tmpDir, tmpBase) - # Check base substitutions and add the %{build} and %{run} convenience substitutions + # Check base substitutions and add the %{build}, %{verify} and %{run} convenience substitutions + # + # Note: We use -Wno-error with %{verify} to make sure that we don't treat all diagnostics as + # errors, which doesn't make sense for clang-verify tests because we may want to check + # for specific warning diagnostics. _checkBaseSubstitutions(substitutions) substitutions.append( ("%{build}", "%{cxx} %s %{flags} %{compile_flags} %{link_flags} -o %t.exe") ) + substitutions.append( + ( + "%{verify}", + "%{cxx} %s %{flags} %{compile_flags} -fsyntax-only -Wno-error -Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0", + ) + ) substitutions.append(("%{run}", "%{exec} %t.exe")) # Parse the test file, including custom directives @@ -227,6 +237,13 @@ class CxxStandardLibraryTest(lit.formats.FileBasedTest): file with the %{flags}, %{compile_flags} and %{link_flags} substitutions, and that produces an executable named %t.exe. + %{verify} + Expands to a command-line that builds the current source + file with the %{flags} and %{compile_flags} substitutions + and enables clang-verify. This can be used to write .sh.cpp + tests that use clang-verify. Note that this substitution can + only be used when the 'verify-support' feature is available. + %{run} Equivalent to `%{exec} %t.exe`. This is intended to be used in conjunction with the %{build} substitution. @@ -265,9 +282,6 @@ def getTestsForPath(self, testSuite, pathInSuite, litConfig, localConfig): yield lit.Test.Test(testSuite, pathInSuite, localConfig) def execute(self, test, litConfig): - VERIFY_FLAGS = ( - "-Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0" - ) supportsVerify = "verify-support" in test.config.available_features filename = test.path_in_suite[-1] @@ -305,13 +319,7 @@ def execute(self, test, litConfig): test.getFullName() ), ) - steps = [ - # Note: Use -Wno-error to make sure all diagnostics are not treated as errors, - # which doesn't make sense for clang-verify tests. - "%dbg(COMPILED WITH) %{{cxx}} %s %{{flags}} %{{compile_flags}} -fsyntax-only -Wno-error {}".format( - VERIFY_FLAGS - ) - ] + steps = ["%dbg(COMPILED WITH) %{verify}"] return self._executeShTest(test, litConfig, steps) # Make sure to check these ones last, since they will match other # suffixes above too. diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index df091613dc0a14..1b8acbe1c908b1 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -1167,7 +1167,9 @@ void LinkerScript::adjustOutputSections() { // * The address assignment. // The other option is to pick flags that minimize the impact the section // will have on the rest of the linker. That is why we copy the flags from - // the previous sections. Only a few flags are needed to keep the impact low. + // the previous sections. 
We copy just SHF_ALLOC and SHF_WRITE to keep the + impact low. We do not propagate SHF_EXECINSTR as in some cases this can + lead to an executable, writable section. uint64_t flags = SHF_ALLOC; SmallVector<StringRef, 0> defPhdrs; @@ -1193,8 +1195,8 @@ void LinkerScript::adjustOutputSections() { // We do not want to keep any special flags for output section // in case it is empty. if (isEmpty) - sec->flags = flags & ((sec->nonAlloc ? 0 : (uint64_t)SHF_ALLOC) | - SHF_WRITE | SHF_EXECINSTR); + sec->flags = + flags & ((sec->nonAlloc ? 0 : (uint64_t)SHF_ALLOC) | SHF_WRITE); // The code below may remove empty output sections. We should save the // specified program headers (if exist) and propagate them to subsequent diff --git a/lld/docs/ELF/linker_script.rst b/lld/docs/ELF/linker_script.rst index fbd96abcb44c5b..3606ef4fe4b8ee 100644 --- a/lld/docs/ELF/linker_script.rst +++ b/lld/docs/ELF/linker_script.rst @@ -97,6 +97,16 @@ The presence of ``address`` can cause the condition unsatisfied. LLD will warn. GNU ld from Binutils 2.35 onwards will reduce sh_addralign so that sh_addr=0 (modulo sh_addralign). +When an output section has no input section, GNU ld will eliminate it if it +only contains symbol assignments (e.g. ``.foo { symbol = 42; }``). LLD will +retain such sections unless all the symbol assignments are unreferenced +``PROVIDED``. + +When an output section has no input section but advances the location counter, +GNU ld sets the ``SHF_WRITE`` flag. LLD sets the ``SHF_WRITE`` flag only if the +preceding output section with non-empty input sections also has the ``SHF_WRITE`` +flag. + Output section type ------------------- diff --git a/lld/test/ELF/linkerscript/empty-synthetic-removed-flags.s b/lld/test/ELF/linkerscript/empty-synthetic-removed-flags.s index b8d72702425f19..dfc502e8da9e1d 100644 --- a/lld/test/ELF/linkerscript/empty-synthetic-removed-flags.s +++ b/lld/test/ELF/linkerscript/empty-synthetic-removed-flags.s @@ -29,7 +29,6 @@ # EMPTY-NEXT: Type: SHT_PROGBITS # EMPTY-NEXT: Flags [ # EMPTY-NEXT: SHF_ALLOC -# EMPTY-NEXT: SHF_EXECINSTR # EMPTY-NEXT: ] .section .foo,"ax" diff --git a/lld/test/ELF/linkerscript/extend-pt-load2.test b/lld/test/ELF/linkerscript/extend-pt-load2.test index 3be94a5670d8cf..6c0e3b2863f913 100644 --- a/lld/test/ELF/linkerscript/extend-pt-load2.test +++ b/lld/test/ELF/linkerscript/extend-pt-load2.test @@ -19,12 +19,13 @@ SECTIONS { .data.rel.ro : { *(.data.rel.ro) } } -# CHECK: .rodata PROGBITS 0000000000000215 000215 000001 00 A 0 -# CHECK-NEXT: foo PROGBITS 0000000000000216 000216 000000 00 A 0 -# CHECK-NEXT: .text PROGBITS 0000000000000218 000218 000001 00 AX 0 -# CHECK-NEXT: bar PROGBITS 0000000000000219 000219 000de7 00 AX 0 +# CHECK: .rodata PROGBITS 000000000000024d 00024d 000001 00 A 0 +# CHECK-NEXT: foo PROGBITS 000000000000024e 00024e 000000 00 A 0 +# CHECK-NEXT: .text PROGBITS 0000000000000250 000250 000001 00 AX 0 +# CHECK-NEXT: bar PROGBITS 0000000000000251 000251 000daf 00 A 0 # CHECK-NEXT: .data.rel.ro PROGBITS 0000000000001000 001000 000001 00 WA 0 -# CHECK: LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x000216 0x000216 R 0x1000 -# CHECK: LOAD 0x000218 0x0000000000000218 0x0000000000000218 0x000de8 0x000de8 R E 0x1000 +# CHECK: LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x00024e 0x00024e R 0x1000 +# CHECK-NEXT: LOAD 0x000250 0x0000000000000250 0x0000000000000250 0x000001 0x000001 R E 0x1000 +# CHECK-NEXT: LOAD 0x000251 0x0000000000000251 0x0000000000000251 0x000daf 0x000daf R 0x1000 # CHECK-NEXT: LOAD 0x001000 0x0000000000001000
0x0000000000001000 0x000068 0x000068 RW 0x1000 diff --git a/lld/test/ELF/linkerscript/insert-before.test b/lld/test/ELF/linkerscript/insert-before.test index f9611538c013b2..e6ed413639827a 100644 --- a/lld/test/ELF/linkerscript/insert-before.test +++ b/lld/test/ELF/linkerscript/insert-before.test @@ -9,13 +9,14 @@ # RUN: llvm-readelf -S -l %t1 | FileCheck %s # CHECK: Name Type Address Off Size ES Flg # CHECK-NEXT: NULL -# CHECK-NEXT: .foo.text PROGBITS 0000000000000000 001000 000008 00 AX -# CHECK-NEXT: .text PROGBITS 0000000000000008 001008 000008 00 AX -# CHECK-NEXT: .byte PROGBITS 0000000000000010 001010 000001 00 AX -# CHECK-NEXT: .foo.data PROGBITS 0000000000000011 001011 000008 00 WA -# CHECK-NEXT: .data PROGBITS 0000000000000019 001019 000008 00 WA +# CHECK-NEXT: .foo.text PROGBITS 0000000000000000 001000 000008 00 AX 0 +# CHECK-NEXT: .text PROGBITS 0000000000000008 001008 000008 00 AX 0 +# CHECK-NEXT: .byte PROGBITS 0000000000000010 001010 000001 00 A 0 +# CHECK-NEXT: .foo.data PROGBITS 0000000000000011 001011 000008 00 WA 0 +# CHECK-NEXT: .data PROGBITS 0000000000000019 001019 000008 00 WA 0 # CHECK: Type # CHECK-NEXT: LOAD {{.*}} R E +# CHECK-NEXT: LOAD {{.*}} R # CHECK-NEXT: LOAD {{.*}} RW # CHECK-NEXT: GNU_STACK {{.*}} RW diff --git a/lld/test/ELF/linkerscript/lma-align2.test b/lld/test/ELF/linkerscript/lma-align2.test index 2177101ff37475..23b06b179b6e85 100644 --- a/lld/test/ELF/linkerscript/lma-align2.test +++ b/lld/test/ELF/linkerscript/lma-align2.test @@ -7,17 +7,17 @@ # CHECK: Name Type Address Off Size ES Flg Lk Inf Al # CHECK-NEXT: NULL 0000000000000000 000000 000000 00 0 0 0 # CHECK-NEXT: .text PROGBITS 0000000008000000 001000 000001 00 AX 0 0 4 -# CHECK-NEXT: .data PROGBITS 0000000020000000 002000 000006 00 AX 0 0 8 -# CHECK-NEXT: .data2 PROGBITS 000000000800000e 00200e 000008 00 AX 0 0 1 -# CHECK-NEXT: .data3 PROGBITS 0000000020000008 002018 000000 00 AX 0 0 8 -# CHECK-NEXT: .data4 PROGBITS 0000000008000018 002018 000008 00 AX 0 0 1 +# CHECK-NEXT: .data PROGBITS 0000000020000000 002000 000006 00 A 0 0 8 +# CHECK-NEXT: .data2 PROGBITS 000000000800000e 00200e 000008 00 A 0 0 1 +# CHECK-NEXT: .data3 PROGBITS 0000000020000008 002018 000000 00 A 0 0 8 +# CHECK-NEXT: .data4 PROGBITS 0000000008000018 002018 000008 00 A 0 0 1 # CHECK: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align # CHECK-NEXT: LOAD 0x001000 0x0000000008000000 0x0000000008000000 0x000001 0x000001 R E 0x1000 -# CHECK-NEXT: LOAD 0x002000 0x0000000020000000 0x0000000008000008 0x000006 0x000006 R E 0x1000 -# CHECK-NEXT: LOAD 0x00200e 0x000000000800000e 0x000000000800000e 0x000008 0x000008 R E 0x1000 -# CHECK-NEXT: LOAD 0x002018 0x0000000008000018 0x0000000008000018 0x000008 0x000008 R E 0x1000 +# CHECK-NEXT: LOAD 0x002000 0x0000000020000000 0x0000000008000008 0x000006 0x000006 R 0x1000 +# CHECK-NEXT: LOAD 0x00200e 0x000000000800000e 0x000000000800000e 0x000008 0x000008 R 0x1000 +# CHECK-NEXT: LOAD 0x002018 0x0000000008000018 0x0000000008000018 0x000008 0x000008 R 0x1000 MEMORY { CODE (rx) : ORIGIN = 0x08000000, LENGTH = 100K diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp index 72280927471f88..013b2bbc0e67f2 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp @@ -434,11 +434,6 @@ bool GDBRemoteRegisterContext::WriteRegisterBytes(const RegisterInfo *reg_info, } else { // This is an actual register, 
write it success = SetPrimordialRegister(reg_info, gdb_comm); - - if (success && do_reconfigure_arm64_sve) { - AArch64Reconfigure(); - InvalidateAllRegisters(); - } } // Check if writing this register will invalidate any other register @@ -452,6 +447,11 @@ bool GDBRemoteRegisterContext::WriteRegisterBytes(const RegisterInfo *reg_info, false); } + if (success && do_reconfigure_arm64_sve) { + AArch64Reconfigure(); + InvalidateAllRegisters(); + } + return success; } } else { @@ -772,8 +772,6 @@ void GDBRemoteRegisterContext::AArch64Reconfigure() { std::optional<uint64_t> vg_reg_value; const RegisterInfo *vg_reg_info = m_reg_info_sp->GetRegisterInfo("vg"); if (vg_reg_info) { - // Make sure we get the latest value of vg from the remote. - SetRegisterIsValid(vg_reg_info, false); uint32_t vg_reg_num = vg_reg_info->kinds[eRegisterKindLLDB]; uint64_t reg_value = ReadRegisterAsUnsigned(vg_reg_num, fail_value); if (reg_value != fail_value && reg_value <= 32) @@ -783,11 +781,6 @@ void GDBRemoteRegisterContext::AArch64Reconfigure() { std::optional<uint64_t> svg_reg_value; const RegisterInfo *svg_reg_info = m_reg_info_sp->GetRegisterInfo("svg"); if (svg_reg_info) { - // When vg is written it is automatically made invalid. Writing vg will also - // change svg if we're in streaming mode but it will not be made invalid - // so do this manually so the following read gets the latest svg value. - SetRegisterIsValid(svg_reg_info, false); - uint32_t svg_reg_num = svg_reg_info->kinds[eRegisterKindLLDB]; uint64_t reg_value = ReadRegisterAsUnsigned(svg_reg_num, fail_value); if (reg_value != fail_value && reg_value <= 32) diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 16b3ba661d0716..c50ac5de77904f 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -1612,6 +1612,22 @@ bool ProcessGDBRemote::CalculateThreadStopInfo(ThreadGDBRemote *thread) { return false; } +void ProcessGDBRemote::ParseExpeditedRegisters( + ExpeditedRegisterMap &expedited_register_map, ThreadSP thread_sp) { + ThreadGDBRemote *gdb_thread = static_cast<ThreadGDBRemote *>(thread_sp.get()); + RegisterContextSP gdb_reg_ctx_sp(gdb_thread->GetRegisterContext()); + + for (const auto &pair : expedited_register_map) { + StringExtractor reg_value_extractor(pair.second); + WritableDataBufferSP buffer_sp( + new DataBufferHeap(reg_value_extractor.GetStringRef().size() / 2, 0)); + reg_value_extractor.GetHexBytes(buffer_sp->GetData(), '\xcc'); + uint32_t lldb_regnum = gdb_reg_ctx_sp->ConvertRegisterKindToRegisterNumber( + eRegisterKindProcessPlugin, pair.first); + gdb_thread->PrivateSetRegisterValue(lldb_regnum, buffer_sp->GetData()); + } +} + ThreadSP ProcessGDBRemote::SetThreadStopInfo( lldb::tid_t tid, ExpeditedRegisterMap &expedited_register_map, uint8_t signo, const std::string &thread_name, const std::string &reason, @@ -1646,32 +1662,35 @@ ThreadSP ProcessGDBRemote::SetThreadStopInfo( reg_ctx_sp->InvalidateIfNeeded(true); + auto iter = std::find(m_thread_ids.begin(), m_thread_ids.end(), tid); + if (iter != m_thread_ids.end()) + SetThreadPc(thread_sp, iter - m_thread_ids.begin()); + + ParseExpeditedRegisters(expedited_register_map, thread_sp); + // AArch64 SVE/SME specific code below updates SVE and ZA register sizes and // offsets if value of VG or SVG registers has changed since last stop.
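(A brief aside before the AArch64-specific block that follows.) A gdb-remote stop-reply packet may expedite register values as regno:hex-value pairs so the client can avoid follow-up read packets; that is the map ParseExpeditedRegisters walks above, letting StringExtractor::GetHexBytes turn each hex string back into raw bytes with '\xcc' as the filler for malformed input. A self-contained sketch of just that decode step (hypothetical helper, not the lldb API):

```cpp
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

// Decode "1a2b3c" into {0x1a, 0x2b, 0x3c}; invalid digits become the 0xcc
// filler byte, mirroring GetHexBytes(..., '\xcc') in the hunk above.
std::vector<uint8_t> decodeHexRegister(const std::string &hex) {
  auto nibble = [](char c) -> int {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
  };
  std::vector<uint8_t> bytes;
  bytes.reserve(hex.size() / 2);
  for (std::size_t i = 0; i + 1 < hex.size(); i += 2) {
    int hi = nibble(hex[i]), lo = nibble(hex[i + 1]);
    bytes.push_back(hi < 0 || lo < 0 ? 0xcc : uint8_t(hi << 4 | lo));
  }
  return bytes;
}
```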
const ArchSpec &arch = GetTarget().GetArchitecture(); if (arch.IsValid() && arch.GetTriple().isAArch64()) { - GDBRemoteRegisterContext *gdb_remote_reg_ctx = - static_cast<GDBRemoteRegisterContext *>(reg_ctx_sp.get()); + GDBRemoteRegisterContext *reg_ctx_sp = + static_cast<GDBRemoteRegisterContext *>( + gdb_thread->GetRegisterContext().get()); - if (gdb_remote_reg_ctx) { - gdb_remote_reg_ctx->AArch64Reconfigure(); - gdb_remote_reg_ctx->InvalidateAllRegisters(); + if (reg_ctx_sp) { + reg_ctx_sp->AArch64Reconfigure(); + // Now we have changed the offsets of all the registers, so the values + // will be corrupted. + reg_ctx_sp->InvalidateAllRegisters(); + + // Expedited register values will never contain registers that would be + // resized by AArch64Reconfigure. So we are safe to continue using these + // values. These values include vg, svg and useful general purpose + // registers so this saves a few read packets each time we make use of + // them. + ParseExpeditedRegisters(expedited_register_map, thread_sp); } } - auto iter = std::find(m_thread_ids.begin(), m_thread_ids.end(), tid); - if (iter != m_thread_ids.end()) - SetThreadPc(thread_sp, iter - m_thread_ids.begin()); - - for (const auto &pair : expedited_register_map) { - StringExtractor reg_value_extractor(pair.second); - WritableDataBufferSP buffer_sp( - new DataBufferHeap(reg_value_extractor.GetStringRef().size() / 2, 0)); - reg_value_extractor.GetHexBytes(buffer_sp->GetData(), '\xcc'); - uint32_t lldb_regnum = reg_ctx_sp->ConvertRegisterKindToRegisterNumber( - eRegisterKindProcessPlugin, pair.first); - gdb_thread->PrivateSetRegisterValue(lldb_regnum, buffer_sp->GetData()); - } thread_sp->SetName(thread_name.empty() ? nullptr : thread_name.c_str()); gdb_thread->SetThreadDispatchQAddr(thread_dispatch_qaddr); diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h index f0ead4c38c237a..f3787e7169047e 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h @@ -470,6 +470,9 @@ class ProcessGDBRemote : public Process, void DidForkSwitchSoftwareBreakpoints(bool enable); void DidForkSwitchHardwareTraps(bool enable); + void ParseExpeditedRegisters(ExpeditedRegisterMap &expedited_register_map, + lldb::ThreadSP thread_sp); + // Lists of register fields generated from the remote's target XML. // Pointers to these RegisterFlags will be set in the register info passed // back to the upper levels of lldb.
Doing so is safe because this class will diff --git a/lldb/tools/debugserver/source/MacOSX/MachTask.mm b/lldb/tools/debugserver/source/MacOSX/MachTask.mm index 4f5b4039243f66..fd2ac64ac6cf79 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachTask.mm +++ b/lldb/tools/debugserver/source/MacOSX/MachTask.mm @@ -145,6 +145,8 @@ //---------------------------------------------------------------------- void MachTask::Clear() { // Do any cleanup needed for this task + if (m_exception_thread) + ShutDownExcecptionThread(); m_task = TASK_NULL; m_exception_thread = 0; m_exception_port = MACH_PORT_NULL; diff --git a/llvm/cmake/config.guess b/llvm/cmake/config.guess index 71abbf939f97f4..f4896236776942 100644 --- a/llvm/cmake/config.guess +++ b/llvm/cmake/config.guess @@ -816,6 +816,9 @@ EOF i*:PW*:*) echo ${UNAME_MACHINE}-pc-pw32 exit ;; + *:SerenityOS:*:*) + echo ${UNAME_MACHINE}-pc-serenity + exit ;; *:Interix*:*) case ${UNAME_MACHINE} in x86) diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index 918e69352effe7..6407dde5bcd6c7 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -90,8 +90,6 @@ bool getDisableTailCalls(); bool getStackSymbolOrdering(); -unsigned getOverrideStackAlignment(); - bool getStackRealign(); std::string getTrapFuncName(); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h index eff87c5617d997..ea75c2c7f6f419 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h @@ -118,11 +118,7 @@ class GISelKnownBitsAnalysis : public MachineFunctionPass { GISelKnownBitsAnalysis() : MachineFunctionPass(ID) { initializeGISelKnownBitsAnalysisPass(*PassRegistry::getPassRegistry()); } - GISelKnownBits &get(MachineFunction &MF) { - if (!Info) - Info = std::make_unique(MF); - return *Info.get(); - } + GISelKnownBits &get(MachineFunction &MF); void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction &MF) override; void releaseMemory() override { Info.reset(); } diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 7ccb99d511fff4..851353042cc267 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -35,6 +35,7 @@ class LegalizationArtifactCombiner { MachineIRBuilder &Builder; MachineRegisterInfo &MRI; const LegalizerInfo &LI; + GISelKnownBits *KB; static bool isArtifactCast(unsigned Opc) { switch (Opc) { @@ -50,8 +51,9 @@ class LegalizationArtifactCombiner { public: LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI, - const LegalizerInfo &LI) - : Builder(B), MRI(MRI), LI(LI) {} + const LegalizerInfo &LI, + GISelKnownBits *KB = nullptr) + : Builder(B), MRI(MRI), LI(LI), KB(KB) {} bool tryCombineAnyExt(MachineInstr &MI, SmallVectorImpl &DeadInsts, @@ -131,13 +133,25 @@ class LegalizationArtifactCombiner { LLVM_DEBUG(dbgs() << ".. 
Combine MI: " << MI;); LLT SrcTy = MRI.getType(SrcReg); APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits()); - auto Mask = Builder.buildConstant( - DstTy, MaskVal.zext(DstTy.getScalarSizeInBits())); if (SextSrc && (DstTy != MRI.getType(SextSrc))) SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0); if (TruncSrc && (DstTy != MRI.getType(TruncSrc))) TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0); - Builder.buildAnd(DstReg, SextSrc ? SextSrc : TruncSrc, Mask); + APInt ExtMaskVal = MaskVal.zext(DstTy.getScalarSizeInBits()); + Register AndSrc = SextSrc ? SextSrc : TruncSrc; + // Elide G_AND and mask constant if possible. + // The G_AND would also be removed by the post-legalize redundant_and + // combine, but in this very common case, eliding early and regardless of + // OptLevel results in significant compile-time and O0 code-size + // improvements. Inserting unnecessary instructions between boolean defs + // and uses hinders a lot of folding during ISel. + if (KB && (KB->getKnownZeroes(AndSrc) | ExtMaskVal).isAllOnes()) { + replaceRegOrBuildCopy(DstReg, AndSrc, MRI, Builder, UpdatedDefs, + Observer); + } else { + auto Mask = Builder.buildConstant(DstTy, ExtMaskVal); + Builder.buildAnd(DstReg, AndSrc, Mask); + } markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 50c63621b8e416..33fa3985e64d8b 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1697,6 +1697,13 @@ def int_amdgcn_s_setprio : DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects]>; +def int_amdgcn_s_ttracedata : + DefaultAttrsIntrinsic<[], [llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects]>; +def int_amdgcn_s_ttracedata_imm : + DefaultAttrsIntrinsic<[], [llvm_i16_ty], + [IntrNoMem, IntrHasSideEffects, ImmArg>]>; + // This is IntrHasSideEffects so it can be used to read cycle counters. def int_amdgcn_s_getreg : ClangBuiltin<"__builtin_amdgcn_s_getreg">, @@ -1932,6 +1939,11 @@ def int_amdgcn_inverse_ballot : def int_amdgcn_s_bitreplicate : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>; +// Lowers to S_QUADMASK_B{32,64} +// The argument must be uniform; otherwise, the result is undefined. +def int_amdgcn_s_quadmask : + DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyint_ty], [IntrNoMem, IntrConvergent]>; + class AMDGPUWaveReduce : Intrinsic< [data_ty], [ diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index 0f56ac68c851f6..414b86d53aff6b 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -221,7 +221,8 @@ class Triple { Emscripten, ShaderModel, // DirectX ShaderModel LiteOS, - LastOSType = LiteOS + Serenity, + LastOSType = Serenity }; enum EnvironmentType { UnknownEnvironment, @@ -668,6 +669,10 @@ class Triple { return getOS() == Triple::AIX; } + bool isOSSerenity() const { + return getOS() == Triple::Serenity; + } + /// Tests whether the OS uses the ELF binary format. 
bool isOSBinFormatELF() const { return getObjectFormat() == Triple::ELF; diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index bd1bd8261123e5..7190036d60c8c6 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1541,18 +1541,6 @@ struct Attributor { /* ForceUpdate */ false); } - /// Similar to getAAFor but the return abstract attribute will be updated (via - /// `AbstractAttribute::update`) even if it is found in the cache. This is - /// especially useful for AAIsDead as changes in liveness can make updates - /// possible/useful that were not happening before as the abstract attribute - /// was assumed dead. - template <typename AAType> - const AAType *getAndUpdateAAFor(const AbstractAttribute &QueryingAA, - const IRPosition &IRP, DepClassTy DepClass) { - return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass, - /* ForceUpdate */ true); - } - /// The version of getAAFor that allows to omit a querying abstract /// attribute. Using this after Attributor started running is restricted to /// only the Attributor itself. Initial seeding of AAs can be done via this diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index fe3d7d679129fd..c07b5b9e130f80 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6412,6 +6412,44 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, return Constant::getNullValue(ReturnType); break; } + case Intrinsic::ptrmask: { + if (isa<PoisonValue>(Op0) || isa<PoisonValue>(Op1)) + return PoisonValue::get(Op0->getType()); + + // NOTE: We can't apply these simplifications based on the value of Op1 + // because we need to preserve provenance. + if (Q.isUndefValue(Op0) || match(Op0, m_Zero())) + return Constant::getNullValue(Op0->getType()); + + assert(Op1->getType()->getScalarSizeInBits() == + Q.DL.getIndexTypeSizeInBits(Op0->getType()) && + "Invalid mask width"); + // If index-width (mask size) is less than pointer-size then mask is + // 1-extended. + if (match(Op1, m_PtrToInt(m_Specific(Op0)))) + return Op0; + + // NOTE: We may have attributes associated with the return value of the + // llvm.ptrmask intrinsic that will be lost when we just return the + // operand. We should try to preserve them. + if (match(Op1, m_AllOnes()) || Q.isUndefValue(Op1)) + return Op0; + + Constant *C; + if (match(Op1, m_ImmConstant(C))) { + KnownBits PtrKnown = computeKnownBits(Op0, /*Depth=*/0, Q); + // See if we are only masking off bits we know are already zero due to + // alignment.
+ APInt IrrelevantPtrBits = + PtrKnown.Zero.zextOrTrunc(C->getType()->getScalarSizeInBits()); + C = ConstantFoldBinaryOpOperands( + Instruction::Or, C, ConstantInt::get(C->getType(), IrrelevantPtrBits), + Q.DL); + if (C != nullptr && C->isAllOnesValue()) + return Op0; + } + break; + } case Intrinsic::smax: case Intrinsic::smin: case Intrinsic::umax: diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index a41d0357bf343b..06dcf0a4d463b4 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -56,7 +56,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 8fbb97d261a405..c433d78516e1de 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/Module.h" +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "gisel-known-bits" @@ -773,3 +774,12 @@ void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) { return false; } + +GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) { + if (!Info) { + unsigned MaxDepth = + MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6; + Info = std::make_unique<GISelKnownBits>(MF, MaxDepth); + } + return *Info.get(); +} diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index aecbe0b7604c07..6d75258c1041b1 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -218,7 +218,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, // This will keep all the observers notified about new insertions/deletions.
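Aside: the G_AND elision added to LegalizationArtifactCombiner earlier in this diff and the ptrmask fold above rest on the same known-bits argument: a mask cannot change a value whose maskable bits are already known zero, i.e. (KnownZero | Mask) is all-ones. A minimal model with plain 64-bit integers standing in for APInt/KnownBits (illustrative only, not the in-tree API):

```cpp
#include <cstdint>

// A mask M is redundant for a value V when every bit M would clear is
// already known to be zero in V.
bool maskIsRedundant(uint64_t knownZeroOfV, uint64_t mask) {
  return (knownZeroOfV | mask) == ~uint64_t{0};
}

// Examples: a value zero-extended from 8 bits has knownZero == ~0xffull, so
// AND with 0xff is a no-op; a 16-byte-aligned pointer has its low four bits
// known zero, so ptrmask with ~UINT64_C(0xf) is likewise a no-op.
```

This is also why the Legalizer hunk just below threads GISelKnownBits into the artifact combiner, with the analysis depth capped at 2 at -O0 so the check stays cheap.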
RAIIMFObsDelInstaller Installer(MF, WrapperObserver); LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder, KB); - LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI); + LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI, KB); bool Changed = false; SmallVector RetryList; do { diff --git a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp index 2343386e031c5d..e59a0197d6500e 100644 --- a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp +++ b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp @@ -15,7 +15,6 @@ #include "llvm/DebugInfo/CodeView/RecordName.h" #include "llvm/DebugInfo/CodeView/RecordSerialization.h" #include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include #include diff --git a/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp index e52f3e56f11550..eadc50f2da800a 100644 --- a/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp +++ b/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp @@ -8,7 +8,6 @@ #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp index 23946c7de9adbf..6359ceb4c51749 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp @@ -17,7 +17,6 @@ #include "llvm/ExecutionEngine/JITLink/aarch32.h" #include "llvm/Object/ELF.h" #include "llvm/Object/ELFObjectFile.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/TargetParser/ARMTargetParser.h" diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp index 25b1dd9d3d1253..3b86250b60a44c 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp @@ -15,7 +15,6 @@ #include "llvm/ExecutionEngine/JITLink/TableManager.h" #include "llvm/ExecutionEngine/JITLink/ppc64.h" #include "llvm/Object/ELFObjectFile.h" -#include "llvm/Support/Endian.h" #include "EHFrameSupportImpl.h" #include "ELFLinkGraphBuilder.h" diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d00db82c9e49ac..291f0c8c5d991c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8203,7 +8203,8 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, } SDValue Result; - if (getTargetMachine().getCodeModel() == CodeModel::Large) { + if (getTargetMachine().getCodeModel() == CodeModel::Large && + !getTargetMachine().isPositionIndependent()) { Result = getAddrLarge(GN, DAG, OpFlags); } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) { Result = getAddrTiny(GN, DAG, OpFlags); @@ -9568,12 +9569,12 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, // is necessary here. Just get the address of the jump table. 
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - if (getTargetMachine().getCodeModel() == CodeModel::Large && - !Subtarget->isTargetMachO()) { + CodeModel::Model CM = getTargetMachine().getCodeModel(); + if (CM == CodeModel::Large && !getTargetMachine().isPositionIndependent() && + !Subtarget->isTargetMachO()) return getAddrLarge(JT, DAG); - } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) { + if (CM == CodeModel::Tiny) return getAddrTiny(JT, DAG); - } return getAddr(JT, DAG); } @@ -9599,27 +9600,28 @@ SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op, SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - - if (getTargetMachine().getCodeModel() == CodeModel::Large) { + CodeModel::Model CM = getTargetMachine().getCodeModel(); + if (CM == CodeModel::Large) { // Use the GOT for the large code model on iOS. if (Subtarget->isTargetMachO()) { return getGOT(CP, DAG); } - return getAddrLarge(CP, DAG); - } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) { + if (!getTargetMachine().isPositionIndependent()) + return getAddrLarge(CP, DAG); + } else if (CM == CodeModel::Tiny) { return getAddrTiny(CP, DAG); - } else { - return getAddr(CP, DAG); } + return getAddr(CP, DAG); } SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op); - if (getTargetMachine().getCodeModel() == CodeModel::Large && - !Subtarget->isTargetMachO()) { - return getAddrLarge(BA, DAG); - } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) { + CodeModel::Model CM = getTargetMachine().getCodeModel(); + if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) { + if (!getTargetMachine().isPositionIndependent()) + return getAddrLarge(BA, DAG); + } else if (CM == CodeModel::Tiny) { return getAddrTiny(BA, DAG); } return getAddr(BA, DAG); } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 88516967515a58..32068444114273 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2849,7 +2849,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { if (OpFlags & AArch64II::MO_GOT) { I.setDesc(TII.get(AArch64::LOADgot)); I.getOperand(1).setTargetFlags(OpFlags); - } else if (TM.getCodeModel() == CodeModel::Large) { + } else if (TM.getCodeModel() == CodeModel::Large && + !TM.isPositionIndependent()) { // Materialize the global using movz/movk instructions.
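All of the AArch64 changes above add the same guard: the movz/movk sequence produced by getAddrLarge / materializeLargeCMVal hard-codes the symbol's absolute bits, which is only sound for position-dependent output, so under PIC the large code model now falls through to GOT or ADRP-based addressing. A condensed sketch of the resulting policy (hypothetical enum and helper, not the in-tree API):

```cpp
enum class AddrMode { GotLoad, AbsMovzMovk, AdrTiny, AdrpAdd };

// Mirrors the order of checks in the lowering functions above.
AddrMode selectAddrMode(bool useGot, bool largeCM, bool tinyCM, bool pic) {
  if (useGot)
    return AddrMode::GotLoad;     // LOADgot / getGOT
  if (largeCM && !pic)
    return AddrMode::AbsMovzMovk; // getAddrLarge: absolute bits, non-PIC only
  if (tinyCM)
    return AddrMode::AdrTiny;     // getAddrTiny
  return AddrMode::AdrpAdd;       // getAddr: PC-relative, PIC-safe
}
```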
materializeLargeCMVal(I, GV, OpFlags); I.eraseFromParent(); @@ -3502,7 +3503,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return true; } case TargetOpcode::G_BLOCK_ADDR: { - if (TM.getCodeModel() == CodeModel::Large) { + if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) { materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0); I.eraseFromParent(); return true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 9eb0d83b9c2043..9a44001454c1d3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -191,6 +191,9 @@ extern char &AMDGPUImageIntrinsicOptimizerID; void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); extern char &AMDGPUPerfHintAnalysisID; +void initializeGCNRegPressurePrinterPass(PassRegistry &); +extern char &GCNRegPressurePrinterID; + // Passes common to R600 and SI FunctionPass *createAMDGPUPromoteAlloca(); void initializeAMDGPUPromoteAllocaPass(PassRegistry&); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 137e999db99673..061115f7fc8c4c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2735,15 +2735,6 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy, // $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant, // which is a 64-bit pc-relative offset from the encoding of the $symbol // operand to the global variable. - // - // What we want here is an offset from the value returned by s_getpc - // (which is the address of the s_add_u32 instruction) to the global - // variable, but since the encoding of $symbol starts 4 bytes after the start - // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too - // small. This requires us to add 4 to the global variable offset in order to - // compute the correct address. Similarly for the s_addc_u32 instruction, the - // encoding of $symbol starts 12 bytes after the start of the s_add_u32 - // instruction. 
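The paragraph deleted here is not lost: it reappears in SIInstrInfo::expandPostRAPseudo later in this diff, and the +4/+12 bias moves with it, applied once during pseudo expansion instead of at every address-lowering site. A quick arithmetic check of those constants, taking the instruction layout from the comment itself:

```cpp
// s_getpc_b64 returns the address of the next instruction (the s_add_u32).
// Each of these SOP2s is a 4-byte opcode followed by a 4-byte literal:
//   s_add_u32  : [4B opcode][4B lo literal]
//   s_addc_u32 : [4B opcode][4B hi literal]
constexpr unsigned LoBias = 4;          // lo literal starts 4B past the PC
constexpr unsigned HiBias = 4 + 4 + 4;  // opcode + lo literal + addc opcode
static_assert(LoBias == 4 && HiBias == 12,
              "matches the offsets re-applied in expandPostRAPseudo");
```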
LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); @@ -2753,11 +2744,11 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy, MachineInstrBuilder MIB = B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET) .addDef(PCReg); - MIB.addGlobalAddress(GV, Offset + 4, GAFlags); + MIB.addGlobalAddress(GV, Offset, GAFlags); if (GAFlags == SIInstrInfo::MO_NONE) MIB.addImm(0); else - MIB.addGlobalAddress(GV, Offset + 12, GAFlags + 1); + MIB.addGlobalAddress(GV, Offset, GAFlags + 1); if (!B.getMRI()->getRegClassOrNull(PCReg)) B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 0c5ed649bcdbe3..047108fd06db5f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2995,6 +2995,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( return; case Intrinsic::amdgcn_inverse_ballot: case Intrinsic::amdgcn_s_bitreplicate: + case Intrinsic::amdgcn_s_quadmask: applyDefaultMapping(OpdMapper); constrainOpWithReadfirstlane(B, MI, 2); // Mask return; @@ -3065,6 +3066,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(B, MI, 2); return; } + case Intrinsic::amdgcn_s_ttracedata: + constrainOpWithReadfirstlane(B, MI, 1); // M0 + return; case Intrinsic::amdgcn_raw_buffer_load_lds: case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: { applyDefaultMapping(OpdMapper); @@ -4537,6 +4541,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize); break; } + case Intrinsic::amdgcn_s_quadmask: { + Register MaskReg = MI.getOperand(2).getReg(); + unsigned MaskSize = MRI.getType(MaskReg).getSizeInBits(); + unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, MaskSize); + OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize); + break; + } case Intrinsic::amdgcn_wave_reduce_umin: case Intrinsic::amdgcn_wave_reduce_umax: { unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); @@ -4661,6 +4673,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32); break; } + case Intrinsic::amdgcn_s_ttracedata: { + // This must be an SGPR, but accept a VGPR. 
+ unsigned Bank = + getRegBankID(MI.getOperand(1).getReg(), MRI, AMDGPU::SGPRRegBankID); + OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32); + break; + } case Intrinsic::amdgcn_end_cf: { unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 81eb407109dbe1..ef57bcdf110a14 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -429,6 +429,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeGCNPreRAOptimizationsPass(*PR); initializeGCNPreRALongBranchRegPass(*PR); initializeGCNRewritePartialRegUsesPass(*PR); + initializeGCNRegPressurePrinterPass(*PR); } static std::unique_ptr createTLOF(const Triple &TT) { diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 1ca0f3b6e06b82..a04c470b7b9762 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "GCNRegPressure.h" +#include "AMDGPU.h" #include "llvm/CodeGen/RegisterPressure.h" using namespace llvm; @@ -31,7 +32,6 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1, return true; } - /////////////////////////////////////////////////////////////////////////////// // GCNRegPressure @@ -135,8 +135,6 @@ bool GCNRegPressure::less(const GCNSubtarget &ST, O.getVGPRNum(ST.hasGFX90AInsts())); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) { return Printable([&RP, ST](raw_ostream &OS) { OS << "VGPRs: " << RP.Value[GCNRegPressure::VGPR32] << ' ' @@ -155,7 +153,6 @@ Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) { OS << '\n'; }); } -#endif static LaneBitmask getDefRegMask(const MachineOperand &MO, const MachineRegisterInfo &MRI) { @@ -269,6 +266,13 @@ void GCNUpwardRPTracker::reset(const MachineInstr &MI, GCNRPTracker::reset(MI, LiveRegsCopy, true); } +void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_, + const LiveRegSet &LiveRegs_) { + MRI = &MRI_; + LiveRegs = LiveRegs_; + MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_); +} + void GCNUpwardRPTracker::recede(const MachineInstr &MI) { assert(MRI && "call reset first"); @@ -418,19 +422,17 @@ bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator Begin, return advance(End); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR, const GCNRPTracker::LiveRegSet &TrackedLR, - const TargetRegisterInfo *TRI) { - return Printable([&LISLR, &TrackedLR, TRI](raw_ostream &OS) { + const TargetRegisterInfo *TRI, StringRef Pfx) { + return Printable([&LISLR, &TrackedLR, TRI, Pfx](raw_ostream &OS) { for (auto const &P : TrackedLR) { auto I = LISLR.find(P.first); if (I == LISLR.end()) { - OS << " " << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second) + OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second) << " isn't found in LIS reported set\n"; } else if (I->second != P.second) { - OS << " " << printReg(P.first, TRI) + OS << Pfx << printReg(P.first, TRI) << " masks doesn't match: LIS reported " << PrintLaneMask(I->second) << ", tracked " << 
PrintLaneMask(P.second) << '\n'; } @@ -438,7 +440,7 @@ Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR, for (auto const &P : LISLR) { auto I = TrackedLR.find(P.first); if (I == TrackedLR.end()) { - OS << " " << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second) + OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second) << " isn't found in tracked set\n"; } } @@ -467,7 +469,6 @@ bool GCNUpwardRPTracker::isValid() const { return true; } -LLVM_DUMP_METHOD Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs, const MachineRegisterInfo &MRI) { return Printable([&LiveRegs, &MRI](raw_ostream &OS) { @@ -483,7 +484,122 @@ Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs, }); } -LLVM_DUMP_METHOD void GCNRegPressure::dump() const { dbgs() << print(*this); } -#endif +static cl::opt<bool> UseDownwardTracker( + "amdgpu-print-rp-downward", + cl::desc("Use GCNDownwardRPTracker for GCNRegPressurePrinter pass"), + cl::init(false), cl::Hidden); + +char llvm::GCNRegPressurePrinter::ID = 0; + char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID; + +INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true) + +bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + const LiveIntervals &LIS = getAnalysis<LiveIntervals>(); + + auto &OS = dbgs(); + +// Leading spaces are important for YAML syntax. +#define PFX " " + + OS << "---\nname: " << MF.getName() << "\nbody: |\n"; + + auto printRP = [](const GCNRegPressure &RP) { + return Printable([&RP](raw_ostream &OS) { + OS << format(PFX " %-5d", RP.getSGPRNum()) + << format(" %-5d", RP.getVGPRNum(false)); + }); + }; + + auto ReportLISMismatchIfAny = [&](const GCNRPTracker::LiveRegSet &TrackedLR, + const GCNRPTracker::LiveRegSet &LISLR) { + if (LISLR != TrackedLR) { + OS << PFX " mis LIS: " << llvm::print(LISLR, MRI) + << reportMismatch(LISLR, TrackedLR, TRI, PFX " "); + } + }; + + // Register pressure before and at an instruction (in program order). + SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP; + + for (auto &MBB : MF) { + RP.clear(); + RP.reserve(MBB.size()); + + OS << PFX; + MBB.printName(OS); + OS << ":\n"; + + SlotIndex MBBStartSlot = LIS.getSlotIndexes()->getMBBStartIdx(&MBB); + SlotIndex MBBEndSlot = LIS.getSlotIndexes()->getMBBEndIdx(&MBB); + + GCNRPTracker::LiveRegSet LiveIn, LiveOut; + GCNRegPressure RPAtMBBEnd; + + if (UseDownwardTracker) { + if (MBB.empty()) { + LiveIn = LiveOut = getLiveRegs(MBBStartSlot, LIS, MRI); + RPAtMBBEnd = getRegPressure(MRI, LiveIn); + } else { + GCNDownwardRPTracker RPT(LIS); + RPT.reset(MBB.front()); + + LiveIn = RPT.getLiveRegs(); + + while (!RPT.advanceBeforeNext()) { + GCNRegPressure RPBeforeMI = RPT.getPressure(); + RPT.advanceToNext(); + RP.emplace_back(RPBeforeMI, RPT.getPressure()); + } + + LiveOut = RPT.getLiveRegs(); + RPAtMBBEnd = RPT.getPressure(); + } + } else { + GCNUpwardRPTracker RPT(LIS); + RPT.reset(MRI, MBBEndSlot); + RPT.moveMaxPressure(); // Clear max pressure.
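As a usage note, the new reset(MRI, SlotIndex) overload used here makes this bottom-up idiom available outside the printer as well; a condensed sketch, assuming LIS, MRI, and MBB are in scope as in runOnMachineFunction (the function then continues below):

```cpp
// Seed the tracker with the block's live-out state, then walk bottom-up;
// afterwards the tracker holds the block's live-in pressure.
GCNUpwardRPTracker RPT(LIS);
RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB));
for (MachineInstr &MI : llvm::reverse(MBB))
  RPT.recede(MI); // moves to the state just above MI
GCNRegPressure LiveInRP = RPT.getPressure();
```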
+ + LiveOut = RPT.getLiveRegs(); + RPAtMBBEnd = RPT.getPressure(); + + for (auto &MI : reverse(MBB)) { + RPT.recede(MI); + if (!MI.isDebugInstr()) + RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure()); + } + + LiveIn = RPT.getLiveRegs(); + } + + OS << PFX " Live-in: " << llvm::print(LiveIn, MRI); + if (!UseDownwardTracker) + ReportLISMismatchIfAny(LiveIn, getLiveRegs(MBBStartSlot, LIS, MRI)); + + OS << PFX " SGPR VGPR\n"; + int I = 0; + for (auto &MI : MBB) { + if (!MI.isDebugInstr()) { + auto &[RPBeforeInstr, RPAtInstr] = + RP[UseDownwardTracker ? I : (RP.size() - 1 - I)]; + ++I; + OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << " "; + } else + OS << PFX " "; + MI.print(OS); + } + OS << printRP(RPAtMBBEnd) << '\n'; + + OS << PFX " Live-out:" << llvm::print(LiveOut, MRI); + if (UseDownwardTracker) + ReportLISMismatchIfAny(LiveOut, getLiveRegs(MBBEndSlot, LIS, MRI)); + } + OS << "...\n"; + return false; + +#undef PFX +} \ No newline at end of file diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index 72e18acc1b8e49..c750fe74749e2b 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -128,6 +128,8 @@ class GCNRPTracker { void clearMaxPressure() { MaxPressure.clear(); } + GCNRegPressure getPressure() const { return CurPressure; } + // returns MaxPressure, resetting it decltype(MaxPressure) moveMaxPressure() { auto Res = MaxPressure; @@ -140,6 +142,9 @@ class GCNRPTracker { } }; +GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, + const MachineRegisterInfo &MRI); + class GCNUpwardRPTracker : public GCNRPTracker { public: GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {} @@ -148,6 +153,14 @@ class GCNUpwardRPTracker : public GCNRPTracker { // filling live regs upon this point using LIS void reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr); + // reset tracker and set live register set to the specified value. + void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_); + + // reset tracker at the specified slot index. 
+ void reset(const MachineRegisterInfo &MRI_, SlotIndex SI) { + reset(MRI_, llvm::getLiveRegs(SI, LIS, MRI_)); + } + // move to the state just above the MI void recede(const MachineInstr &MI); @@ -196,10 +209,6 @@ LaneBitmask getLiveLaneMask(unsigned Reg, const LiveIntervals &LIS, const MachineRegisterInfo &MRI); -GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, - const LiveIntervals &LIS, - const MachineRegisterInfo &MRI); - /// creates a map MachineInstr -> LiveRegSet /// R - range of iterators on instructions /// After - upon entry or exit of every instruction @@ -275,7 +284,22 @@ Printable print(const GCNRPTracker::LiveRegSet &LiveRegs, Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR, const GCNRPTracker::LiveRegSet &TrackedL, - const TargetRegisterInfo *TRI); + const TargetRegisterInfo *TRI, StringRef Pfx = " "); + +struct GCNRegPressurePrinter : public MachineFunctionPass { + static char ID; + +public: + GCNRegPressurePrinter() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LiveIntervals>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 413b3b5afa57a8..c1acf3e00d4fb8 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4744,8 +4744,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( const SIRegisterInfo *TRI = ST.getRegisterInfo(); Register Dst = MI.getOperand(0).getReg(); - Register Src0 = MI.getOperand(1).getReg(); - Register Src1 = MI.getOperand(2).getReg(); + const MachineOperand &Src0 = MI.getOperand(1); + const MachineOperand &Src1 = MI.getOperand(2); const DebugLoc &DL = MI.getDebugLoc(); Register SrcCond = MI.getOperand(3).getReg(); @@ -4754,20 +4754,42 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( const auto *CondRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); Register SrcCondCopy = MRI.createVirtualRegister(CondRC); + const TargetRegisterClass *Src0RC = Src0.isReg() + ? MRI.getRegClass(Src0.getReg()) + : &AMDGPU::VReg_64RegClass; + const TargetRegisterClass *Src1RC = Src1.isReg() + ?
MRI.getRegClass(Src1.getReg()) + : &AMDGPU::VReg_64RegClass; + + const TargetRegisterClass *Src0SubRC = + TRI->getSubRegisterClass(Src0RC, AMDGPU::sub0); + const TargetRegisterClass *Src1SubRC = + TRI->getSubRegisterClass(Src1RC, AMDGPU::sub1); + + MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm( + MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); + MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm( + MI, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC); + + MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm( + MI, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); + MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm( + MI, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); + BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy) .addReg(SrcCond); BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo) - .addImm(0) - .addReg(Src0, 0, AMDGPU::sub0) - .addImm(0) - .addReg(Src1, 0, AMDGPU::sub0) - .addReg(SrcCondCopy); + .addImm(0) + .add(Src0Sub0) + .addImm(0) + .add(Src1Sub0) + .addReg(SrcCondCopy); BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi) - .addImm(0) - .addReg(Src0, 0, AMDGPU::sub1) - .addImm(0) - .addReg(Src1, 0, AMDGPU::sub1) - .addReg(SrcCondCopy); + .addImm(0) + .add(Src0Sub1) + .addImm(0) + .add(Src1Sub1) + .addReg(SrcCondCopy); BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), Dst) .addReg(DstLo) @@ -6725,24 +6747,12 @@ buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV, // $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant, // which is a 64-bit pc-relative offset from the encoding of the $symbol // operand to the global variable. - // - // What we want here is an offset from the value returned by s_getpc - // (which is the address of the s_add_u32 instruction) to the global - // variable, but since the encoding of $symbol starts 4 bytes after the start - // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too - // small. This requires us to add 4 to the global variable offset in order to - // compute the correct address. Similarly for the s_addc_u32 instruction, the - // encoding of $symbol starts 12 bytes after the start of the s_add_u32 - // instruction. - SDValue PtrLo = - DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, GAFlags); + SDValue PtrLo = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset, GAFlags); SDValue PtrHi; - if (GAFlags == SIInstrInfo::MO_NONE) { + if (GAFlags == SIInstrInfo::MO_NONE) PtrHi = DAG.getTargetConstant(0, DL, MVT::i32); - } else { - PtrHi = - DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 12, GAFlags + 1); - } + else + PtrHi = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset, GAFlags + 1); return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, PtrLo, PtrHi); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 243076de3d13cd..64f2997d11c142 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2439,21 +2439,33 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { Register Reg = MI.getOperand(0).getReg(); Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0); Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1); + MachineOperand OpLo = MI.getOperand(1); + MachineOperand OpHi = MI.getOperand(2); // Create a bundle so these instructions won't be re-ordered by the // post-RA scheduler. 
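#include <cstdint>
// The byte arithmetic behind the comment moved into SIInstrInfo below, as a
// standalone sketch (illustrative constants only; assumes the bundle layout
// s_getpc_b64 / s_add_u32 / s_addc_u32 with 4-byte opcodes and 4-byte inline
// literals, which is exactly what the +4/+12 addends in this patch encode):
//
//   PC + 0  : s_add_u32 opcode   (s_getpc_b64 returns this address)
//   PC + 4  : s_add_u32 $symbol literal  -> lo addend must be Offset + 4
//   PC + 8  : s_addc_u32 opcode
//   PC + 12 : s_addc_u32 $symbol literal -> hi addend must be Offset + 12
constexpr int64_t biasedLoOffset(int64_t Offset) { return Offset + 4; }
constexpr int64_t biasedHiOffset(int64_t Offset) { return Offset + 12; }
static_assert(biasedLoOffset(0) == 4 && biasedHiOffset(0) == 12,
              "matches the setOffset() adjustments below");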
MIBundleBuilder Bundler(MBB, MI); Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); - // Add 32-bit offset from this instruction to the start of the - // constant data. - Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo) - .addReg(RegLo) - .add(MI.getOperand(1))); - + // What we want here is an offset from the value returned by s_getpc (which + // is the address of the s_add_u32 instruction) to the global variable, but + // since the encoding of $symbol starts 4 bytes after the start of the + // s_add_u32 instruction, we end up with an offset that is 4 bytes too + // small. This requires us to add 4 to the global variable offset in order + // to compute the correct address. Similarly for the s_addc_u32 instruction, + // the encoding of $symbol starts 12 bytes after the start of the s_add_u32 + // instruction. + + if (OpLo.isGlobal()) + OpLo.setOffset(OpLo.getOffset() + 4); + Bundler.append( + BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo)); + + if (OpHi.isGlobal()) + OpHi.setOffset(OpHi.getOffset() + 12); Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) .addReg(RegHi) - .add(MI.getOperand(2))); + .add(OpHi)); finalizeBundle(MBB, Bundler.begin()); @@ -6523,8 +6535,10 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI, return CreatedBB; } - // Legalize S_BITREPLICATE - if (MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32) { + // Legalize S_BITREPLICATE and S_QUADMASK + if (MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 || + MI.getOpcode() == AMDGPU::S_QUADMASK_B32 || + MI.getOpcode() == AMDGPU::S_QUADMASK_B64) { MachineOperand &Src = MI.getOperand(1); if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg()))) Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI)); @@ -7302,27 +7316,27 @@ void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst, MachineOperand &Src1 = Inst.getOperand(2); MachineOperand &Cond = Inst.getOperand(3); - Register SCCSource = Cond.getReg(); - bool IsSCC = (SCCSource == AMDGPU::SCC); + Register CondReg = Cond.getReg(); + bool IsSCC = (CondReg == AMDGPU::SCC); // If this is a trivial select where the condition is effectively not SCC - // (SCCSource is a source of copy to SCC), then the select is semantically - // equivalent to copying SCCSource. Hence, there is no need to create + // (CondReg is a source of copy to SCC), then the select is semantically + // equivalent to copying CondReg. Hence, there is no need to create // V_CNDMASK, we can just use that and bail out. 
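#include <cstdint>
// Lane-wise reading of the shortcut described above (a model, not SIInstrInfo
// code): selecting between all-ones and zero under a lane-mask condition
// reproduces the condition mask bit-for-bit, so the whole select reduces to a
// plain copy of CondReg.
static uint64_t selectAllOnesOrZero(uint64_t CondMask) {
  uint64_t Result = 0;
  for (unsigned Lane = 0; Lane != 64; ++Lane) {
    bool Cond = (CondMask >> Lane) & 1;
    uint64_t LaneVal = Cond ? 1 : 0; // lane slice of -1 is 1, of 0 is 0
    Result |= LaneVal << Lane;
  }
  return Result; // always equal to CondMask
}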
if (!IsSCC && Src0.isImm() && (Src0.getImm() == -1) && Src1.isImm() && (Src1.getImm() == 0)) { - MRI.replaceRegWith(Dest.getReg(), SCCSource); + MRI.replaceRegWith(Dest.getReg(), CondReg); return; } - const TargetRegisterClass *TC = - RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - - Register CopySCC = MRI.createVirtualRegister(TC); - + Register NewCondReg = CondReg; if (IsSCC) { + const TargetRegisterClass *TC = + RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); + NewCondReg = MRI.createVirtualRegister(TC); + // Now look for the closest SCC def if it is a copy - // replacing the SCCSource with the COPY source register + // replacing the CondReg with the COPY source register bool CopyFound = false; for (MachineInstr &CandI : make_range(std::next(MachineBasicBlock::reverse_iterator(Inst)), @@ -7330,7 +7344,7 @@ void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst, if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1) { if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) { - BuildMI(MBB, MII, DL, get(AMDGPU::COPY), CopySCC) + BuildMI(MBB, MII, DL, get(AMDGPU::COPY), NewCondReg) .addReg(CandI.getOperand(1).getReg()); CopyFound = true; } @@ -7345,24 +7359,31 @@ void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst, unsigned Opcode = (ST.getWavefrontSize() == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; auto NewSelect = - BuildMI(MBB, MII, DL, get(Opcode), CopySCC).addImm(-1).addImm(0); + BuildMI(MBB, MII, DL, get(Opcode), NewCondReg).addImm(-1).addImm(0); NewSelect->getOperand(3).setIsUndef(Cond.isUndef()); } } - Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - - auto UpdatedInst = - BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), ResultReg) - .addImm(0) - .add(Src1) // False - .addImm(0) - .add(Src0) // True - .addReg(IsSCC ? 
CopySCC : SCCSource); - - MRI.replaceRegWith(Dest.getReg(), ResultReg); - legalizeOperands(*UpdatedInst, MDT); - addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); + Register NewDestReg = MRI.createVirtualRegister( + RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg()))); + MachineInstr *NewInst; + if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) { + NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg) + .addImm(0) + .add(Src1) // False + .addImm(0) + .add(Src0) // True + .addReg(NewCondReg); + } else { + NewInst = + BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B64_PSEUDO), NewDestReg) + .add(Src1) // False + .add(Src0) // True + .addReg(NewCondReg); + } + MRI.replaceRegWith(Dest.getReg(), NewDestReg); + legalizeOperands(*NewInst, MDT); + addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist); } void SIInstrInfo::lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst, diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 04c9a6457944c5..e3f54d01eb22a2 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -10,6 +10,7 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIRegisterInfo.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineOperand.h" @@ -32,6 +33,7 @@ class SIOptimizeExecMasking : public MachineFunctionPass { DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping; SmallVector<std::pair<MachineInstr *, MachineInstr *>, 1> OrXors; + SmallVector<MachineOperand *, 1> KillFlagCandidates; Register isCopyFromExec(const MachineInstr &MI) const; Register isCopyToExec(const MachineInstr &MI) const; @@ -41,15 +43,16 @@ class SIOptimizeExecMasking : public MachineFunctionPass { MachineBasicBlock::reverse_iterator findExecCopy(MachineBasicBlock &MBB, MachineBasicBlock::reverse_iterator I) const; - bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start, MCRegister Reg, bool UseLiveOuts = false, bool IgnoreStart = false) const; bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg) const; - MachineInstr *findInstrBackwards(MachineInstr &Origin, - std::function<bool(MachineInstr *)> Pred, - ArrayRef<MCRegister> NonModifiableRegs, - unsigned MaxInstructions = 20) const; + MachineInstr *findInstrBackwards( + MachineInstr &Origin, std::function<bool(MachineInstr *)> Pred, + ArrayRef<MCRegister> NonModifiableRegs, + MachineInstr *Terminator = nullptr, + SmallVectorImpl<MachineOperand *> *KillFlagCandidates = nullptr, + unsigned MaxInstructions = 20) const; bool optimizeExecSequence(); void tryRecordVCmpxAndSaveexecSequence(MachineInstr &MI); bool optimizeVCMPSaveExecSequence(MachineInstr &SaveExecInstr, @@ -325,11 +328,13 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) { // Backwards-iterate from Origin (for n=MaxInstructions iterations) until either // the beginning of the BB is reached or Pred evaluates to true - which can be // an arbitrary condition based on the current MachineInstr, for instance an -// target instruction. Breaks prematurely by returning nullptr if one of the +// target instruction. Breaks prematurely by returning nullptr if one of the // registers given in NonModifiableRegs is modified by the current instruction. 
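// Call shape after this change, condensed from the call site updated further
// down in tryRecordVCmpxAndSaveexecSequence (shown here for orientation, not
// an additional call): the walk now also receives the hoist target and an
// out-list of kill flags that would go stale once the v_cmp moves past them.
//
//   if (!findInstrBackwards(
//           MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs,
//           VCmp, &KillFlagCandidates))
//     return;
//   ...
//   for (MachineOperand *MO : KillFlagCandidates)
//     MO->setIsKill(false); // done in optimizeVCMPSaveExecSequence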
MachineInstr *SIOptimizeExecMasking::findInstrBackwards( MachineInstr &Origin, std::function<bool(MachineInstr *)> Pred, - ArrayRef<MCRegister> NonModifiableRegs, unsigned MaxInstructions) const { + ArrayRef<MCRegister> NonModifiableRegs, MachineInstr *Terminator, + SmallVectorImpl<MachineOperand *> *KillFlagCandidates, + unsigned MaxInstructions) const { MachineBasicBlock::reverse_iterator A = Origin.getReverseIterator(), E = Origin.getParent()->rend(); unsigned CurrentIteration = 0; @@ -344,6 +349,21 @@ MachineInstr *SIOptimizeExecMasking::findInstrBackwards( for (MCRegister Reg : NonModifiableRegs) { if (A->modifiesRegister(Reg, TRI)) return nullptr; + + // Check for kills that appear after the terminator instruction, that + // would not be detected by clearKillFlags, since they will cause the + // register to be dead at a later place, causing the verifier to fail. + // We use the candidates to clear the kill flags later. + if (Terminator && KillFlagCandidates && A != Terminator && + A->killsRegister(Reg, TRI)) { + for (MachineOperand &MO : A->operands()) { + if (MO.isReg() && MO.isKill()) { + Register Candidate = MO.getReg(); + if (Candidate != Reg && TRI->regsOverlap(Candidate, Reg)) + KillFlagCandidates->push_back(&MO); + } + } + } } ++CurrentIteration; @@ -599,6 +619,9 @@ bool SIOptimizeExecMasking::optimizeVCMPSaveExecSequence( if (Src1->isReg()) MRI->clearKillFlags(Src1->getReg()); + for (MachineOperand *MO : KillFlagCandidates) + MO->setIsKill(false); + SaveExecInstr.eraseFromParent(); VCmp.eraseFromParent(); @@ -690,7 +713,8 @@ void SIOptimizeExecMasking::tryRecordVCmpxAndSaveexecSequence( NonDefRegs.push_back(Src1->getReg()); if (!findInstrBackwards( - MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs) + MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs, + VCmp, &KillFlagCandidates)) return; if (VCmp) @@ -777,6 +801,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { OrXors.clear(); SaveExecVCmpMapping.clear(); + KillFlagCandidates.clear(); static unsigned SearchWindow = 10; for (MachineBasicBlock &MBB : MF) { unsigned SearchCount = 0; diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 3143d437e37091..d1d828bfbdb70d 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -238,9 +238,7 @@ class SIWholeQuadMode : public MachineFunctionPass { AU.addRequired<LiveIntervals>(); AU.addPreserved<SlotIndexes>(); AU.addPreserved<LiveIntervals>(); - AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<MachinePostDominatorTree>(); AU.addPreserved<MachinePostDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -1594,8 +1592,8 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { TRI = &TII->getRegisterInfo(); MRI = &MF.getRegInfo(); LIS = &getAnalysis<LiveIntervals>(); - MDT = &getAnalysis<MachineDominatorTree>(); - PDT = &getAnalysis<MachinePostDominatorTree>(); + MDT = getAnalysisIfAvailable<MachineDominatorTree>(); + PDT = getAnalysisIfAvailable<MachinePostDominatorTree>(); if (ST->isWave32()) { AndOpc = AMDGPU::S_AND_B32; diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index c419b5f7a5711f..a28921a7ff33f6 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -326,8 +326,10 @@ def S_XNOR_SAVEEXEC_B64 : SOP1_64 <"s_xnor_saveexec_b64">; } // End hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC] -def S_QUADMASK_B32 : SOP1_32 <"s_quadmask_b32">; -def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64">; +def S_QUADMASK_B32 : SOP1_32 <"s_quadmask_b32", + [(set i32:$sdst, (int_amdgcn_s_quadmask i32:$src0))]>; +def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64", + [(set 
i64:$sdst, (int_amdgcn_s_quadmask i64:$src0))]>; let Uses = [M0] in { def S_MOVRELS_B32 : SOP1_32R <"s_movrels_b32">; @@ -1501,7 +1503,10 @@ def S_INCPERFLEVEL : SOPP_Pseudo <"s_incperflevel", (ins i32imm:$simm16), "$simm def S_DECPERFLEVEL : SOPP_Pseudo <"s_decperflevel", (ins i32imm:$simm16), "$simm16", [(int_amdgcn_s_decperflevel timm:$simm16)]> { } -def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins)> { + +let Uses = [M0] in +def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins), "", + [(int_amdgcn_s_ttracedata M0)]> { let simm16 = 0; let fixed_imm = 1; } @@ -1545,8 +1550,10 @@ let SubtargetPredicate = isGFX10Plus in { [(SIdenorm_mode (i32 timm:$simm16))]>; } + let hasSideEffects = 1 in def S_TTRACEDATA_IMM : - SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16">; + SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16", + [(int_amdgcn_s_ttracedata_imm timm:$simm16)]>; } // End SubtargetPredicate = isGFX10Plus let SubtargetPredicate = isGFX11Plus in { diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp index ecb68ff401e9f4..8744bcb45f75f3 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp @@ -17,7 +17,6 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #define DEBUG_TYPE "loongarch-asmbackend" diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index f2d7fb3e245dbb..4a1247991e3429 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -76,16 +76,14 @@ PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { if (getOrEnforceKnownAlignment( II.getArgOperand(0), Align(16), IC.getDataLayout(), &II, &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) { - Value *Ptr = IC.Builder.CreateBitCast( - II.getArgOperand(0), PointerType::getUnqual(II.getType())); + Value *Ptr = II.getArgOperand(0); return new LoadInst(II.getType(), Ptr, "", false, Align(16)); } break; case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvd2x: { // Turn PPC VSX loads into normal loads. - Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0), - PointerType::getUnqual(II.getType())); + Value *Ptr = II.getArgOperand(0); return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1)); } case Intrinsic::ppc_altivec_stvx: @@ -94,16 +92,14 @@ PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { if (getOrEnforceKnownAlignment( II.getArgOperand(1), Align(16), IC.getDataLayout(), &II, &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) { - Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType()); - Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy); + Value *Ptr = II.getArgOperand(1); return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16)); } break; case Intrinsic::ppc_vsx_stxvw4x: case Intrinsic::ppc_vsx_stxvd2x: { // Turn PPC VSX stores into normal stores. 
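#include "llvm/IR/IRBuilder.h"
// Why the CreateBitCast lines in these PPC cases can simply be dropped: with
// opaque pointers there is a single pointer type, so casting the operand to
// PointerType::getUnqual(Ty) was an identity operation. Minimal sketch of the
// resulting shape (hypothetical helper, not PPCTTIImpl code):
static llvm::StoreInst *storeVectorDirect(llvm::IRBuilder<> &B,
                                          llvm::Value *Val, llvm::Value *Ptr) {
  // The store takes its type from Val; Ptr needs no cast beforehand.
  return B.CreateAlignedStore(Val, Ptr, llvm::Align(1));
}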
- Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType()); - Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy); + Value *Ptr = II.getArgOperand(1); return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1)); } case Intrinsic::ppc_altivec_vperm: diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index 458bf9a2efde4d..887671ecb435d2 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -81,20 +81,9 @@ def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)), (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>; let Predicates = [IsRV64] in { -def : Pat<(i32 (and GPR:$rs1, GPR:$rs2)), (AND GPR:$rs1, GPR:$rs2)>; -def : Pat<(i32 (or GPR:$rs1, GPR:$rs2)), (OR GPR:$rs1, GPR:$rs2)>; -def : Pat<(i32 (xor GPR:$rs1, GPR:$rs2)), (XOR GPR:$rs1, GPR:$rs2)>; - def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)), (ADDIW GPR:$rs1, (i64 (NegImm $imm)))>; -def : Pat<(i32 (and GPR:$rs1, simm12i32:$imm)), - (ANDI GPR:$rs1, (i64 (as_i64imm $imm)))>; -def : Pat<(i32 (or GPR:$rs1, simm12i32:$imm)), - (ORI GPR:$rs1, (i64 (as_i64imm $imm)))>; -def : Pat<(i32 (xor GPR:$rs1, simm12i32:$imm)), - (XORI GPR:$rs1, (i64 (as_i64imm $imm)))>; - def : Pat<(i32 (shl GPR:$rs1, (i32 GPR:$rs2))), (SLLW GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (sra GPR:$rs1, (i32 GPR:$rs2))), (SRAW GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (srl GPR:$rs1, (i32 GPR:$rs2))), (SRLW GPR:$rs1, GPR:$rs2)>; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 67f09377bf45e4..ec5ae3e8f8d541 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -275,16 +275,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.is64Bit()) { setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); - if (!RV64LegalI32) + if (!RV64LegalI32) { setOperationAction(ISD::LOAD, MVT::i32, Custom); - - if (RV64LegalI32) - setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, MVT::i32, Promote); - else setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL}, MVT::i32, Custom); - - if (!RV64LegalI32) { setOperationAction(ISD::SADDO, MVT::i32, Custom); setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT}, MVT::i32, Custom); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 71ba4025b6a07e..c0a5c94835a379 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -2023,12 +2023,21 @@ def : Pat<(trunc GPR:$src), (COPY GPR:$src)>; def : PatGprGpr<add, ADDW, i32, i64>; def : PatGprGpr<sub, SUBW, i32, i64>; +def : PatGprGpr<and, AND, i32, i64>; +def : PatGprGpr<or, OR, i32, i64>; +def : PatGprGpr<xor, XOR, i32, i64>; def : PatGprGpr<shiftopw<shl>, SLLW, i32, i64>; def : PatGprGpr<shiftopw<srl>, SRLW, i32, i64>; def : PatGprGpr<shiftopw<sra>, SRAW, i32, i64>; def : Pat<(i32 (add GPR:$rs1, simm12i32:$imm)), (ADDIW GPR:$rs1, (i64 (as_i64imm $imm)))>; +def : Pat<(i32 (and GPR:$rs1, simm12i32:$imm)), + (ANDI GPR:$rs1, (i64 (as_i64imm $imm)))>; +def : Pat<(i32 (or GPR:$rs1, simm12i32:$imm)), + (ORI GPR:$rs1, (i64 (as_i64imm $imm)))>; +def : Pat<(i32 (xor GPR:$rs1, simm12i32:$imm)), + (XORI GPR:$rs1, (i64 (as_i64imm $imm)))>; def : PatGprImm; def : PatGprImm; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index fec6396c602baa..0a19d3542d7b13 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -817,6 +817,13 @@ def : PatGprGpr; // Experimental RV64 i32 legalization patterns. 
//===----------------------------------------------------------------------===// +def BCLRMaski32 : ImmLeaf<i32, [{ return !isInt<12>(Imm) && isPowerOf2_32(~Imm); +}]>; +def SingleBitSetMaski32 : ImmLeaf<i32, [{ return !isInt<12>(Imm) && isPowerOf2_32(Imm); +}]>; + let Predicates = [HasStdExtZbb, IsRV64] in { def : PatGpr<ctlz, CLZW, i32>; def : PatGpr<cttz, CTZW, i32>; @@ -827,6 +834,10 @@ def : Pat<(i32 (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>; } // Predicates = [HasStdExtZbb, IsRV64] let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in { +def : Pat<(i32 (and GPR:$rs1, (not GPR:$rs2))), (ANDN GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (or GPR:$rs1, (not GPR:$rs2))), (ORN GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (xor GPR:$rs1, (not GPR:$rs2))), (XNOR GPR:$rs1, GPR:$rs2)>; + def : PatGprGpr<shiftopw<rotl>, ROLW, i32, i64>; def : PatGprGpr<shiftopw<rotr>, RORW, i32, i64>; def : PatGprImm<rotr, RORIW, uimm5, i32>; @@ -838,3 +849,12 @@ def : Pat<(i32 (rotl GPR:$rs1, uimm5:$rs2)), let Predicates = [HasStdExtZba, IsRV64] in { def : Pat<(zext GPR:$src), (ADD_UW GPR:$src, (XLenVT X0))>; } + +let Predicates = [HasStdExtZbs, IsRV64] in { +def : Pat<(i32 (and GPR:$rs1, BCLRMaski32:$mask)), + (BCLRI GPR:$rs1, (i64 (BCLRXForm $mask)))>; +def : Pat<(i32 (or GPR:$rs1, SingleBitSetMaski32:$mask)), + (BSETI GPR:$rs1, (i64 (SingleBitSetMaskToIndex $mask)))>; +def : Pat<(i32 (xor GPR:$rs1, SingleBitSetMaski32:$mask)), + (BINVI GPR:$rs1, (i64 (SingleBitSetMaskToIndex $mask)))>; +} // Predicates = [HasStdExtZbs, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp index a62c7b4bbae062..e73cab58d7016f 100644 --- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp +++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp @@ -359,6 +359,10 @@ static bool isSignExtendingOpW(const MachineInstr &MI, // An ORI with an >11 bit immediate (negative 12-bit) will set bits 63:11. case RISCV::ORI: return !isUInt<11>(MI.getOperand(2).getImm()); + // A bseti with X0 is sign extended if the immediate is less than 31. + case RISCV::BSETI: + return MI.getOperand(2).getImm() < 31 && + MI.getOperand(1).getReg() == RISCV::X0; // Copying from X0 produces zero. 
case RISCV::COPY: return MI.getOperand(1).getReg() == RISCV::X0; diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index 9f6b9d8ef18fda..b70fa957a85994 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -32,7 +32,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/SourceMgr.h" using namespace llvm; diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index 5d4eb79675f891..c5e9ad43d22588 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -264,6 +264,7 @@ StringRef Triple::getOSTypeName(OSType Kind) { case PS5: return "ps5"; case RTEMS: return "rtems"; case Solaris: return "solaris"; + case Serenity: return "serenity"; case TvOS: return "tvos"; case UEFI: return "uefi"; case WASI: return "wasi"; @@ -641,6 +642,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("emscripten", Triple::Emscripten) .StartsWith("shadermodel", Triple::ShaderModel) .StartsWith("liteos", Triple::LiteOS) + .StartsWith("serenity", Triple::Serenity) .Default(Triple::UnknownOS); } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 13fca7338454d6..95df36e3221cec 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12415,11 +12415,14 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo { Instruction *ThenTI = SplitBlockAndInsertIfThen(LastCmp, IP, /* Unreachable */ false); BasicBlock *CBBB = CB->getParent(); + A.registerManifestAddedBasicBlock(*ThenTI->getParent()); + A.registerManifestAddedBasicBlock(*CBBB); auto *SplitTI = cast<BranchInst>(LastCmp->getNextNode()); BasicBlock *ElseBB; if (IP == CB) { ElseBB = BasicBlock::Create(ThenTI->getContext(), "", ThenTI->getFunction(), CBBB); + A.registerManifestAddedBasicBlock(*ElseBB); IP = BranchInst::Create(CBBB, ElseBB); SplitTI->replaceUsesOfWith(CBBB, ElseBB); } else { diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index be75a97c80d28f..392dd541523449 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -4598,6 +4598,8 @@ struct AAKernelInfoFunction : AAKernelInfo { BasicBlock *PRNextBB = BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check", Kernel, StateMachineEndParallelBB); + A.registerManifestAddedBasicBlock(*PRExecuteBB); + A.registerManifestAddedBasicBlock(*PRNextBB); // Check if we need to compare the pointer at all or if we can just // call the parallel region function. 
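#include <bit>
#include <cstdint>
// Plain-C++ arithmetic behind the RISC-V Zbs and RISCVOptWInstrs changes
// above (illustrative only, C++20 <bit>): an i32 AND whose mask clears
// exactly one bit is BCLRI with that bit's index, OR/XOR with a single set
// bit are BSETI/BINVI, and "bseti rd, x0, imm" stays sign-extended from bit
// 31 only while imm < 31, which is what the new isSignExtendingOpW case
// tests.
constexpr unsigned bclrIndex(uint32_t Mask) {
  return std::countr_zero(static_cast<uint32_t>(~Mask));
}
constexpr unsigned bsetIndex(uint32_t Mask) { return std::countr_zero(Mask); }
static_assert(std::has_single_bit(~0xFFFFFFEFu) && bclrIndex(0xFFFFFFEF) == 4,
              "and rd, rs, 0xFFFFFFEF == bclri rs, 4");
static_assert(bsetIndex(0x00010000) == 16,
              "or rd, rs, 0x10000 == bseti rs, 16");
// 1 << 30 equals the sign extension of its low 32 bits; 1 << 31 does not.
static_assert((int64_t{1} << 30) == int64_t{int32_t{1 << 30}});
static_assert((int64_t{1} << 31) != int64_t(int32_t(1u << 31)));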
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 7e585e9166247c..b0d31a92464e37 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1962,17 +1962,49 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { break; } case Intrinsic::ptrmask: { + unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType()); + KnownBits Known(BitWidth); + if (SimplifyDemandedInstructionBits(*II, Known)) + return II; + Value *InnerPtr, *InnerMask; + bool Changed = false; + // Combine: + // (ptrmask (ptrmask p, A), B) + // -> (ptrmask p, (and A, B)) if (match(II->getArgOperand(0), m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr), m_Value(InnerMask))))) { assert(II->getArgOperand(1)->getType() == InnerMask->getType() && "Mask types must match"); + // TODO: If InnerMask == Op1, we could copy attributes from inner + // callsite -> outer callsite. Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask); - return replaceInstUsesWith( - *II, Builder.CreateIntrinsic(InnerPtr->getType(), Intrinsic::ptrmask, - {InnerPtr, NewMask})); + replaceOperand(CI, 0, InnerPtr); + replaceOperand(CI, 1, NewMask); + Changed = true; + } + + // See if we can deduce non-null. + if (!CI.hasRetAttr(Attribute::NonNull) && + (Known.isNonZero() || + isKnownNonZero(II, DL, /*Depth*/ 0, &AC, II, &DT))) { + CI.addRetAttr(Attribute::NonNull); + Changed = true; + } + + unsigned NewAlignmentLog = + std::min(Value::MaxAlignmentExponent, + std::min(BitWidth - 1, Known.countMinTrailingZeros())); + // Known bits will capture if we had alignment information associated with + // the pointer argument. + if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) { + CI.addRetAttr(Attribute::getWithAlignment( + CI.getContext(), Align(uint64_t(1) << NewAlignmentLog))); + Changed = true; } + if (Changed) + return &CI; break; } case Intrinsic::uadd_with_overflow: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index e2f17aab9c442c..efd18b44657e5d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1935,6 +1935,15 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) { return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); } + // (ptrtoint (ptrmask P, M)) + // -> (and (ptrtoint P), M) + // This is generally beneficial as `and` is better supported than `ptrmask`. + Value *Ptr, *Mask; + if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr), + m_Value(Mask)))) && + Mask->getType() == Ty) + return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask); + if (auto *GEP = dyn_cast<GEPOperator>(SrcOp)) { // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use. // While this can increase the number of instructions it doesn't actually diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 01c89ea06f2d9d..34b10220ec88ab 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -544,6 +544,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final /// Tries to simplify operands to an integer instruction based on its /// demanded bits. 
bool SimplifyDemandedInstructionBits(Instruction &Inst); + bool SimplifyDemandedInstructionBits(Instruction &Inst, KnownBits &Known); Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts, unsigned Depth = 0, diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index cd6b017874e8d6..fa6fe9d30abd1b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -48,15 +48,20 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, return true; } +/// Returns the bitwidth of the given scalar or pointer type. For vector types, +/// returns the element type's bitwidth. +static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { + if (unsigned BitWidth = Ty->getScalarSizeInBits()) + return BitWidth; + return DL.getPointerTypeSizeInBits(Ty); +} /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if /// the instruction has any properties that allow us to simplify its operands. -bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { - unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); - KnownBits Known(BitWidth); - APInt DemandedMask(APInt::getAllOnes(BitWidth)); - +bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst, + KnownBits &Known) { + APInt DemandedMask(APInt::getAllOnes(Known.getBitWidth())); Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known, 0, &Inst); if (!V) return false; @@ -65,6 +70,13 @@ bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { return true; } +/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if +/// the instruction has any properties that allow us to simplify its operands. +bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { + KnownBits Known(getBitWidth(Inst.getType(), DL)); + return SimplifyDemandedInstructionBits(Inst, Known); +} + /// This form of SimplifyDemandedBits simplifies the specified instruction /// operand if possible, updating it in place. It returns true if it made any /// change and false otherwise. @@ -143,7 +155,6 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI); KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth); - // If this is the root being simplified, allow it to have multiple uses, // just set the DemandedMask to all bits so that we can try to simplify the // operands. This allows visitTruncInst (for example) to simplify the @@ -893,6 +904,48 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } break; } + case Intrinsic::ptrmask: { + unsigned MaskWidth = I->getOperand(1)->getType()->getScalarSizeInBits(); + RHSKnown = KnownBits(MaskWidth); + // If either the LHS or the RHS are Zero, the result is zero. 
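// Worked example for the ptrmask handling below (illustrative numbers, low
// byte only): a pointer known 8-byte aligned has LHSKnown.Zero = ...0000'0111,
// and a constant mask of ...1111'1100 has RHSKnown.Zero = ...0000'0011. The
// AND-style merge "Known = LHSKnown & RHSKnown" unions the zero sets, so
// Known.Zero = ...0000'0111 and Known.countMinTrailingZeros() == 3: the
// result keeps the better of the two alignments, which the visitCallInst
// change above turns into an `align` return attribute. The nested-ptrmask
// combine there relies on the same algebra: ((p & A) & B) == (p & (A & B)).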
+ if (SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1) || + SimplifyDemandedBits( + I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth), + RHSKnown, Depth + 1)) + return I; + + // TODO: Should be 1-extend + RHSKnown = RHSKnown.anyextOrTrunc(BitWidth); + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); + assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); + + Known = LHSKnown & RHSKnown; + KnownBitsComputed = true; + + // If the client is only demanding bits we know to be zero, return + // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer + // provenance, but making the mask zero will be easily optimizable in + // the backend. + if (DemandedMask.isSubsetOf(Known.Zero) && + !match(I->getOperand(1), m_Zero())) + return replaceOperand( + *I, 1, Constant::getNullValue(I->getOperand(1)->getType())); + + // Mask in demanded space does nothing. + // NOTE: We may have attributes associated with the return value of the + // llvm.ptrmask intrinsic that will be lost when we just return the + // operand. We should try to preserve them. + if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) + return I->getOperand(0); + + // If the RHS is a constant, see if we can simplify it. + if (ShrinkDemandedConstant( + I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth))) + return I; + + break; + } + case Intrinsic::fshr: case Intrinsic::fshl: { const APInt *SA; @@ -978,8 +1031,10 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } // If the client is only demanding bits that we know, return the known - // constant. - if (DemandedMask.isSubsetOf(Known.Zero|Known.One)) + // constant. We can't directly simplify pointers as a constant because of + // pointer provenance. + // TODO: We could return `(inttoptr const)` for pointers. + if (!V->getType()->isPointerTy() && DemandedMask.isSubsetOf(Known.Zero | Known.One)) return Constant::getIntegerValue(VTy, Known.One); return nullptr; } diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 9549a9f41fa11e..6e04f54e13da54 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -5546,10 +5546,12 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, "a scale at the same time!"); Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), -(uint64_t)Offset); - if (C->getType() != OpTy) - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - OpTy, false), - C, OpTy); + if (C->getType() != OpTy) { + C = ConstantFoldCastOperand( + CastInst::getCastOpcode(C, false, OpTy, false), C, OpTy, + CI->getModule()->getDataLayout()); + assert(C && "Cast of ConstantInt should have folded"); + } CI->setOperand(1, C); } diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index ba4ecf8b8853e1..b8c9d9d100f117 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -661,15 +661,16 @@ Value *ConstantOffsetExtractor::applyExts(Value *V) { // in the reversed order. for (CastInst *I : llvm::reverse(ExtInsts)) { if (Constant *C = dyn_cast<Constant>(Current)) { - // If Current is a constant, apply s/zext using ConstantExpr::getCast. - // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt. 
- Current = ConstantExpr::getCast(I->getOpcode(), C, I->getType()); - } else { - Instruction *Ext = I->clone(); - Ext->setOperand(0, Current); - Ext->insertBefore(IP); - Current = Ext; + // Try to constant fold the cast. + Current = ConstantFoldCastOperand(I->getOpcode(), C, I->getType(), DL); + if (Current) + continue; } + + Instruction *Ext = I->clone(); + Ext->setOperand(0, Current); + Ext->insertBefore(IP); + Current = Ext; } return Current; } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 21affe7bdce406..d14fd09943839d 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1690,7 +1690,7 @@ static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, End = Builder.CreateFreeze(End, End->getName() + ".fr"); } Value *StrideVal = - Stride ? Exp.expandCodeFor(Stride, Type::getInt64Ty(Ctx), Loc) : nullptr; + Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) : nullptr; LLVM_DEBUG(dbgs() << "Start: " << *Low << " End: " << *High << "\n"); return {Start, End, StrideVal}; } diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll index 0c52a8a683e3a0..88061756d8feeb 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll @@ -926,7 +926,6 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -949,7 +948,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -972,7 +970,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -995,7 +992,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1018,7 +1014,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1306,7 +1301,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange 
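; The "and w10, w8, #0x1" / "and w11, w8, #0x1" deletions in this file are
; all the same cleanup (informal reading of the -O0 sequences, not checked
; output): the register being masked already holds a carry value of 0 or 1,
; so re-masking it with #0x1 before the "subs ..., #1" borrow trick changed
; nothing -- 1 & 0x1 == 1 and 0 & 0x1 == 0.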
; @@ -1321,7 +1315,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1336,7 +1329,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1351,7 +1343,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1366,7 +1357,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1706,7 +1696,6 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1728,7 +1717,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1750,7 +1738,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1772,7 +1759,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1794,7 +1780,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -2081,7 +2066,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: 
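; The max/min expectations below collapse "subs; and #0x1; ands #0x1;
; csel ..., ne" into a single "csel ..., gt": instead of materialising
; (a > b) as a 0/1 register and then testing that register against zero, the
; select now consumes the signed-greater-than flags of the subtraction
; directly. Informal i32 shape (mirrors the checked lines, not new output):
;   subs w10, w8, w9      ; sets NZCV from w8 - w9
;   csel w12, w8, w9, gt  ; w12 = (w8 > w9) ? w8 : w9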
@@ -2095,7 +2079,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -2109,7 +2092,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -2123,7 +2105,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -2137,7 +2118,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: @@ -5392,9 +5372,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5415,9 +5393,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5438,9 +5414,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5461,9 +5435,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5484,9 +5456,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5507,9 +5477,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) ; -O0-LABEL: 
atomicrmw_max_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5529,9 +5497,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5551,9 +5517,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5573,9 +5537,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5595,9 +5557,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5616,9 +5576,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5636,9 +5594,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5656,9 +5612,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5676,9 +5630,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; 
-O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5696,9 +5648,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5716,9 +5666,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5736,9 +5684,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5756,9 +5702,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5776,9 +5720,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5796,9 +5738,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5816,15 +5756,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -5850,15 +5786,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5884,15 +5816,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -5918,15 +5846,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5952,15 +5876,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5987,9 +5907,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6010,9 +5928,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6033,9 +5949,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6056,9 +5970,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6079,9 +5991,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6102,9 +6012,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
@@ -6120,9 +6028,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
@@ -6138,9 +6044,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
@@ -6156,9 +6060,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
@@ -6174,9 +6076,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
@@ -6191,9 +6091,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
@@ -6207,9 +6105,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
@@ -6223,9 +6119,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
@@ -6239,9 +6133,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
@@ -6255,9 +6147,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
@@ -6271,9 +6161,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
@@ -6287,9 +6175,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
@@ -6303,9 +6189,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
@@ -6319,9 +6203,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
@@ -6335,9 +6217,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
@@ -6351,15 +6231,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6377,15 +6253,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6403,15 +6275,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6429,15 +6297,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6455,15 +6319,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6482,9 +6342,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6505,9 +6363,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6528,9 +6384,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6551,9 +6405,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6574,9 +6426,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6597,9 +6447,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6619,9 +6467,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6641,9 +6487,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6663,9 +6507,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6685,9 +6527,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6706,9 +6546,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6726,9 +6564,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6746,9 +6582,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6766,9 +6600,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6786,9 +6618,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6806,9 +6636,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6826,9 +6654,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6846,9 +6672,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6866,9 +6690,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6886,9 +6708,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6906,15 +6726,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -6940,15 +6756,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -6974,15 +6786,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -7008,15 +6816,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7042,15 +6846,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7077,9 +6877,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7100,9 +6898,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7123,9 +6919,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7146,9 +6940,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7169,9 +6961,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7192,9 +6982,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
@@ -7210,9 +6998,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
@@ -7228,9 +7014,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
@@ -7246,9 +7030,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
@@ -7264,9 +7046,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
@@ -7281,9 +7061,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
@@ -7297,9 +7075,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
@@ -7313,9 +7089,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
@@ -7329,9 +7103,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
@@ -7345,9 +7117,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
@@ -7361,9 +7131,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
@@ -7377,9 +7145,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
@@ -7393,9 +7159,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
@@ -7409,9 +7173,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
@@ -7425,9 +7187,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
@@ -7441,15 +7201,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7467,15 +7223,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7493,15 +7245,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7519,15 +7267,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7545,15 +7289,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7572,9 +7312,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7595,9 +7333,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7618,9 +7354,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7641,9 +7375,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7664,9 +7396,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7686,9 +7416,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7707,9 +7435,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7728,9 +7454,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7749,9 +7473,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7770,9 +7492,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7791,9 +7511,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7811,9 +7529,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7831,9 +7547,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7851,9 +7565,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7871,9 +7583,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7891,9 +7601,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7911,9 +7619,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7931,9 +7637,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7951,9 +7655,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7971,9 +7673,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7991,15 +7691,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8025,15 +7721,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8059,15 +7751,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8093,15 +7781,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8127,15 +7811,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8162,9 +7842,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8185,9 +7863,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8208,9 +7884,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8231,9 +7905,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8254,9 +7926,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8276,9 +7946,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -8293,9 +7961,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
@@ -8310,9 +7976,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
@@ -8327,9 +7991,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
@@ -8344,9 +8006,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
@@ -8361,9 +8021,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
@@ -8377,9 +8035,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
@@ -8393,9 +8049,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
@@ -8409,9 +8063,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
@@ -8425,9 +8077,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
@@ -8441,9 +8091,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
@@ -8457,9 +8105,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
@@ -8473,9 +8119,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
@@ -8489,9 +8133,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
@@ -8505,9 +8147,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
@@ -8521,15 +8161,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8547,15 +8183,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8573,15 +8205,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8599,15 +8227,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8625,15 +8249,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8652,9 +8272,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8675,9 +8293,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8698,9 +8314,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8721,9 +8335,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8744,9 +8356,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8766,9 +8376,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8787,9 +8395,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8808,9 +8414,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8829,9 +8433,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8850,9 +8452,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8871,9 +8471,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8891,9 +8489,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8911,9 +8507,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8931,9 +8525,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8951,9 +8543,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8971,9 +8561,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -8991,9 +8579,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@
-9011,9 +8597,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9031,9 +8615,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9051,9 +8633,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9071,15 +8651,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -9105,15 +8681,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9139,15 +8711,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -9173,15 +8741,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value define dso_local i128 
@atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9207,15 +8771,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9242,9 +8802,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9265,9 +8823,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9288,9 +8844,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9311,9 +8865,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9334,9 +8886,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9356,9 +8906,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, 
#0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: @@ -9373,9 +8921,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: @@ -9390,9 +8936,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: @@ -9407,9 +8951,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: @@ -9424,9 +8966,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: @@ -9441,9 +8981,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: @@ -9457,9 +8995,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: @@ -9473,9 +9009,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: @@ -9489,9 +9023,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) define 
dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: @@ -9505,9 +9037,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: @@ -9521,9 +9051,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: @@ -9537,9 +9065,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ -9553,9 +9079,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -9569,9 +9093,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -9585,9 +9107,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -9601,15 +9121,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 
-; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9627,15 +9143,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9653,15 +9165,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9679,15 +9187,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9705,15 +9209,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll index 4b0371d23b5b89..a1712a5ec7a27c 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll +++ 
b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll @@ -511,7 +511,6 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: casp x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -532,7 +531,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: caspa x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -553,7 +551,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: caspl x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -574,7 +571,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: caspal x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -595,7 +591,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: caspal x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -831,7 +826,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -846,7 +840,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -861,7 +854,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -876,7 +868,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -891,7 +882,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, 
x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1046,7 +1036,6 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: casp x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1066,7 +1055,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: caspa x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1086,7 +1074,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: caspl x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1106,7 +1093,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: caspal x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1126,7 +1112,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: caspal x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1361,7 +1346,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -1375,7 +1359,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -1389,7 +1372,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -1403,7 +1385,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -1417,7 +1398,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define 
dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: @@ -3951,15 +3931,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -3983,15 +3959,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -4015,15 +3987,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -4047,15 +4015,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4079,15 +4043,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: 
csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4147,9 +4107,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: @@ -4165,9 +4123,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: @@ -4183,9 +4139,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: @@ -4201,9 +4155,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: @@ -4219,9 +4171,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: @@ -4236,9 +4186,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: @@ -4252,9 +4200,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: @@ -4268,9 +4214,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, 
gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: @@ -4284,9 +4228,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: @@ -4300,9 +4242,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: @@ -4316,9 +4256,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: @@ -4332,9 +4270,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: @@ -4348,9 +4284,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: @@ -4364,9 +4298,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: @@ -4380,9 +4312,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: @@ -4396,15 +4326,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic: ; -O0: subs 
x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4422,15 +4348,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4448,15 +4370,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4474,15 +4392,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4500,15 +4414,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4666,15 +4576,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 
@atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -4698,15 +4604,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -4730,15 +4632,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -4762,15 +4660,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4794,15 +4688,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4862,9 +4752,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value ; -O0-LABEL: 
atomicrmw_min_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: @@ -4880,9 +4768,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: @@ -4898,9 +4784,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: @@ -4916,9 +4800,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: @@ -4934,9 +4816,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: @@ -4951,9 +4831,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: @@ -4967,9 +4845,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: @@ -4983,9 +4859,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: @@ -4999,9 +4873,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, 
w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
@@ -5015,9 +4887,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
@@ -5031,9 +4901,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
@@ -5047,9 +4915,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
@@ -5063,9 +4929,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
@@ -5079,9 +4943,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
@@ -5095,9 +4957,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
@@ -5111,15 +4971,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5137,15 +4993,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5163,15 +5015,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5189,15 +5037,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5215,15 +5059,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5381,15 +5221,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: casp x0, x1, x2, x3, [x8]
@@ -5413,15 +5249,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspa x0, x1, x2, x3, [x8]
@@ -5445,15 +5277,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspl x0, x1, x2, x3, [x8]
@@ -5477,15 +5305,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -5509,15 +5333,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -5576,9 +5396,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -5593,9 +5411,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
@@ -5610,9 +5426,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
@@ -5627,9 +5441,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
@@ -5644,9 +5456,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
@@ -5661,9 +5471,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
@@ -5677,9 +5485,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
@@ -5693,9 +5499,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
@@ -5709,9 +5513,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: @@ -5725,9 +5527,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: @@ -5741,9 +5541,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: @@ -5757,9 +5555,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: @@ -5773,9 +5569,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: @@ -5789,9 +5583,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: @@ -5805,9 +5597,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: @@ -5821,15 +5611,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel 
x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5847,15 +5633,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5873,15 +5655,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5899,15 +5677,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5925,15 +5699,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6091,15 +5861,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, 
w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -6123,15 +5889,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -6155,15 +5917,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -6187,15 +5945,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -6219,15 +5973,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -6286,9 +6036,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: @@ -6303,9 +6051,7 @@ define 
dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: @@ -6320,9 +6066,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: @@ -6337,9 +6081,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: @@ -6354,9 +6096,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: @@ -6371,9 +6111,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: @@ -6387,9 +6125,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: @@ -6403,9 +6139,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: @@ -6419,9 +6153,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; 
-O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: @@ -6435,9 +6167,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: @@ -6451,9 +6181,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: @@ -6467,9 +6195,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ -6483,9 +6209,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -6499,9 +6223,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -6515,9 +6237,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -6531,15 +6251,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, 
x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6557,15 +6273,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6583,15 +6295,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6609,15 +6317,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6635,15 +6339,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll index 89d22c59e630b4..c660c139e35d44 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll @@ -556,7 +556,6 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 
@atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -579,7 +578,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -602,7 +600,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -625,7 +622,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -648,7 +644,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -886,7 +881,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -901,7 +895,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -916,7 +909,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -931,7 +923,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -946,7 +937,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1181,7 +1171,6 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 
@atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1203,7 +1192,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1225,7 +1213,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1247,7 +1234,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1269,7 +1255,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1526,7 +1511,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -1540,7 +1524,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -1554,7 +1537,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -1568,7 +1550,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -1582,7 +1563,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: @@ -4257,9 +4237,7 @@ 
define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4280,9 +4258,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4303,9 +4279,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4326,9 +4300,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4349,9 +4321,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4372,9 +4342,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -4394,9 +4362,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -4416,9 +4382,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -4438,9 +4402,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -4460,9 +4422,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr 
%ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -4481,9 +4441,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -4501,9 +4459,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -4521,9 +4477,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -4541,9 +4495,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -4561,9 +4513,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -4581,9 +4531,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) { define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -4601,9 +4549,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -4621,9 +4567,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 
%value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -4641,9 +4585,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -4661,9 +4603,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -4681,15 +4621,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -4715,15 +4651,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -4749,15 +4681,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -4783,15 +4711,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; 
-O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -4817,15 +4741,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -4852,9 +4772,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4875,9 +4793,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4898,9 +4814,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4921,9 +4835,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4944,9 +4856,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4967,9 +4877,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: @@ -4985,9 +4893,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) ; -O0-LABEL: 
atomicrmw_max_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: @@ -5003,9 +4909,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: @@ -5021,9 +4925,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: @@ -5039,9 +4941,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: @@ -5056,9 +4956,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: @@ -5072,9 +4970,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: @@ -5088,9 +4984,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: @@ -5104,9 +4998,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: @@ -5120,9 +5012,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: 
atomicrmw_max_i32_unaligned_seq_cst:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
@@ -5136,9 +5026,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
@@ -5152,9 +5040,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
@@ -5168,9 +5054,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
@@ -5184,9 +5068,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
@@ -5200,9 +5082,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
@@ -5216,15 +5096,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -5242,15 +5118,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -5268,15 +5140,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -5294,15 +5162,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -5320,15 +5184,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -5347,9 +5207,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
; -O0: sxtb w9, w10
; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -5370,9 +5228,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
; -O0: sxtb w9, w10
; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -5393,9 +5249,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_aligned_release:
; -O0: sxtb w9, w10
; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -5416,9 +5270,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
; -O0: sxtb w9, w10
; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -5439,9 +5291,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
; -O0: sxtb w9, w10
; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -5462,9 +5312,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
; -O0: sxth w10, w8
; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -5484,9 +5332,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
; -O0: sxth w10, w8
; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -5506,9 +5352,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_min_i16_aligned_release:
; -O0: sxth w10, w8
; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -5528,9 +5372,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
; -O0: sxth w10, w8
; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -5550,9 +5392,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
; -O0: sxth w10, w8
; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -5571,9 +5411,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -5591,9 +5429,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -5611,9 +5447,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_aligned_release:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -5631,9 +5465,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -5651,9 +5483,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -5671,9 +5501,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -5691,9 +5519,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -5711,9 +5537,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_aligned_release:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -5731,9 +5555,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -5751,9 +5573,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -5771,15 +5591,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldxp x10, x9, [x11]
@@ -5805,15 +5621,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldaxp x10, x9, [x11]
@@ -5839,15 +5651,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_aligned_release:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldxp x10, x9, [x11]
@@ -5873,15 +5681,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldaxp x10, x9, [x11]
@@ -5907,15 +5711,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldaxp x10, x9, [x11]
@@ -5942,9 +5742,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
; -O0: sxtb w9, w10
; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -5965,9 +5763,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
; -O0: sxtb w9, w10
; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -5988,9 +5784,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
; -O0: sxtb w9, w10
; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -6011,9 +5805,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
; -O0: sxtb w9, w10
; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -6034,9 +5826,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
; -O0: sxtb w9, w10
; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -6057,9 +5847,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O0: sxth w10, w9
; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
@@ -6075,9 +5863,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O0: sxth w10, w9
; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
@@ -6093,9 +5879,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
; -O0: sxth w10, w9
; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
@@ -6111,9 +5895,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O0: sxth w10, w9
; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
@@ -6129,9 +5911,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O0: sxth w10, w9
; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
@@ -6146,9 +5926,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
@@ -6162,9 +5940,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
@@ -6178,9 +5954,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
@@ -6194,9 +5968,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
@@ -6210,9 +5982,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
@@ -6226,9 +5996,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
@@ -6242,9 +6010,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
@@ -6258,9 +6024,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
@@ -6274,9 +6038,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
@@ -6290,9 +6052,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
@@ -6306,15 +6066,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -6332,15 +6088,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -6358,15 +6110,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -6384,15 +6132,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -6410,15 +6154,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -6437,9 +6177,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -6460,9 +6198,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -6483,9 +6219,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -6506,9 +6240,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -6529,9 +6261,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -6551,9 +6281,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -6572,9 +6300,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -6593,9 +6319,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -6614,9 +6338,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -6635,9 +6357,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -6656,9 +6376,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -6676,9 +6394,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -6696,9 +6412,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -6716,9 +6430,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -6736,9 +6448,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -6756,9 +6466,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -6776,9 +6484,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -6796,9 +6502,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -6816,9 +6520,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -6836,9 +6538,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -6856,15 +6556,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldxp x10, x9, [x11]
@@ -6890,15 +6586,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldaxp x10, x9, [x11]
@@ -6924,15 +6616,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldxp x10, x9, [x11]
@@ -6958,15 +6646,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldaxp x10, x9, [x11]
@@ -6992,15 +6676,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldaxp x10, x9, [x11]
@@ -7027,9 +6707,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -7050,9 +6728,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -7073,9 +6749,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -7096,9 +6770,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -7119,9 +6791,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -7141,9 +6811,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -7158,9 +6826,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
@@ -7175,9 +6841,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
@@ -7192,9 +6856,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
@@ -7209,9 +6871,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
@@ -7226,9 +6886,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
@@ -7242,9 +6900,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
@@ -7258,9 +6914,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
@@ -7274,9 +6928,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
@@ -7290,9 +6942,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
@@ -7306,9 +6956,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
@@ -7322,9 +6970,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
@@ -7338,9 +6984,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
@@ -7354,9 +6998,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
@@ -7370,9 +7012,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
@@ -7386,15 +7026,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -7412,15 +7048,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -7438,15 +7070,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -7464,15 +7092,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -7490,15 +7114,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
@@ -7517,9 +7137,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -7540,9 +7158,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -7563,9 +7179,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -7586,9 +7200,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -7609,9 +7221,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -7631,9 +7241,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -7652,9 +7260,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -7673,9 +7279,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -7694,9 +7298,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -7715,9 +7317,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
; -O0: ldaxrh w9, [x11]
; -O0: cmp w9, w8, uxth
; -O0: stlxrh w10, w12, [x11]
@@ -7736,9 +7336,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -7756,9 +7354,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -7776,9 +7372,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -7796,9 +7390,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -7816,9 +7408,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
; -O0: ldaxr w9, [x11]
; -O0: cmp w9, w8
; -O0: stlxr w10, w12, [x11]
@@ -7836,9 +7426,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -7856,9 +7444,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -7876,9 +7462,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -7896,9 +7480,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -7916,9 +7498,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
; -O0: ldaxr x9, [x11]
; -O0: cmp x9, x8
; -O0: stlxr w10, x12, [x11]
@@ -7936,15 +7516,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldxp x10, x9, [x11]
@@ -7970,15 +7546,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldaxp x10, x9, [x11]
@@ -8004,15 +7576,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldxp x10, x9, [x11]
@@ -8038,15 +7606,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldaxp x10, x9, [x11]
@@ -8072,15 +7636,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
; -O0: ands w10, w10, #0x1
; -O0: csel x15, x8, x9, ne
; -O0: ldaxp x10, x9, [x11]
@@ -8107,9 +7667,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value)
; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -8130,9 +7688,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -8153,9 +7709,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -8176,9 +7730,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -8199,9 +7751,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
; -O0: and w9, w10, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
; -O0: ldaxrb w9, [x11]
; -O0: cmp w9, w10, uxtb
; -O0: stlxrb w8, w12, [x11]
@@ -8221,9 +7771,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
@@ -8238,9 +7786,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
@@ -8255,9 +7801,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
@@ -8272,9 +7816,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
@@ -8289,9 +7831,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
@@ -8306,9 +7846,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
@@ -8322,9 +7860,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
@@ -8338,9 +7874,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
@@ -8354,9 +7888,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
@@ -8370,9 +7902,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
@@ -8386,9 +7916,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
@@ -8402,9 +7930,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL:
atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ -8418,9 +7944,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -8434,9 +7958,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -8450,9 +7972,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -8466,15 +7986,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8492,15 +8008,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8518,15 +8030,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; 
-O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8544,15 +8052,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8570,15 +8074,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll index bb6163f5bc3875..4f9e520997a22f 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll @@ -926,7 +926,6 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -949,7 +948,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -972,7 +970,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -995,7 +992,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1018,7 +1014,6 @@ define 
dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1306,7 +1301,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1321,7 +1315,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1336,7 +1329,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1351,7 +1343,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1366,7 +1357,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1706,7 +1696,6 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1728,7 +1717,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1750,7 +1738,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1772,7 +1759,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1794,7 +1780,6 @@ define dso_local i128 
@atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -2081,7 +2066,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -2095,7 +2079,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -2109,7 +2092,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -2123,7 +2105,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -2137,7 +2118,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: @@ -5392,9 +5372,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5415,9 +5393,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5438,9 +5414,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5461,9 +5435,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; 
-O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5484,9 +5456,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5507,9 +5477,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5529,9 +5497,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5551,9 +5517,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5573,9 +5537,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5595,9 +5557,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5616,9 +5576,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5636,9 +5594,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5656,9 +5612,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { define dso_local i32 
@atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5676,9 +5630,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5696,9 +5648,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5716,9 +5666,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) { define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5736,9 +5684,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5756,9 +5702,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5776,9 +5720,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5796,9 +5738,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5816,15 +5756,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, 
i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5850,15 +5786,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5884,15 +5816,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5918,15 +5846,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5952,15 +5876,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5987,9 +5907,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 
-; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6010,9 +5928,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6033,9 +5949,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6056,9 +5970,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6079,9 +5991,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6102,9 +6012,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: @@ -6120,9 +6028,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: @@ -6138,9 +6044,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: @@ -6156,9 +6060,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: @@ -6174,9 +6076,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, 
#0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: @@ -6191,9 +6091,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: @@ -6207,9 +6105,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: @@ -6223,9 +6119,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: @@ -6239,9 +6133,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: @@ -6255,9 +6147,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: @@ -6271,9 +6161,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: @@ -6287,9 +6175,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: @@ -6303,9 +6189,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr 
%ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: @@ -6319,9 +6203,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: @@ -6335,9 +6217,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: @@ -6351,15 +6231,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6377,15 +6253,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6403,15 +6275,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6429,15 +6297,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 
@atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6455,15 +6319,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6482,9 +6342,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6505,9 +6363,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6528,9 +6384,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6551,9 +6405,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6574,9 +6426,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6597,9 +6447,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne 
+; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6619,9 +6467,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6641,9 +6487,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_min_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6663,9 +6507,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6685,9 +6527,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6706,9 +6546,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) { define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6726,9 +6564,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6746,9 +6582,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6766,9 +6600,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6786,9 +6618,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) { define dso_local i32 
@atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6806,9 +6636,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) { define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6826,9 +6654,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6846,9 +6672,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6866,9 +6690,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6886,9 +6708,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6906,15 +6726,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -6940,15 +6756,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands 
w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -6974,15 +6786,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -7008,15 +6816,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -7042,15 +6846,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -7077,9 +6877,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7100,9 +6898,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7123,9 +6919,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_unaligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb 
w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7146,9 +6940,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7169,9 +6961,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7192,9 +6982,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: @@ -7210,9 +6998,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: @@ -7228,9 +7014,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: @@ -7246,9 +7030,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: @@ -7264,9 +7046,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: @@ -7281,9 +7061,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: @@ -7297,9 +7075,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire: ; 
-O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: @@ -7313,9 +7089,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: @@ -7329,9 +7103,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: @@ -7345,9 +7117,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: @@ -7361,9 +7131,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: @@ -7377,9 +7145,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: @@ -7393,9 +7159,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: @@ -7409,9 +7173,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: @@ -7425,9 +7187,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 
@atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: @@ -7441,15 +7201,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7467,15 +7223,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7493,15 +7245,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7519,15 +7267,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7545,15 +7289,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs 
x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7572,9 +7312,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7595,9 +7333,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7618,9 +7354,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_aligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7641,9 +7375,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7664,9 +7396,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7686,9 +7416,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) { define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7707,9 +7435,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7728,9 +7454,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) { define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: 
atomicrmw_umax_i16_aligned_release: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7749,9 +7473,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) { define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7770,9 +7492,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) { define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7791,9 +7511,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) { define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7811,9 +7529,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7831,9 +7547,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7851,9 +7565,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7871,9 +7583,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7891,9 +7601,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) { define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 
%value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7911,9 +7619,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7931,9 +7637,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7951,9 +7655,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7971,9 +7673,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7991,15 +7691,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -8025,15 +7721,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -8059,15 +7751,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value define 
dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -8093,15 +7781,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -8127,15 +7811,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -8162,9 +7842,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value) ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8185,9 +7863,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8208,9 +7884,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8231,9 +7905,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; 
-O0: stlxrb w8, w12, [x11] @@ -8254,9 +7926,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8276,9 +7946,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic: @@ -8293,9 +7961,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire: @@ -8310,9 +7976,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release: @@ -8327,9 +7991,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: @@ -8344,9 +8006,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: @@ -8361,9 +8021,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic: @@ -8377,9 +8035,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; 
-O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire: @@ -8393,9 +8049,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release: @@ -8409,9 +8063,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: @@ -8425,9 +8077,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: @@ -8441,9 +8091,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: @@ -8457,9 +8105,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: @@ -8473,9 +8119,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: @@ -8489,9 +8133,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: @@ -8505,9 +8147,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 
%value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: @@ -8521,15 +8161,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8547,15 +8183,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8573,15 +8205,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8599,15 +8227,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8625,15 +8249,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 
-; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8652,9 +8272,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8675,9 +8293,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8698,9 +8314,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_aligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8721,9 +8335,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8744,9 +8356,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8766,9 +8376,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) { define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8787,9 +8395,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8808,9 +8414,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) { define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_release: ; -O0: subs w10, w10, w9, 
uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8829,9 +8433,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) { define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8850,9 +8452,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) { define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8871,9 +8471,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) { define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8891,9 +8489,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8911,9 +8507,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8931,9 +8525,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8951,9 +8543,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8971,9 +8561,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) { define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic: ; -O0: 
subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -8991,9 +8579,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9011,9 +8597,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9031,9 +8615,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9051,9 +8633,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9071,15 +8651,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -9105,15 +8681,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9139,15 +8711,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) 
{ ; -O0-LABEL: atomicrmw_umin_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -9173,15 +8741,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9207,15 +8771,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9242,9 +8802,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9265,9 +8823,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9288,9 +8844,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9311,9 +8865,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9334,9 +8886,7 @@ define dso_local i8 
@atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9356,9 +8906,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: @@ -9373,9 +8921,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: @@ -9390,9 +8936,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: @@ -9407,9 +8951,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: @@ -9424,9 +8966,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: @@ -9441,9 +8981,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: @@ -9457,9 +8995,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: @@ -9473,9 +9009,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: @@ -9489,9 +9023,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: @@ -9505,9 +9037,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: @@ -9521,9 +9051,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: @@ -9537,9 +9065,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ -9553,9 +9079,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -9569,9 +9093,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -9585,9 +9107,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; 
-O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -9601,15 +9121,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9627,15 +9143,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9653,15 +9165,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9679,15 +9187,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9705,15 +9209,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: 
csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
index 635620bb5ae11b..3437ccc8be40d7 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
@@ -926,7 +926,6 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -949,7 +948,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -972,7 +970,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -995,7 +992,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -1018,7 +1014,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -1306,7 +1301,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1321,7 +1315,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1336,7 +1329,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1351,7 +1343,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1366,7 +1357,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1706,7 +1696,6 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1728,7 +1717,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1750,7 +1738,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1772,7 +1759,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1794,7 +1780,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -2081,7 +2066,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2095,7 +2079,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2109,7 +2092,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2123,7 +2105,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2137,7 +2118,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -5392,9 +5372,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5415,9 +5393,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5438,9 +5414,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5461,9 +5435,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5484,9 +5456,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5507,9 +5477,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5529,9 +5497,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5551,9 +5517,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5573,9 +5537,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5595,9 +5557,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5616,9 +5576,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5636,9 +5594,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5656,9 +5612,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5676,9 +5630,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5696,9 +5648,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5716,9 +5666,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5736,9 +5684,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5756,9 +5702,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5776,9 +5720,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5796,9 +5738,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5816,15 +5756,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -5850,15 +5786,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5884,15 +5816,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -5918,15 +5846,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5952,15 +5876,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5987,9 +5907,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6010,9 +5928,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6033,9 +5949,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6056,9 +5970,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6079,9 +5991,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6102,9 +6012,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
@@ -6120,9 +6028,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
@@ -6138,9 +6044,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
@@ -6156,9 +6060,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
@@ -6174,9 +6076,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
@@ -6191,9 +6091,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
@@ -6207,9 +6105,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
@@ -6223,9 +6119,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
@@ -6239,9 +6133,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
@@ -6255,9 +6147,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
@@ -6271,9 +6161,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
@@ -6287,9 +6175,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
@@ -6303,9 +6189,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
@@ -6319,9 +6203,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
@@ -6335,9 +6217,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
@@ -6351,15 +6231,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6377,15 +6253,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6403,15 +6275,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6429,15 +6297,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6455,15 +6319,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6482,9 +6342,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6505,9 +6363,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6528,9 +6384,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6551,9 +6405,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6574,9 +6426,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6597,9 +6447,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6619,9 +6467,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6641,9 +6487,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6663,9 +6507,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6685,9 +6527,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6706,9 +6546,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6726,9 +6564,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6746,9 +6582,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6766,9 +6600,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6786,9 +6618,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6806,9 +6636,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6826,9 +6654,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6846,9 +6672,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6866,9 +6690,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6886,9 +6708,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6906,15 +6726,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -6940,15 +6756,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -6974,15 +6786,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -7008,15 +6816,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7042,15 +6846,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7077,9 +6877,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7100,9 +6898,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7123,9 +6919,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7146,9 +6940,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7169,9 +6961,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7192,9 +6982,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
@@ -7210,9 +6998,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
@@ -7228,9 +7014,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
@@ -7246,9 +7030,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
@@ -7264,9 +7046,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
@@ -7281,9 +7061,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
@@ -7297,9 +7075,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
@@ -7313,9 +7089,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
@@ -7329,9 +7103,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
@@ -7345,9 +7117,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
@@ -7361,9 +7131,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
@@ -7377,9 +7145,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
@@ -7393,9 +7159,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
@@ -7409,9 +7173,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
@@ -7425,9 +7187,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
@@ -7441,15 +7201,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7467,15 +7223,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7493,15 +7245,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7519,15 +7267,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7545,15 +7289,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7572,9 +7312,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7595,9 +7333,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7618,9 +7354,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7641,9 +7375,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7664,9 +7396,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7686,9 +7416,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7707,9 +7435,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7728,9 +7454,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7749,9 +7473,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7770,9 +7492,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7791,9 +7511,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7811,9 +7529,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7831,9 +7547,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7851,9 +7565,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7871,9 +7583,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7891,9 +7601,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7911,9 +7619,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7931,9 +7637,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7951,9 +7655,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7971,9 +7673,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7991,15 +7691,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8025,15 +7721,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8059,15 +7751,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8093,15 +7781,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8127,15 +7811,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8162,9 +7842,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8185,9 +7863,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8208,9 +7884,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8231,9 +7905,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8254,9 +7926,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8276,9 +7946,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -8293,9 +7961,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ;
-O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire: @@ -8310,9 +7976,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release: @@ -8327,9 +7991,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: @@ -8344,9 +8006,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: @@ -8361,9 +8021,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic: @@ -8377,9 +8035,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire: @@ -8393,9 +8049,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release: @@ -8409,9 +8063,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: @@ -8425,9 +8077,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: 
atomicrmw_umax_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: @@ -8441,9 +8091,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: @@ -8457,9 +8105,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: @@ -8473,9 +8119,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: @@ -8489,9 +8133,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: @@ -8505,9 +8147,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: @@ -8521,15 +8161,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8547,15 +8183,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: 
atomicrmw_umax_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8573,15 +8205,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8599,15 +8227,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8625,15 +8249,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8652,9 +8272,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8675,9 +8293,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8698,9 +8314,7 @@ 
define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_aligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8721,9 +8335,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8744,9 +8356,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8766,9 +8376,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) { define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8787,9 +8395,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8808,9 +8414,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) { define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_release: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8829,9 +8433,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) { define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8850,9 +8452,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) { define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8871,9 +8471,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) { define dso_local i32 
@atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8891,9 +8489,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8911,9 +8507,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8931,9 +8525,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8951,9 +8543,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8971,9 +8561,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) { define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -8991,9 +8579,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9011,9 +8597,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9031,9 +8615,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) { define dso_local i64 
@atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9051,9 +8633,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9071,15 +8651,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -9105,15 +8681,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9139,15 +8711,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -9173,15 +8741,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp 
x10, x9, [x11] @@ -9207,15 +8771,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9242,9 +8802,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9265,9 +8823,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9288,9 +8844,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9311,9 +8865,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9334,9 +8886,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9356,9 +8906,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: @@ -9373,9 +8921,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; 
-O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: @@ -9390,9 +8936,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: @@ -9407,9 +8951,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: @@ -9424,9 +8966,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: @@ -9441,9 +8981,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: @@ -9457,9 +8995,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: @@ -9473,9 +9009,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: @@ -9489,9 +9023,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: @@ -9505,9 +9037,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: 
atomicrmw_umin_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: @@ -9521,9 +9051,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: @@ -9537,9 +9065,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ -9553,9 +9079,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -9569,9 +9093,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -9585,9 +9107,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -9601,15 +9121,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9627,15 +9143,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: 
atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9653,15 +9165,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9679,15 +9187,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9705,15 +9209,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll index 7fc733a13bf07f..ee5fbe39b4492c 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll @@ -546,7 +546,6 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: casp x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -567,7 +566,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 
@atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: caspa x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -588,7 +586,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: caspl x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -609,7 +606,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: caspal x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -630,7 +626,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: caspal x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -866,7 +861,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -881,7 +875,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -896,7 +889,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -911,7 +903,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -926,7 +917,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1081,7 +1071,6 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: casp x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1101,7 +1090,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 
%valu define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: caspa x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1121,7 +1109,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: caspl x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1141,7 +1128,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: caspal x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1161,7 +1147,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: caspal x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1396,7 +1381,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -1410,7 +1394,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -1424,7 +1407,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -1438,7 +1420,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -1452,7 +1433,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: @@ -4076,15 +4056,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: 
subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -4108,15 +4084,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -4140,15 +4112,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -4172,15 +4140,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4204,15 +4168,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4272,9 +4232,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, 
w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: @@ -4290,9 +4248,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: @@ -4308,9 +4264,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: @@ -4326,9 +4280,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: @@ -4344,9 +4296,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: @@ -4361,9 +4311,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: @@ -4377,9 +4325,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: @@ -4393,9 +4339,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: @@ -4409,9 +4353,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: @@ -4425,9 
+4367,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: @@ -4441,9 +4381,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: @@ -4457,9 +4395,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: @@ -4473,9 +4409,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: @@ -4489,9 +4423,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: @@ -4505,9 +4437,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: @@ -4521,15 +4451,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4547,15 +4473,11 @@ define dso_local i128 
@atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4573,15 +4495,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4599,15 +4517,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4625,15 +4539,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4791,15 +4701,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel 
x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -4823,15 +4729,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -4855,15 +4757,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -4887,15 +4785,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4919,15 +4813,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4987,9 +4877,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: @@ -5005,9 +4893,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: 
csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: @@ -5023,9 +4909,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: @@ -5041,9 +4925,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: @@ -5059,9 +4941,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: @@ -5076,9 +4956,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: @@ -5092,9 +4970,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: @@ -5108,9 +4984,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: @@ -5124,9 +4998,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: @@ -5140,9 +5012,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: 
bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: @@ -5156,9 +5026,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: @@ -5172,9 +5040,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: @@ -5188,9 +5054,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: @@ -5204,9 +5068,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: @@ -5220,9 +5082,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: @@ -5236,15 +5096,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5262,15 +5118,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, 
w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5288,15 +5140,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5314,15 +5162,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5340,15 +5184,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5506,15 +5346,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -5538,15 +5374,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; 
-O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -5570,15 +5402,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -5602,15 +5430,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -5634,15 +5458,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -5701,9 +5521,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic: @@ -5718,9 +5536,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire: @@ -5735,9 +5551,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) define 
dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release: @@ -5752,9 +5566,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: @@ -5769,9 +5581,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: @@ -5786,9 +5596,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic: @@ -5802,9 +5610,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire: @@ -5818,9 +5624,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release: @@ -5834,9 +5638,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: @@ -5850,9 +5652,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; 
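; Note (annotation, not from the test file): the -O0 updates in this hunk and
; the surrounding ones all follow one pattern. The i1 result of the comparison
; was previously rematerialized with `and`/`ands ... #0x1` and fed into
; `csel ..., ne`; the expected code now folds the comparison into the select
; directly, using the condition that matches the atomicrmw operation: `gt` for
; signed max, `le` for signed min, `hi` for unsigned umax, `ls` for unsigned
; umin. A minimal IR reproducer for one of these checks might look like the
; following sketch (the function name @umax32_unaligned is illustrative, and
; the `align 1` is an assumption about how the unaligned variants force the
; `__atomic_compare_exchange` libcall seen in the checks):
;
;   define i32 @umax32_unaligned(ptr %p, i32 %v) {
;     ; at -O0 this is expected to lower to subs + csel ..., hi and a libcall
;     %old = atomicrmw umax ptr %p, i32 %v seq_cst, align 1
;     ret i32 %old
;   }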
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: @@ -5866,9 +5666,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: @@ -5882,9 +5680,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: @@ -5898,9 +5694,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: @@ -5914,9 +5708,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: @@ -5930,9 +5722,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: @@ -5946,15 +5736,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5972,15 +5758,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, 
#0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5998,15 +5780,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6024,15 +5802,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6050,15 +5824,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6216,15 +5986,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -6248,15 +6014,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; 
-O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -6280,15 +6042,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -6312,15 +6070,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -6344,15 +6098,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -6411,9 +6161,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: @@ -6428,9 +6176,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: @@ -6445,9 +6191,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) define 
dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: @@ -6462,9 +6206,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: @@ -6479,9 +6221,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: @@ -6496,9 +6236,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: @@ -6512,9 +6250,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: @@ -6528,9 +6264,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: @@ -6544,9 +6278,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: @@ -6560,9 +6292,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; 
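; Note (annotation, not from the test file): for the i128 cases the compare is
; done on a 64-bit register pair. The updated -O0 sequence appears to `subs`
; each half separately and merge the two per-half results with `csel ..., eq`
; (keeping the low-half result when the high halves are equal), then pick each
; 64-bit half of the min/max with `csel ..., ne`; the old sequence carried a
; duplicated `subs` plus the `and`/`ands ... #0x1` masking of the i1. A rough
; reproducer under the same assumptions as above (the name @umin128_unaligned
; is illustrative):
;
;   define i128 @umin128_unaligned(ptr %p, i128 %v) {
;     %old = atomicrmw umin ptr %p, i128 %v seq_cst, align 1
;     ret i128 %old
;   }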
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: @@ -6576,9 +6306,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: @@ -6592,9 +6320,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ -6608,9 +6334,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -6624,9 +6348,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -6640,9 +6362,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -6656,15 +6376,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6682,15 +6398,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, 
#0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6708,15 +6420,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6734,15 +6442,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6760,15 +6464,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll index 0ea04d18788f68..2473147509dc87 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll @@ -926,7 +926,6 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -949,7 +948,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -972,7 +970,6 @@ define 
dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -995,7 +992,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1018,7 +1014,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1306,7 +1301,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1321,7 +1315,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1336,7 +1329,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1351,7 +1343,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1366,7 +1357,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1706,7 +1696,6 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1728,7 +1717,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1750,7 +1738,6 @@ define dso_local i128 
@atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1772,7 +1759,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1794,7 +1780,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -2081,7 +2066,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -2095,7 +2079,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -2109,7 +2092,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -2123,7 +2105,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -2137,7 +2118,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: @@ -5392,9 +5372,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5415,9 +5393,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel 
w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5438,9 +5414,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5461,9 +5435,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5484,9 +5456,7 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5507,9 +5477,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5529,9 +5497,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5551,9 +5517,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5573,9 +5537,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5595,9 +5557,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5616,9 +5576,7 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; 
-O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5636,9 +5594,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5656,9 +5612,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5676,9 +5630,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5696,9 +5648,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5716,9 +5666,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) { define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5736,9 +5684,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5756,9 +5702,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5776,9 +5720,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt 
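; Note (annotation, not from the test file): this second file targets plain
; v8a, so the aligned cases expand to load/store-exclusive loops
; (`ldaxrb`/`stlxrb`, `ldaxr`/`stlxr`, `ldxp`/`ldaxp`) rather than the `casp`
; forms or `__atomic_compare_exchange` calls checked earlier, but the csel
; condition-code change is the same. These check lines look script-generated
; (presumably via llvm/utils/update_llc_test_checks.py), so they are best
; regenerated rather than edited by hand.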
; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5796,9 +5738,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5816,15 +5756,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5850,15 +5786,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5884,15 +5816,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5918,15 +5846,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5952,15 +5876,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O0: 
subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5987,9 +5907,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6010,9 +5928,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6033,9 +5949,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6056,9 +5970,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6079,9 +5991,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6102,9 +6012,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: @@ -6120,9 +6028,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: @@ -6138,9 +6044,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: @@ -6156,9 +6060,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: @@ -6174,9 +6076,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: @@ -6191,9 +6091,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: @@ -6207,9 +6105,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: @@ -6223,9 +6119,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: @@ -6239,9 +6133,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: @@ -6255,9 +6147,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: @@ -6271,9 +6161,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: 
bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: @@ -6287,9 +6175,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: @@ -6303,9 +6189,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: @@ -6319,9 +6203,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: @@ -6335,9 +6217,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: @@ -6351,15 +6231,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6377,15 +6253,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6403,15 +6275,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr 
%ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6429,15 +6297,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6455,15 +6319,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6482,9 +6342,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6505,9 +6363,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6528,9 +6384,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6551,9 +6405,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: 
stlxrb w8, w12, [x11] @@ -6574,9 +6426,7 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6597,9 +6447,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6619,9 +6467,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6641,9 +6487,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_min_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6663,9 +6507,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6685,9 +6527,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6706,9 +6546,7 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) { define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6726,9 +6564,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6746,9 +6582,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 
-; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6766,9 +6600,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6786,9 +6618,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6806,9 +6636,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) { define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6826,9 +6654,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6846,9 +6672,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6866,9 +6690,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6886,9 +6708,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6906,15 +6726,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, 
#0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -6940,15 +6756,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -6974,15 +6786,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -7008,15 +6816,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -7042,15 +6846,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -7077,9 +6877,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7100,9 +6898,7 @@ 
define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7123,9 +6919,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_unaligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7146,9 +6940,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7169,9 +6961,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7192,9 +6982,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: @@ -7210,9 +6998,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: @@ -7228,9 +7014,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: @@ -7246,9 +7030,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: @@ -7264,9 +7046,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: @@ 
-7281,9 +7061,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: @@ -7297,9 +7075,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: @@ -7313,9 +7089,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: @@ -7329,9 +7103,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: @@ -7345,9 +7117,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: @@ -7361,9 +7131,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: @@ -7377,9 +7145,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: @@ -7393,9 +7159,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; 
-O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: @@ -7409,9 +7173,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: @@ -7425,9 +7187,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: @@ -7441,15 +7201,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7467,15 +7223,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7493,15 +7245,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7519,15 +7267,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; 
-O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7545,15 +7289,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7572,9 +7312,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7595,9 +7333,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7618,9 +7354,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_aligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7641,9 +7375,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7664,9 +7396,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7686,9 +7416,7 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) { define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: 
stlxrh w10, w12, [x11] @@ -7707,9 +7435,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7728,9 +7454,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) { define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_release: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7749,9 +7473,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) { define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7770,9 +7492,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) { define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7791,9 +7511,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) { define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7811,9 +7529,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7831,9 +7547,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7851,9 +7565,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, 
[x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7871,9 +7583,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) { define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7891,9 +7601,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) { define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7911,9 +7619,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7931,9 +7637,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7951,9 +7655,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7971,9 +7673,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) { define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7991,15 +7691,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) { define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -8025,15 +7721,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val define dso_local i128 
@atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -8059,15 +7751,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -8093,15 +7781,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -8127,15 +7811,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -8162,9 +7842,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value) ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8185,9 +7863,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) { ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8208,9 +7884,7 @@ define dso_local i8 
@atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8231,9 +7905,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8254,9 +7926,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8276,9 +7946,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -8293,9 +7961,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
@@ -8310,9 +7976,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
@@ -8327,9 +7991,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
@@ -8344,9 +8006,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
@@ -8361,9 +8021,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
@@ -8377,9 +8035,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
@@ -8393,9 +8049,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
@@ -8409,9 +8063,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
@@ -8425,9 +8077,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
@@ -8441,9 +8091,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
@@ -8457,9 +8105,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
@@ -8473,9 +8119,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
@@ -8489,9 +8133,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
@@ -8505,9 +8147,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
@@ -8521,15 +8161,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8547,15 +8183,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8573,15 +8205,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8599,15 +8227,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8625,15 +8249,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8652,9 +8272,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8675,9 +8293,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8698,9 +8314,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8721,9 +8335,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8744,9 +8356,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8766,9 +8376,7 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8787,9 +8395,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8808,9 +8414,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8829,9 +8433,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8850,9 +8452,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8871,9 +8471,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8891,9 +8489,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8911,9 +8507,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8931,9 +8525,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8951,9 +8543,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8971,9 +8561,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -8991,9 +8579,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9011,9 +8597,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9031,9 +8615,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9051,9 +8633,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9071,15 +8651,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -9105,15 +8681,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9139,15 +8711,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -9173,15 +8741,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9207,15 +8771,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9242,9 +8802,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9265,9 +8823,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9288,9 +8844,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9311,9 +8865,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9334,9 +8886,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9356,9 +8906,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
@@ -9373,9 +8921,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
@@ -9390,9 +8936,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
@@ -9407,9 +8951,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
@@ -9424,9 +8966,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
@@ -9441,9 +8981,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
@@ -9457,9 +8995,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
@@ -9473,9 +9009,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
@@ -9489,9 +9023,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
@@ -9505,9 +9037,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
@@ -9521,9 +9051,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
@@ -9537,9 +9065,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
@@ -9553,9 +9079,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
@@ -9569,9 +9093,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
@@ -9585,9 +9107,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
@@ -9601,15 +9121,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9627,15 +9143,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9653,15 +9165,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9679,15 +9187,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9705,15 +9209,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index aa5e54992f486f..d03647f8b294ef 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -1770,10 +1770,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT: sxtb w9, w10
 ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, sxtb
-; CHECK-NOLSE-O0-NEXT: cset w9, le
-; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1
-; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1
-; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne
+; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, le
 ; CHECK-NOLSE-O0-NEXT: LBB33_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB33_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
@@ -1843,10 +1840,7 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT: sxtb w9, w10
 ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, sxtb
-; CHECK-NOLSE-O0-NEXT: cset w9, gt
-; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1
-; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1
-; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne
+; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, gt
 ; CHECK-NOLSE-O0-NEXT: LBB34_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB34_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
@@ -1917,10 +1911,7 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT: and w9, w10, #0xff
 ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, uxtb
-; CHECK-NOLSE-O0-NEXT: cset w9, ls
-; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1
-; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1
-; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne
+; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ls
 ; CHECK-NOLSE-O0-NEXT: LBB35_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB35_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
@@ -1991,10 +1982,7 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT: and w9, w10, #0xff
 ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, uxtb
-; CHECK-NOLSE-O0-NEXT: cset w9, hi
-; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1
-; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1
-; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne
+; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, hi
 ; CHECK-NOLSE-O0-NEXT: LBB36_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB36_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
@@ -2463,10 +2451,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT: sxth w10, w8
 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, sxth
-; CHECK-NOLSE-O0-NEXT: cset w10, le
-; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1
-; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1
-; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne
+; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, le
 ; CHECK-NOLSE-O0-NEXT: LBB43_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB43_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
@@ -2536,10 +2521,7 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT: sxth w10, w8
 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, sxth
-; CHECK-NOLSE-O0-NEXT: cset w10, gt
-; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1
-; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1
-; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne
+; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, gt
 ; CHECK-NOLSE-O0-NEXT: LBB44_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB44_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
@@ -2610,10 +2592,7 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT: uxth w10, w8
 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, uxth
-; CHECK-NOLSE-O0-NEXT: cset w10, ls
-; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1
-; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1
-; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne
+; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ls
 ; CHECK-NOLSE-O0-NEXT: LBB45_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB45_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
@@ -2684,10 +2663,7 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT: uxth w10, w8
 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, uxth
-; CHECK-NOLSE-O0-NEXT: cset w10, hi
-; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1
-; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1
-; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne
+; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, hi
 ; CHECK-NOLSE-O0-NEXT: LBB46_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB46_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
@@ -2763,8 +2739,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
 ; CHECK-NOLSE-O0-NEXT: LBB47_3:
 ; CHECK-NOLSE-O0-NEXT: and w8, w0, #0xff
 ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w1, uxtb
-; CHECK-NOLSE-O0-NEXT: cset w8, eq
-; CHECK-NOLSE-O0-NEXT: and w1, w8, #0x1
+; CHECK-NOLSE-O0-NEXT: cset w1, eq
 ; CHECK-NOLSE-O0-NEXT: ret
 ;
 ; CHECK-LSE-O1-LABEL: cmpxchg_i8:
@@ -2784,8 +2759,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
 ; CHECK-LSE-O0-NEXT: casb w0, w2, [x8]
 ; CHECK-LSE-O0-NEXT: and w8, w0, #0xff
 ; CHECK-LSE-O0-NEXT: subs w8, w8, w1, uxtb
-; CHECK-LSE-O0-NEXT: cset w8, eq
-; CHECK-LSE-O0-NEXT: and w1, w8, #0x1
+; CHECK-LSE-O0-NEXT: cset w1, eq
 ; CHECK-LSE-O0-NEXT: ret
   %res = cmpxchg ptr %ptr, i8 %desired, i8 %new monotonic monotonic
   ret { i8, i1 } %res
@@ -2829,8 +2803,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
 ; CHECK-NOLSE-O0-NEXT: LBB48_3:
 ; CHECK-NOLSE-O0-NEXT: and w8, w0, #0xffff
 ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w1, uxth
-; CHECK-NOLSE-O0-NEXT: cset w8, eq
-; CHECK-NOLSE-O0-NEXT: and w1, w8, #0x1
+; CHECK-NOLSE-O0-NEXT: cset w1, eq
 ; CHECK-NOLSE-O0-NEXT: ret
 ;
 ; CHECK-LSE-O1-LABEL: cmpxchg_i16:
@@ -2850,8 +2823,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
 ; CHECK-LSE-O0-NEXT: cash w0, w2, [x8]
 ; CHECK-LSE-O0-NEXT: and w8, w0, #0xffff
 ; CHECK-LSE-O0-NEXT: subs w8, w8, w1, uxth
-; CHECK-LSE-O0-NEXT: cset w8, eq
-; CHECK-LSE-O0-NEXT: and w1, w8, #0x1
+; CHECK-LSE-O0-NEXT: cset w1, eq
 ; CHECK-LSE-O0-NEXT: ret
   %res = cmpxchg ptr %ptr, i16 %desired, i16 %new monotonic monotonic
   ret { i16, i1 } %res
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir
index 02d27e556fd588..860df510b21187 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir
@@ -2,7 +2,7 @@
 # Check that when we combine ZEXT/ANYEXT we assign the correct location.
 # CHECK: !8 = !DILocation(line: 23, column: 5, scope: !4)
-# CHECK: G_AND %16, %15, debug-location !8
+# CHECK: G_AND %15, %16, debug-location !8
 --- |
   target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-debugloc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-debugloc.mir
index 4f44badb8e38c0..b0d7b04ed50a94 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-debugloc.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-debugloc.mir
@@ -2,7 +2,7 @@
 # Check that when we combine SEXT we assign the correct debug location.
 # CHECK: !9 = !DILocation(line: 36, column: 21, scope: !4)
-# CHECK: G_AND %5, %4, debug-location !9
+# CHECK: G_AND %4, %5, debug-location !9
 --- |
   target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll
index 3c193307a863ed..8742a848c4af10 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll
@@ -1,9 +1,7 @@
 ; RUN: llc -mtriple=arm64-apple-ios %s -o - -O0 -global-isel=1 | FileCheck %s
 define void @foo(i512 %in) {
 ; CHECK-LABEL: foo:
-; CHECK: subs
-; CHECK-NEXT: cset
-; CHECK-NEXT: tbnz
+; CHECK: cbz
   switch i512 %in, label %default [
     i512 3923188584616675477397368389504791510063972152790021570560, label %l1
     i512 3923188584616675477397368389504791510063972152790021570561, label %l2
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/invoke-region.ll b/llvm/test/CodeGen/AArch64/GlobalISel/invoke-region.ll
index 291eb5e22ca142..007e1fb3d63dad 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/invoke-region.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/invoke-region.ll
@@ -38,8 +38,8 @@ define i1 @test_lpad_phi_widen_into_pred() personality ptr @__gxx_personality_v0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.3.continue:
 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[C2]](s16), %bb.1, [[C3]](s16), %bb.2
-; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16)
+; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]]
 ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
@@ -95,8 +95,8 @@ define i1 @test_lpad_phi_widen_into_pred_ext(ptr %ptr) personality ptr @__gxx_pe
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.3.continue:
 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[ANYEXT]](s16), %bb.1, [[C2]](s16), %bb.2
-; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16)
+; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
 ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll
index fa3074a554944c..a7d7a1e81617e4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll
@@ -38,6 +38,7 @@ define i32 @test(i32 %a, i1 %c) {
 ; TRANSLATED-NEXT: BL @callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
 ; TRANSLATED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
 ; TRANSLATED-NEXT: G_BR %bb.2
+ ;
 ; PRESELECTION-LABEL: name: test
 ; PRESELECTION: bb.1.entry:
 ; PRESELECTION-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
@@ -50,8 +51,8 @@ define i32 @test(i32 %a, i1 %c) {
 ; PRESELECTION-NEXT: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
 ; PRESELECTION-NEXT: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 100000
 ; PRESELECTION-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:gpr(s32) = G_CONSTANT_FOLD_BARRIER [[C1]]
-; PRESELECTION-NEXT: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
 ; PRESELECTION-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT [[ASSERT_ZEXT]](s8)
+; PRESELECTION-NEXT: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
 ; PRESELECTION-NEXT: [[AND:%[0-9]+]]:gpr(s32) = G_AND [[ANYEXT]], [[C2]]
 ; PRESELECTION-NEXT: G_BRCOND [[AND]](s32), %bb.3
 ; PRESELECTION-NEXT: G_BR %bb.2
@@ -69,6 +70,7 @@ define i32 @test(i32 %a, i1 %c) {
 ; PRESELECTION-NEXT: BL @callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
 ; PRESELECTION-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
 ; PRESELECTION-NEXT: G_BR %bb.2
+ ;
 ; POSTSELECTION-LABEL: name: test
 ; POSTSELECTION: bb.1.entry:
 ; POSTSELECTION-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
index 02f96cbe997898..ceddb4bca72559 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
@@ -10,9 +10,7 @@ body: |
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]]
-; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]]
-; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND]]
+; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDO1]]
 ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64)
 ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64)
   %0:_(s64) = COPY $x0
@@ -37,11 +35,8 @@ body: |
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]]
-; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]]
-; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND]]
-; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]]
-; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[AND1]]
+; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDO1]]
+; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[UADDE1]]
 ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64)
 ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64)
 ; CHECK-NEXT: $x2 = COPY [[UADDE2]](s64)
@@ -89,9 +84,7 @@ body: |
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]]
-; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]]
-; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND]]
+; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDO1]]
 ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64)
 ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64)
   %0:_(s64) = COPY $x0
@@ -119,9 +112,7 @@ body: |
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]]
-; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]]
-; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND]]
+; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDO1]]
 ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64)
 ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64)
   %0:_(s64) = COPY $x0
@@ -367,15 +358,15 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2
-; CHECK-NEXT: [[ANYEXT0:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
+; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8)
 ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8)
-; CHECK-NEXT: [[IMPLICIT_DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT0]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[IMPLICIT_DEF]](s16)
+; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16)
 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<4 x s16>) = G_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR]]
-; CHECK-NEXT: [[VAL0:%[0-9]+]]:_(s16), [[VAL1:%[0-9]+]]:_(s16), [[VAL2:%[0-9]+]]:_(s16), [[VAL3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ADD]](<4 x s16>)
-; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[VAL0]](s16)
-; CHECK-NEXT: $b0 = COPY [[TRUNC3]](s8)
+; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ADD]](<4 x s16>)
+; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
+; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8)
 ; CHECK-NEXT: RET_ReallyLR implicit $b0
   %1:_(s8) = COPY $b0
   %2:_(s8) = COPY $b1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
index c7c061a15a2f98..e9b3aa0a3a8fd8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
@@ -9,8 +9,8 @@ body: |
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), [[COPY]](s64), [[COPY1]]
 ; CHECK-NEXT: $w0 = COPY [[ICMP]](s32)
-; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
@@ -50,11 +50,8 @@ body: |
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[DEF]](s64), [[C1]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[C1]]
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[DEF]](s64), [[C]]
-; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]]
-; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
-; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]]
-; CHECK-NEXT: G_BRCOND [[AND1]](s32), %bb.1
+; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
+; CHECK-NEXT: G_BRCOND [[SELECT]](s32), %bb.1
 ; CHECK-NEXT: G_BR %bb.2
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1:
@@ -91,9 +88,7 @@ body: |
 ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]]
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]]
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
-; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
-; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1
+; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
 ; CHECK-NEXT: G_BR %bb.2
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1:
@@ -132,9 +127,7 @@ body: |
 ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[AND1]], [[AND3]]
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]]
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C2]]
-; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
-; CHECK-NEXT: G_BRCOND [[AND4]](s32), %bb.1
+; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
 ; CHECK-NEXT: G_BR %bb.2
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1:
@@ -173,9 +166,7 @@ body: |
 ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[AND1]], [[AND3]]
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]]
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR]](s64), [[C2]]
-; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
-; CHECK-NEXT: G_BRCOND [[AND4]](s32), %bb.1
+; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
 ; CHECK-NEXT: G_BR %bb.2
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1:
@@ -214,9 +205,7 @@ body: |
 ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[AND1]], [[AND3]]
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]]
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C2]]
-; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
-; CHECK-NEXT: G_BRCOND [[AND4]](s32), %bb.1
+; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
 ; CHECK-NEXT: G_BR %bb.2
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1:
@@ -278,9 +267,7 @@ body: |
 ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[OR4]], [[XOR6]]
 ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[OR5]], [[XOR7]]
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR6]](s64), [[C2]]
-; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
-; CHECK-NEXT: G_BRCOND [[AND16]](s32), %bb.1
+; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
 ; CHECK-NEXT: G_BR %bb.2
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1:
@@ -326,9 +313,7 @@ body: |
 ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[XOR2]]
 ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[XOR3]]
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR2]](s64), [[C2]]
-; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
-; CHECK-NEXT: G_BRCOND [[AND8]](s32), %bb.1
+; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
 ; CHECK-NEXT: G_BR %bb.2
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctlz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctlz.mir
index 35261d26fbb4e9..1cd1ab4a22e180 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctlz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctlz.mir
@@ -292,18 +292,13 @@ body: |
 ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[AND]](s64)
 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTLZ]], [[C3]]
-; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C4]]
-; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C2]], [[C2]], [[AND2]]
+; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C2]], [[C2]], [[UADDO1]]
 ; CHECK-NEXT: [[CTLZ1:%[0-9]+]]:_(s64) = G_CTLZ [[AND1]](s64)
-; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]]
-; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s32), [[UADDO]], [[CTLZ1]]
-; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]]
-; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[UADDE]], [[C2]]
-; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
-; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[SELECT]], [[C5]]
-; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C4]]
-; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SELECT1]], [[C2]], [[AND5]]
+; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO]], [[CTLZ1]]
+; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDE]], [[C2]]
+; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
+; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[SELECT]], [[C4]]
+; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SELECT1]], [[C2]], [[USUBO1]]
 ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64)
 ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64)
 ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
index 472b09a3d78cb3..ae0c29927afa6d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
@@ -172,8 +172,8 @@ body: |
 ; CHECK: liveins: $w0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: %copy:_(s32) = COPY $w0
-; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
+; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
 ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
 ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
@@ -211,8 +211,8 @@ body: |
 ; CHECK: liveins: $w0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: %copy:_(s32) = COPY $w0
-; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
+; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
 ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
 ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
@@ -250,8 +250,8 @@ body: |
 ; CHECK: liveins: $w0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: %copy:_(s32) = COPY $w0
-; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
+; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
 ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
 ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
@@ -288,8 +288,8 @@ body: |
 ; CHECK: liveins: $w0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: %copy:_(s32) = COPY $w0
-; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
+; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
 ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
 ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
index 17b80e03df7e2c..94ac5146006b16 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
@@ -19,6 +19,7 @@ body: |
 ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
 ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ ;
 ; CHECK-CSSC-LABEL: name: s8
 ; CHECK-CSSC: liveins: $w0
 ; CHECK-CSSC-NEXT: {{ $}}
@@ -51,6 +52,7 @@ body: |
 ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
 ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ ;
 ; CHECK-CSSC-LABEL: name: s16
 ; CHECK-CSSC: liveins: $w0
 ; CHECK-CSSC-NEXT: {{ $}}
@@ -83,6 +85,7 @@ body: |
 ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
 ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ ;
 ; CHECK-CSSC-LABEL: name: s32
 ; CHECK-CSSC: liveins: $w0
 ; CHECK-CSSC-NEXT: {{ $}}
@@ -112,6 +115,7 @@ body: |
 ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64)
 ; CHECK-NEXT: $x0 = COPY [[CTLZ]](s64)
 ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ ;
 ; CHECK-CSSC-LABEL: name: s64
 ; CHECK-CSSC: liveins: $x0
 ; CHECK-CSSC-NEXT: {{ $}}
@@ -144,6 +148,7 @@ body: |
 ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<4 x s32>) = G_CTPOP [[AND]](<4 x s32>)
 ; CHECK-NEXT: $q0 = COPY [[CTPOP]](<4 x s32>)
 ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ ;
 ; CHECK-CSSC-LABEL: name: v4s32
 ; CHECK-CSSC: liveins: $q0
 ; CHECK-CSSC-NEXT: {{ $}}
@@ -180,6 +185,7 @@ body: |
 ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64)
 ; CHECK-NEXT: $x0 = COPY [[CTLZ]](s64)
 ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ ;
 ; CHECK-CSSC-LABEL: name: s35
 ; CHECK-CSSC: liveins: $x0
 ; CHECK-CSSC-NEXT: {{ $}}
@@ -218,17 +224,15 @@ body: |
 ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTLZ]], [[C2]]
-; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C3]]
-; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[AND]]
+; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[UADDO1]]
 ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(s64) = G_BITREVERSE [[OR]]
 ; CHECK-NEXT: [[CTLZ1:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE1]](s64)
-; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
-; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[CTLZ1]]
-; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDE]], [[C]]
+; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO]], [[CTLZ1]]
+; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDE]], [[C]]
 ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
 ; CHECK-NEXT: $x1 = COPY [[SELECT1]](s64)
 ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ ;
 ; CHECK-CSSC-LABEL: name: s65
 ; CHECK-CSSC: liveins: $x0, $x1
 ; CHECK-CSSC-NEXT: {{ $}}
@@ -242,13 +246,10 @@ body: |
 ; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ [[OR1]](s64)
 ; CHECK-CSSC-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
 ; CHECK-CSSC-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTTZ]], [[C2]]
-; CHECK-CSSC-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C3]]
-; CHECK-CSSC-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[AND]]
+; CHECK-CSSC-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[UADDO1]]
 ; CHECK-CSSC-NEXT: [[CTTZ1:%[0-9]+]]:_(s64) = G_CTTZ [[OR]](s64)
-; CHECK-CSSC-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
-; CHECK-CSSC-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[CTTZ1]]
-; CHECK-CSSC-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDE]], [[C]]
+; CHECK-CSSC-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO]], [[CTTZ1]]
+; CHECK-CSSC-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDE]], [[C]]
 ; CHECK-CSSC-NEXT: $x0 = COPY [[SELECT]](s64)
 ; CHECK-CSSC-NEXT: $x1 = COPY [[SELECT1]](s64)
 ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0, implicit $x1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-div.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-div.mir
index 7fec515fe3fb07..d0089bba68bec8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-div.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-div.mir
@@ -6,20 +6,20 @@ body: |
   bb.0.entry:
 ; CHECK-LABEL: name: test_div
 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
-; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 8
-; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8
-; CHECK: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]]
-; CHECK: $w0 = COPY [[SDIV]](s32)
-; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
-; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]]
-; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]]
-; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]]
-; CHECK: $w0 = COPY [[UDIV]](s32)
+; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 8
+; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8
+; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]]
+; CHECK-NEXT: $w0 = COPY [[SDIV]](s32)
+; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]]
+; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]]
+; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]]
+; CHECK-NEXT: $w0 = COPY [[UDIV]](s32)
   %0:_(s64) = COPY $x0
   %1:_(s64) = COPY $x1
   %2:_(s8) = G_TRUNC %0(s64)
@@ -43,17 +43,18 @@ body: |
 ; CHECK-LABEL: name: sdiv_v4s32
 ; CHECK: liveins: $q0, $q1
-; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
-; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-; CHECK: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[UV]], [[UV4]]
-; CHECK: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[UV1]], [[UV5]]
-; CHECK: [[SDIV2:%[0-9]+]]:_(s32) = G_SDIV [[UV2]], [[UV6]]
-; CHECK: [[SDIV3:%[0-9]+]]:_(s32) = G_SDIV [[UV3]], [[UV7]]
-; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SDIV]](s32), [[SDIV1]](s32), [[SDIV2]](s32), [[SDIV3]](s32)
-; CHECK: $q0 = COPY [[BUILD_VECTOR]](<4 x s32>)
-; CHECK: RET_ReallyLR implicit $q0
+; CHECK-NEXT: {{ $}}
+; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[UV]], [[UV4]]
+; CHECK-NEXT: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[UV1]], [[UV5]]
+; CHECK-NEXT: [[SDIV2:%[0-9]+]]:_(s32) = G_SDIV [[UV2]], [[UV6]]
+; CHECK-NEXT: [[SDIV3:%[0-9]+]]:_(s32) = G_SDIV [[UV3]], [[UV7]]
+; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SDIV]](s32), [[SDIV1]](s32), [[SDIV2]](s32), [[SDIV3]](s32)
+; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR]](<4 x s32>)
+; CHECK-NEXT: RET_ReallyLR implicit $q0
   %0:_(<4 x s32>) = COPY $q0
   %1:_(<4 x s32>) = COPY $q1
   %2:_(<4 x s32>) = G_SDIV %0, %1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-cse.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-cse.mir index 98e6cbbda593c4..f105b2e934d3c0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-cse.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-cse.mir @@ -6,12 +6,12 @@ body: | bb.0.entry: ; CHECK-LABEL: name: test_cse_in_legalizer ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; CHECK: $w0 = COPY [[COPY1]](s32) - ; CHECK: $w0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; CHECK-NEXT: $w0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: $w0 = COPY [[AND]](s32) %0:_(s64) = COPY $x0 %1:_(s8) = G_TRUNC %0(s64) %19:_(s32) = G_ZEXT %1(s8) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-csedebug-output.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-csedebug-output.mir index fe6f4a1344c773..2a45a05b2e5a0a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-csedebug-output.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-csedebug-output.mir @@ -10,11 +10,11 @@ body: | ; CHECK: CSEInfo::Add MI: %{{[0-9]+}}:_(s32) = G_ZEXT ; CHECK: CSEInfo::Add MI: %{{[0-9]+}}:_(s8) = G_TRUNC ; CHECK: CSEInfo::Add MI: %{{[0-9]+}}:_(s32) = G_ZEXT - ; CHECK: CSEInfo::Recording new MI G_CONSTANT ; CHECK: CSEInfo::Recording new MI G_TRUNC + ; CHECK: CSEInfo::Recording new MI G_CONSTANT ; CHECK: CSEInfo::Recording new MI G_AND - ; CHECK: CSEInfo::Found Instr %{{[0-9]+}}:_(s32) = G_CONSTANT ; CHECK: CSEInfo::Found Instr %{{[0-9]+}}:_(s32) = G_TRUNC + ; CHECK: CSEInfo::Found Instr %{{[0-9]+}}:_(s32) = G_CONSTANT ; CHECK: CSEInfo::Found Instr %{{[0-9]+}}:_(s32) = G_AND ; CHECK: CSEInfo::CSE Hit for Opc {{[0-9]+}} : 1 ; CHECK: CSEInfo::CSE Hit for Opc {{[0-9]+}} : 1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir index 8275ed81d3e1ff..74401a2f1ceace 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir @@ -24,14 +24,14 @@ body: | ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC4]], 1 ; CHECK-NEXT: $w0 = COPY [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC5]], [[C1]] ; CHECK-NEXT: $w0 = COPY [[AND1]](s32) ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: $w0 = COPY [[TRUNC6]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC7]], [[C2]] ; CHECK-NEXT: $w0 = COPY [[AND2]](s32) ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir 
b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir index 62a0c92004f7f8..68302f5f186958 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir @@ -291,8 +291,8 @@ body: | ; CHECK: liveins: $w0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; CHECK-NEXT: %ext:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: $w0 = COPY %ext(s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir index 57bd3e761f81d0..be674d79b54f1d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir @@ -7,7 +7,9 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_freeze_s64 - ; CHECK: %x0:_(s64) = COPY $x0 + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x0:_(s64) = COPY $x0 ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %x0 ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64) %x0:_(s64) = COPY $x0 @@ -21,7 +23,9 @@ body: | liveins: $q0 ; CHECK-LABEL: name: test_freeze_v4s32 - ; CHECK: %q0:_(<4 x s32>) = COPY $q0 + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %q0:_(<4 x s32>) = COPY $q0 ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE %q0 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s32>) ; CHECK-NEXT: $x0 = COPY [[UV]](<2 x s32>) @@ -56,7 +60,9 @@ body: | liveins: $d0 ; CHECK-LABEL: name: test_freeze_v2s32 - ; CHECK: %d0:_(<2 x s32>) = COPY $d0 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %d0:_(<2 x s32>) = COPY $d0 ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE %d0 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<2 x s32>) ; CHECK-NEXT: $w0 = COPY [[UV]](s32) @@ -74,7 +80,9 @@ body: | liveins: $d0 ; CHECK-LABEL: name: test_freeze_v8s8 - ; CHECK: %d0:_(<8 x s8>) = COPY $d0 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %d0:_(<8 x s8>) = COPY $d0 ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<8 x s8>) = G_FREEZE %d0 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s8>), [[UV1:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[FREEZE]](<8 x s8>) ; CHECK-NEXT: $w0 = COPY [[UV]](<4 x s8>) @@ -91,10 +99,12 @@ body: | bb.0.entry: liveins: $x0 ; CHECK-LABEL: name: test_freeze_s1 - ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s8) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[FREEZE]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: %ext:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: $x0 = COPY %ext(s64) %x:_(s1) = G_IMPLICIT_DEF @@ -108,10 +118,12 @@ body: | bb.0.entry: liveins: $x0 ; CHECK-LABEL: name: test_freeze_s2 - ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s8) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[FREEZE]](s8) + ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CHECK-NEXT: %ext:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: $x0 = COPY %ext(s64) %x:_(s2) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir index 80617314ce0e54..c3c23aecc161f0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir @@ -21,15 +21,13 @@ body: | ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[COPY3]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C3]](s64) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C2]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s64) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] ; CHECK-NEXT: $w0 = COPY [[OR]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 @@ -66,15 +64,13 @@ body: | ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[COPY3]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C3]](s64) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C2]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s64) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] ; CHECK-NEXT: $w0 = COPY [[OR]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir index 81bf4332b456d5..09520445b71d98 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir @@ -23,12 +23,10 @@ body: | ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[COPY3]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s64) + ; CHECK-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] ; CHECK-NEXT: $w0 = COPY [[OR]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 @@ -67,12 +65,10 @@ body: | ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[COPY3]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] ; CHECK-NEXT: $w0 = COPY [[OR]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir index c49e62e53f07bb..942bb60f5c0623 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir @@ -66,9 +66,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-min-max.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-min-max.mir index 3ad54ba3f17fb5..fafaa4646fa229 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-min-max.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-min-max.mir @@ -18,9 +18,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:_(s32) = COPY $w0 @@ -47,9 +45,7 @@ body: | ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 @@ -76,9 +72,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:_(s32) = COPY $w0 @@ -105,9 +99,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 @@ -136,9 +128,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:_(s32) = COPY $w0 @@ -165,9 +155,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 @@ -194,9 +182,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; 
CHECK-NEXT: RET_ReallyLR implicit $w0 %0:_(s32) = COPY $w0 @@ -223,9 +209,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir index 8c6a30aaed0483..fc417b2eca6189 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir @@ -274,12 +274,12 @@ body: | ; CHECK-LABEL: name: test_uitofp_v2s64_v2i1 ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[DEF]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[COPY]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<2 x s64>) = G_UITOFP [[AND]](<2 x s64>) ; CHECK-NEXT: $q0 = COPY [[UITOFP]](<2 x s64>) %0:_(<2 x s1>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir index 1040a5d6ff4d99..5cbb8649d158b0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir @@ -688,8 +688,8 @@ body: | ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD %ptr(p0) :: (load (s8)) ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[LOAD]], 1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ASSERT_ZEXT]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: %ext:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: $x0 = COPY %ext(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir index 6e399d8a388350..8fc2ad3e41789b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir @@ -6,14 +6,11 @@ name: test_merge_s4 body: | bb.0: ; CHECK-LABEL: name: test_merge_s4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[C1]], [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], 
[[C2]](s64) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) ; CHECK-NEXT: $x0 = COPY [[ANYEXT]](s64) %0:_(s64) = G_CONSTANT i64 0 @@ -29,7 +26,6 @@ name: test_merge_s16_s8 body: | bb.0: - ; This isn't legal but we don't support widening the destination type. ; CHECK-LABEL: name: test_merge_s16_s8 ; CHECK: %a:_(s32) = COPY $w0 ; CHECK-NEXT: %b:_(s32) = COPY $w1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir index 0e895c49eff099..b29670a89c8cf6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir @@ -131,8 +131,8 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %lhs_wide, [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND %rhs_wide, [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16777215 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %lhs_wide(s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16777215 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %rhs_wide(s32) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]] @@ -223,8 +223,8 @@ body: | ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[LOAD1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]] ; CHECK-NEXT: G_STORE [[C]](s64), [[FRAME_INDEX2]](p0) :: (store (s64), align 1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] ; CHECK-NEXT: $x0 = COPY [[MUL]](s64) ; CHECK-NEXT: $x1 = COPY [[AND]](s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir index 60d7af273eb5e3..f1aee90bb07dde 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir @@ -52,8 +52,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p0) = G_PHI %6(p0), %bb.2, [[DEF]](p0), %bb.0 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI %22(s16), %bb.2, [[DEF1]](s16), %bb.0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.3 ; CHECK-NEXT: {{ $}} @@ -71,10 +71,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ZEXT1]](s32), [[COPY]] ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PHI]], [[C2]](s64) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND 
[[ICMP]], [[C4]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ICMP1]](s32) - ; CHECK-NEXT: G_BRCOND [[AND1]](s32), %bb.3 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.3 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.bb10: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir index 3c952cb779d230..4154ab7039c2f3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir @@ -32,8 +32,7 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -51,10 +50,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.1, [[TRUNC1]](s16), %bb.2 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: $w0 = COPY [[AND1]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: $w0 = COPY [[AND]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.0: ; Test that we insert legalization artifacts(Truncs here) into the correct BBs @@ -185,8 +184,7 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[C]] ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -203,10 +201,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.1, [[TRUNC1]](s16), %bb.2 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: $w0 = COPY [[AND1]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: $w0 = COPY [[AND]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) @@ -281,14 +279,13 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[COPY]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) - ; CHECK-NEXT: G_BRCOND [[AND1]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C3]] - ; CHECK-NEXT: $w0 = COPY [[AND2]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C3]] + ; CHECK-NEXT: $w0 = COPY [[AND1]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.0: 
successors: %bb.1(0x80000000) @@ -342,13 +339,11 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[C]](s16), %bb.0, [[PHI]](s16), %bb.1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[COPY]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: G_BRCOND [[AND1]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: $w0 = COPY [[AND]](s32) @@ -412,8 +407,7 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[C]] ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -433,12 +427,12 @@ body: | ; CHECK-NEXT: bb.3: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC1]](s16), %bb.1, [[TRUNC2]](s16), %bb.2 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.1, [[C3]](s16), %bb.2 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]] ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C4]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND1]], [[AND2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C4]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] ; CHECK-NEXT: $w0 = COPY [[ADD2]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.0: @@ -524,31 +518,28 @@ body: | ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC1]](s16), %bb.0, %22(s16), %bb.1 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND1]], [[C2]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[C2]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[ADD1]](s32), [[C3]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C5]] - ; CHECK-NEXT: 
[[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 43 - ; CHECK-NEXT: G_BRCOND [[AND2]](s32), %bb.2 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 43 + ; CHECK-NEXT: G_BRCOND [[ICMP1]](s32), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[PHI]](s16), %bb.1, [[TRUNC]](s16), %bb.0 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C7]] - ; CHECK-NEXT: $w0 = COPY [[AND3]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C6]] + ; CHECK-NEXT: $w0 = COPY [[AND1]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.0: successors: %bb.1(0x40000000), %bb.3(0x40000000) @@ -671,8 +662,8 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C]] ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 @@ -727,8 +718,8 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr1, [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C1]] ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2 ; CHECK-NEXT: G_BR %bb.1 @@ -778,8 +769,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2 ; CHECK-NEXT: G_BR %bb.1 @@ -824,8 +815,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF1]], [[C]] ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2 ; CHECK-NEXT: G_BR %bb.1 @@ -919,8 +910,8 @@ body: | ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr1, [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD1]](<2 x s64>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C1]] ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2 ; CHECK-NEXT: G_BR %bb.1 diff --git 
a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptrtoint.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptrtoint.mir index f62a948a4b36e2..dbc538a21afda4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptrtoint.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptrtoint.mir @@ -83,8 +83,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT %ptr(p0) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PTRTOINT]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: %ext:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: $w0 = COPY %ext(s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir index b707e3ce2a3d50..7a8f42b43787a3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir @@ -96,8 +96,8 @@ body: | ; CHECK-LABEL: name: test_urem_1 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sadde.mir index 6e72e749eaef78..bbb6fda40dd512 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sadde.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sadde.mir @@ -11,12 +11,11 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]] - ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY1]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY1]], [[COPY3]], [[UADDE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SADDE1]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDE]](s64) ; CHECK-NEXT: $x1 = COPY [[SADDE]](s64) @@ -47,14 +46,12 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND 
[[UADDE3]], [[C]] - ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY2]], [[COPY3]], [[AND2]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDE1]] + ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY2]], [[COPY3]], [[UADDE3]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SADDE1]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDE]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE2]](s64) @@ -89,8 +86,8 @@ body: | ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 8 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]] ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UADDE]], 8 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddo.mir index 0b04b45cddf3ec..bf0af67e56f874 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddo.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddo.mir @@ -11,9 +11,7 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]] - ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY1]], [[COPY3]], [[AND]] + ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY1]], [[COPY3]], [[UADDO1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SADDE1]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64) ; CHECK-NEXT: $x1 = COPY [[SADDE]](s64) @@ -42,11 +40,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]] - ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY2]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDO1]] + ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY2]], [[COPY3]], [[UADDE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SADDE1]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir index b2fb18733b7cb6..b8bdef06cac6d1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir @@ -18,9 +18,7 @@ body: | ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SADDO]], [[C]](s64) ; 
CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SADDO1]], [[C2]] - ; CHECK-NEXT: %saddsat:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SADDO]] + ; CHECK-NEXT: %saddsat:_(s32) = G_SELECT [[SADDO1]](s32), [[ADD]], [[SADDO]] ; CHECK-NEXT: $w0 = COPY %saddsat(s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %x:_(s32) = COPY $w0 @@ -46,9 +44,7 @@ body: | ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SADDO]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SADDO1]], [[C2]] - ; CHECK-NEXT: %saddsat:_(s64) = G_SELECT [[AND]](s32), [[ADD]], [[SADDO]] + ; CHECK-NEXT: %saddsat:_(s64) = G_SELECT [[SADDO1]](s32), [[ADD]], [[SADDO]] ; CHECK-NEXT: $x0 = COPY %saddsat(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %x:_(s64) = COPY $x0 @@ -80,9 +76,7 @@ body: | ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD1]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD1]], [[ADD]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 @@ -117,8 +111,7 @@ body: | ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD1]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD1]], [[ADD]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 @@ -153,9 +146,7 @@ body: | ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD1]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD1]], [[ADD]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 @@ -187,13 +178,10 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s64), [[SEXT_INREG2]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[SEXT_INREG2]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[ADD1]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[ADD1]], [[ADD]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s64) = COPY $x0 @@ -227,9 +215,7 @@ body: | ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C1]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[UADDO1]] ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UADDE]], 24 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UADDE]](s32), [[SEXT_INREG2]] ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDE]](s32) @@ -240,11 +226,10 @@ body: | ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV8]](s8) ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV2]], 24 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[C2]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[C1]](s64) ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s64), [[UADDO3:%[0-9]+]]:_(s32) = G_UADDO [[ASHR]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO2]], [[UADDO]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO2]], [[UADDO]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s128) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir index 31b87a04a6dc17..4879ffd28784c1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir @@ -125,20 +125,18 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[AND]], 1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP]], 1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY2]](s32), [[C3]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY2]](s32), [[C2]](s64) 
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(0, 0, 0, 0) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32) ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY1]], [[SHUF]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s32>) = G_AND [[BUILD_VECTOR]], [[XOR]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[AND1]], [[AND2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY1]], [[SHUF]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[BUILD_VECTOR]], [[XOR]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[AND]], [[AND1]] ; CHECK-NEXT: $q0 = COPY [[OR]](<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $w0 @@ -166,9 +164,7 @@ body: | ; CHECK-NEXT: %b:_(s32) = COPY $w1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), %a(s32), %b ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[DEF]], [[DEF]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[DEF]], [[DEF]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %a:_(s32) = COPY $w0 @@ -311,17 +307,17 @@ body: | ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[BUILD_VECTOR1]](<4 x s1>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ANYEXT]], [[ANYEXT1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond0(<4 x s1>) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT3]], [[ANYEXT4]] + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond0(<4 x s1>) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT2]], [[ANYEXT3]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND]](<4 x s16>) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond1(<4 x s1>) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC2]](<4 x s1>) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT5]], [[ANYEXT6]] + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond1(<4 x s1>) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC2]](<4 x s1>) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT4]], [[ANYEXT5]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND1]](<4 x s16>) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC3]](<4 x s1>) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC4]](<4 x s1>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[ANYEXT7]], [[ANYEXT8]] + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC3]](<4 x s1>) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC4]](<4 x s1>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[ANYEXT6]], [[ANYEXT7]] ; CHECK-NEXT: 
%select:_(<4 x s1>) = G_TRUNC [[OR]](<4 x s16>) ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_ZEXT %select(<4 x s1>) ; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>) @@ -359,22 +355,20 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[C]](p0), [[C]](p0) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](p0), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY1]](<2 x p0>) ; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[BUILD_VECTOR]](<2 x p0>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[AND]], 1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP]], 1 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXT_INREG]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[SEXT]](s64), [[C2]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[SEXT]](s64), [[C1]](s64) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[IVEC]](<2 x s64>), [[DEF]], shufflemask(0, 0) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C3]](s64), [[C3]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[PTRTOINT]], [[SHUF]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s64>) = G_AND [[PTRTOINT1]], [[XOR]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND1]], [[AND2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[PTRTOINT]], [[SHUF]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[PTRTOINT1]], [[XOR]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND]], [[AND1]] ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(<2 x p0>) = G_INTTOPTR [[OR]](<2 x s64>) ; CHECK-NEXT: $q0 = COPY [[INTTOPTR]](<2 x p0>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir index e7073bdc3d6858..9a9b35cf293da1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir @@ -7,8 +7,8 @@ body: | ; CHECK-LABEL: name: test_shift ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8 @@ -109,13 +109,9 @@ body: | ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[SHL]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[OR]], [[SHL2]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s32), [[UV1]], [[SELECT1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[SHL]], [[C1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR]], [[SHL2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[UV1]], [[SELECT1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; CHECK-NEXT: $q0 = COPY [[MV]](s128) %0:_(s128) = COPY $q0 @@ -145,13 +141,9 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[OR]], [[LSHR2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UV]], [[SELECT]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s32), [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR]], [[LSHR2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[UV]], [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[LSHR]], [[C1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; CHECK-NEXT: $q0 = COPY [[MV]](s128) %0:_(s128) = COPY $q0 @@ -183,13 +175,9 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C2]](s64) ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[OR]], [[ASHR2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C3]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UV]], [[SELECT]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s32), [[ASHR]], [[ASHR1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR]], [[ASHR2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[UV]], [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[ASHR]], [[ASHR1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; CHECK-NEXT: $q0 = COPY [[MV]](s128) %0:_(s128) = COPY $q0 @@ -236,7 +224,9 @@ body: | liveins: $w0 ; CHECK-LABEL: name: shl_cimm_32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s64) ; CHECK-NEXT: $w0 = COPY [[SHL]](s32) @@ -255,7 +245,9 @@ body: | 
liveins: $w0 ; CHECK-LABEL: name: lshr_cimm_32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s64) ; CHECK-NEXT: $w0 = COPY [[LSHR]](s32) @@ -274,7 +266,9 @@ body: | liveins: $w0 ; CHECK-LABEL: name: ashr_cimm_32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: $w0 = COPY [[ASHR]](s32) @@ -496,11 +490,8 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[SUB1]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[MV1]], [[SUB]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[OR]], [[LSHR1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C3]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[MV]], [[SELECT]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR]], [[LSHR1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[MV]], [[SELECT]] ; CHECK-NEXT: %d1:_(s32), %d2:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64) ; CHECK-NEXT: $w0 = COPY %d2(s32) %0:_(s64) = COPY $x0 @@ -529,7 +520,6 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[C2]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s64), [[C1]] ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[COPY]], [[C3]] ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s64) = G_SUB [[C3]], [[COPY]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[C3]] @@ -538,14 +528,11 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[SUB3]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[MV1]], [[SUB2]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[OR]], [[LSHR1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP3]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[MV]], [[SELECT]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s32), [[OR]], [[LSHR1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s32), [[MV]], [[SELECT]] ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s64), [[C3]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[MV2]], [[SUB1]](s64) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C4]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s32), [[SHL1]], [[C1]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s32), [[SHL1]], [[C1]] ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SELECT1]], [[SELECT2]] ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB [[SUB]], [[C3]] ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C3]], [[SUB]] @@ -555,14 +542,10 @@ body: | ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[C1]], [[SUB5]](s64) ; CHECK-NEXT: 
[[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL2]] ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[C1]], [[SUB4]](s64) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[C4]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s32), [[OR2]], [[LSHR3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C4]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[MV2]], [[SELECT3]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[AND5]](s32), [[OR1]], [[SELECT4]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[AND6]](s32), [[MV]], [[SELECT5]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s32), [[OR2]], [[LSHR3]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s32), [[MV2]], [[SELECT3]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR1]], [[SELECT4]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[MV]], [[SELECT5]] ; CHECK-NEXT: %d1:_(s32), %d2:_(s32) = G_UNMERGE_VALUES [[SELECT6]](s64) ; CHECK-NEXT: $w0 = COPY %d2(s32) %0:_(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir index c1bf5cd33bea43..62865bcfdf081e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir @@ -12,16 +12,16 @@ body: | ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[COPY]](s64) ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[INTTOPTR]](p0) ; CHECK-NEXT: $x0 = COPY [[PTRTOINT]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC4]], [[C1]] ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[TRUNC2]], [[TRUNC3]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) @@ -101,7 +101,9 @@ body: | liveins: $x0, $x1 ; CHECK-LABEL: name: bitcast128 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[MV]](s128) @@ -122,7 +124,9 @@ body: | liveins: $x0 ; CHECK-LABEL: name: testExtOfCopyOfTrunc - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: $x0 = COPY [[COPY]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 @@ -140,7 +144,9 @@ body: | liveins: $x0 ; CHECK-LABEL: name: testExtOf2CopyOfTrunc - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; 
CHECK-NEXT: $x0 = COPY [[COPY]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssube.mir index fa372217c88350..e7d1f8e7a50f97 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssube.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssube.mir @@ -11,12 +11,11 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]] - ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY1]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY1]], [[COPY3]], [[USUBE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SSUBE1]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBE]](s64) ; CHECK-NEXT: $x1 = COPY [[SSUBE]](s64) @@ -47,14 +46,12 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY]], [[COPY1]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]] - ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[USUBE3]], [[C]] - ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY2]], [[COPY3]], [[AND2]] + ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBE1]] + ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY2]], [[COPY3]], [[USUBE3]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SSUBE1]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBE]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE2]](s64) @@ -89,8 +86,8 @@ body: | ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 8 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]] ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[USUBE]], 8 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubo.mir index cc53b387940b2d..5e8d96a4b6d689 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubo.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubo.mir @@ -11,9 
+11,7 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]] - ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY1]], [[COPY3]], [[AND]] + ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY1]], [[COPY3]], [[USUBO1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SSUBE1]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64) ; CHECK-NEXT: $x1 = COPY [[SSUBE]](s64) @@ -42,11 +40,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]] - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]] - ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY2]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBO1]] + ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY2]], [[COPY3]], [[USUBE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SSUBE1]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir index 8baa96bc564d5a..2311be6b425cb9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir @@ -18,9 +18,7 @@ body: | ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SSUBO]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SSUBO1]], [[C2]] - ; CHECK-NEXT: %ssubsat:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SSUBO]] + ; CHECK-NEXT: %ssubsat:_(s32) = G_SELECT [[SSUBO1]](s32), [[ADD]], [[SSUBO]] ; CHECK-NEXT: $w0 = COPY %ssubsat(s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %x:_(s32) = COPY $w0 @@ -46,9 +44,7 @@ body: | ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SSUBO]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SSUBO1]], [[C2]] - ; CHECK-NEXT: %ssubsat:_(s64) = G_SELECT [[AND]](s32), [[ADD]], [[SSUBO]] + ; CHECK-NEXT: %ssubsat:_(s64) = G_SELECT [[SSUBO1]](s32), [[ADD]], [[SSUBO]] ; CHECK-NEXT: $x0 = COPY %ssubsat(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %x:_(s64) = COPY $x0 @@ -80,9 +76,7 @@ body: | ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: 
[[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SUB]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[SUB]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 @@ -117,8 +111,7 @@ body: | ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SUB]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[SUB]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 @@ -153,9 +146,7 @@ body: | ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SUB]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[SUB]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 @@ -187,13 +178,10 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s64), [[SEXT_INREG2]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[SEXT_INREG2]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[ADD]], [[SUB]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[ADD]], [[SUB]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s64) = COPY $x0 @@ -227,9 +215,7 @@ body: | ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C1]] - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[USUBO1]] ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[USUBE]], 24 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[USUBE]](s32), [[SEXT_INREG2]] ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBE]](s32) @@ -240,11 +226,10 @@ body: | ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV8]](s8) ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) ; CHECK-NEXT: 
[[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV2]], 24 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[C2]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[C1]](s64) ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[ASHR]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[USUBO]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO]], [[USUBO]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s128) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir index 3de25d98f00c91..205c32f6971acc 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir @@ -11,9 +11,7 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]] - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[AND]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[USUBO1]] ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64) %0:_(s64) = COPY $x0 @@ -38,11 +36,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]] - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]] - ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBO1]] + ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[USUBE1]] ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64) ; CHECK-NEXT: $x2 = COPY [[USUBE2]](s64) @@ -136,8 +131,8 @@ body: | ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $d3 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY]](<2 x s32>), [[COPY1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY2]](<2 x s32>), [[COPY3]] - ; CHECK-NEXT: [[sub:%[0-9]+]]:_(<2 x s32>) = G_SUB [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: $d0 = COPY [[sub]](<2 x s32>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<2 x s32>) = G_SUB [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $d0 = COPY [[SUB]](<2 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(<2 x s32>) = COPY $d0 %1:_(<2 x s32>) = COPY $d1 @@ -163,15 +158,15 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2 - ; CHECK-NEXT: [[ANYEXT0:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) + ; 
CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8) - ; CHECK-NEXT: [[IMPLICIT_DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT0]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[IMPLICIT_DEF]](s16) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16) ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[BUILD_VECTOR]], [[BUILD_VECTOR]] - ; CHECK-NEXT: [[VAL0:%[0-9]+]]:_(s16), [[VAL1:%[0-9]+]]:_(s16), [[VAL2:%[0-9]+]]:_(s16), [[VAL3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SUB]](<4 x s16>) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[VAL0]](s16) - ; CHECK-NEXT: $b0 = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SUB]](<4 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16) + ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8) ; CHECK-NEXT: RET_ReallyLR implicit $b0 %1:_(s8) = COPY $b0 %2:_(s8) = COPY $b1 @@ -200,8 +195,8 @@ body: | ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s16>) = COPY $d3 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY]](<4 x s16>), [[COPY1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY2]](<4 x s16>), [[COPY3]] - ; CHECK-NEXT: [[sub:%[0-9]+]]:_(<4 x s16>) = G_SUB [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: $d0 = COPY [[sub]](<4 x s16>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $d0 = COPY [[SUB]](<4 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(<4 x s16>) = COPY $d0 %1:_(<4 x s16>) = COPY $d1 @@ -230,8 +225,8 @@ body: | ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<8 x s8>) = COPY $d3 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[COPY]](<8 x s8>), [[COPY1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[COPY2]](<8 x s8>), [[COPY3]] - ; CHECK-NEXT: [[sub:%[0-9]+]]:_(<8 x s8>) = G_SUB [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: $d0 = COPY [[sub]](<8 x s8>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s8>) = G_SUB [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $d0 = COPY [[SUB]](<8 x s8>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(<8 x s8>) = COPY $d0 %1:_(<8 x s8>) = COPY $d1 @@ -260,8 +255,8 @@ body: | ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<16 x s8>) = COPY $q3 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[COPY]](<16 x s8>), [[COPY1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[COPY2]](<16 x s8>), [[COPY3]] - ; CHECK-NEXT: [[sub:%[0-9]+]]:_(<16 x s8>) = G_SUB [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: $q0 = COPY [[sub]](<16 x s8>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<16 x s8>) = G_SUB [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $q0 = COPY [[SUB]](<16 x s8>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<16 x s8>) = COPY $q0 %1:_(<16 x s8>) = COPY $q1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadd-sat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadd-sat.mir index 9dbbd145415462..e6293e72f547fb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadd-sat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadd-sat.mir @@ -18,9 +18,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s32) = 
G_UADDO [[COPY]], [[COPY1]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[C]], [[UADDO]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[UADDO1]](s32), [[C]], [[UADDO]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:_(s32) = COPY $w0 @@ -48,9 +46,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[C]], [[UADDO]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[UADDO1]](s32), [[C]], [[UADDO]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 @@ -83,9 +79,7 @@ body: | ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[ADD]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:_(s32) = COPY $w0 @@ -121,9 +115,7 @@ body: | ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[ADD]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:_(s32) = COPY $w0 @@ -162,9 +154,7 @@ body: | ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[COPY2]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY2]], [[ADD]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadde.mir index fcd50131dc75e9..11f1f09cbbe0f3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadde.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadde.mir @@ -11,12 +11,11 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC 
[[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[UADDE3]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDE]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE2]](s64) @@ -47,14 +46,12 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UADDE3]], [[C]] - ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s64), [[UADDE5:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[AND2]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDE1]] + ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s64), [[UADDE5:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[UADDE3]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[UADDE5]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDE]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE2]](s64) @@ -85,13 +82,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C1]] ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[AND]], [[AND1]], [[AND2]] ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UADDE]], [[C]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uaddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uaddo.mir index f646c6b053f173..7628233a6addc6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uaddo.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uaddo.mir @@ -11,9 +11,7 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), 
[[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDO1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[UADDE1]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64) @@ -42,11 +40,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDO1]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[UADDE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[UADDE3]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64) @@ -74,8 +69,8 @@ body: | ; CHECK-LABEL: name: test_scalar_uaddo_small ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usub-sat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usub-sat.mir index 5fc30343b42f99..784dc389af58b9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usub-sat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usub-sat.mir @@ -18,9 +18,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[C]], [[USUBO]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[USUBO1]](s32), [[C]], [[USUBO]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:_(s32) = COPY $w0 @@ -48,9 +46,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[C]], [[USUBO]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[USUBO1]](s32), [[C]], [[USUBO]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 @@ -83,9 +79,7 @@ body: | ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), 
[[SUB]](s32), [[AND2]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[SUB]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[SUB]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:_(s32) = COPY $w0 @@ -121,9 +115,7 @@ body: | ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[SUB]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[SUB]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:_(s32) = COPY $w0 @@ -159,9 +151,7 @@ body: | ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[SUB]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[SUB]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usube.mir index ca9fef41e45b54..a9c0f3135b4205 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usube.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usube.mir @@ -11,12 +11,11 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]] - ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[USUBE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[USUBE3]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBE]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE2]](s64) @@ -47,14 +46,12 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY]], [[COPY1]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]] - ; 
CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[USUBE3]], [[C]] - ; CHECK-NEXT: [[USUBE4:%[0-9]+]]:_(s64), [[USUBE5:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[AND2]] + ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBE1]] + ; CHECK-NEXT: [[USUBE4:%[0-9]+]]:_(s64), [[USUBE5:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[USUBE3]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[USUBE5]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBE]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE2]](s64) @@ -85,13 +82,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C1]] ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[AND]], [[AND1]], [[AND2]] ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[USUBE]], [[C]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usubo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usubo.mir index 34eabeebe5169d..30faa53d89362e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usubo.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usubo.mir @@ -11,9 +11,7 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]] - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[AND]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[USUBO1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[USUBE1]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64) @@ -42,11 +40,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]] - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]] - ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBO1]] + ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[USUBE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[USUBE3]](s32) ; CHECK-NEXT: $x0 = COPY 
[[USUBO]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64) @@ -74,8 +69,8 @@ body: | ; CHECK-LABEL: name: test_scalar_usubo_small ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir index 3fc7505e44e04e..0982d9a0218b72 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir @@ -17,8 +17,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI %33(s16), %bb.2, [[DEF]](s16), %bb.0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 46 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir b/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir index b39cdabb03f268..242b09546522b3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir @@ -10,8 +10,7 @@ body: | ; CHECK: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ogt), [[COPY]](s32), [[COPY1]] ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] - ; CHECK: $w0 = COPY [[AND1]](s32) + ; CHECK: $w0 = COPY [[AND]](s32) %0:_(s32) = COPY $w0 %1:_(s32) = COPY $w1 %2:_(s1) = G_FCMP floatpred(ogt), %0(s32), %1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir index ee54f388683696..28d279d7421642 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select %s | FileCheck %s # RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select -code-model=large %s | FileCheck %s --check-prefix=LARGE +# RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select -code-model=large -relocation-model=pic %s | FileCheck %s --check-prefix=LARGE-PIC --- | - ; ModuleID = 'blockaddress.ll' source_filename = "blockaddress.ll" target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-none-linux-gnu" @@ -37,6 +37,7 @@ body: | ; CHECK-NEXT: BR [[MOVaddrBA]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.block (ir-block-address-taken %ir-block.block): + ; ; LARGE-LABEL: name: test_blockaddress ; LARGE: bb.0 (%ir-block.0): 
; LARGE-NEXT: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 0 @@ -51,6 +52,16 @@ body: | ; LARGE-NEXT: BR [[MOVKXi2]] ; LARGE-NEXT: {{ $}} ; LARGE-NEXT: bb.1.block (ir-block-address-taken %ir-block.block): + ; + ; LARGE-PIC-LABEL: name: test_blockaddress + ; LARGE-PIC: bb.0 (%ir-block.0): + ; LARGE-PIC-NEXT: [[MOVaddrBA:%[0-9]+]]:gpr64common = MOVaddrBA target-flags(aarch64-page) blockaddress(@test_blockaddress, %ir-block.block), target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block) + ; LARGE-PIC-NEXT: [[MOVaddr:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @addr, target-flags(aarch64-pageoff, aarch64-nc) @addr + ; LARGE-PIC-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY [[MOVaddrBA]] + ; LARGE-PIC-NEXT: STRXui [[COPY]], [[MOVaddr]], 0 :: (store (p0) into @addr) + ; LARGE-PIC-NEXT: BR [[MOVaddrBA]] + ; LARGE-PIC-NEXT: {{ $}} + ; LARGE-PIC-NEXT: bb.1.block (ir-block-address-taken %ir-block.block): bb.1 (%ir-block.0): %0:gpr(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) %1:gpr(p0) = G_GLOBAL_VALUE @addr diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir index b2aadd7ca5c2d4..d0ac97e6908aa5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -relocation-model=static -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=STATIC +# RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -relocation-model=pic -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=PIC --- | target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -29,23 +30,35 @@ stack: constants: body: | bb.1: - ; CHECK-LABEL: name: gv_large - ; CHECK: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo1, 0 - ; CHECK: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) @foo1, 16 - ; CHECK: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) @foo1, 32 - ; CHECK: [[MOVKXi2:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) @foo1, 48 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY [[MOVKXi2]] - ; CHECK: [[MOVZXi1:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo2, 0 - ; CHECK: [[MOVKXi3:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi1]], target-flags(aarch64-g1, aarch64-nc) @foo2, 16 - ; CHECK: [[MOVKXi4:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi3]], target-flags(aarch64-g2, aarch64-nc) @foo2, 32 - ; CHECK: [[MOVKXi5:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @foo2, 48 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[MOVKXi5]] - ; CHECK: STRWui $wzr, %stack.0.retval, 0 :: (store (s32) into %ir.retval) - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from @foo1) - ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load (s32) from @foo2) - ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]] - ; CHECK: $w0 = COPY [[ADDWrr]] - ; CHECK: RET_ReallyLR implicit $w0 + ; STATIC-LABEL: name: gv_large 
+ ; STATIC: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo1, 0 + ; STATIC-NEXT: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) @foo1, 16 + ; STATIC-NEXT: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) @foo1, 32 + ; STATIC-NEXT: [[MOVKXi2:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) @foo1, 48 + ; STATIC-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY [[MOVKXi2]] + ; STATIC-NEXT: [[MOVZXi1:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo2, 0 + ; STATIC-NEXT: [[MOVKXi3:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi1]], target-flags(aarch64-g1, aarch64-nc) @foo2, 16 + ; STATIC-NEXT: [[MOVKXi4:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi3]], target-flags(aarch64-g2, aarch64-nc) @foo2, 32 + ; STATIC-NEXT: [[MOVKXi5:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @foo2, 48 + ; STATIC-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[MOVKXi5]] + ; STATIC-NEXT: STRWui $wzr, %stack.0.retval, 0 :: (store (s32) into %ir.retval) + ; STATIC-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from @foo1) + ; STATIC-NEXT: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load (s32) from @foo2) + ; STATIC-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]] + ; STATIC-NEXT: $w0 = COPY [[ADDWrr]] + ; STATIC-NEXT: RET_ReallyLR implicit $w0 + ; + ; PIC-LABEL: name: gv_large + ; PIC: [[MOVaddr:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @foo1, target-flags(aarch64-pageoff, aarch64-nc) @foo1 + ; PIC-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY [[MOVaddr]] + ; PIC-NEXT: [[MOVaddr1:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @foo2, target-flags(aarch64-pageoff, aarch64-nc) @foo2 + ; PIC-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[MOVaddr1]] + ; PIC-NEXT: STRWui $wzr, %stack.0.retval, 0 :: (store (s32) into %ir.retval) + ; PIC-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from @foo1) + ; PIC-NEXT: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load (s32) from @foo2) + ; PIC-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]] + ; PIC-NEXT: $w0 = COPY [[ADDWrr]] + ; PIC-NEXT: RET_ReallyLR implicit $w0 %1:gpr(s32) = G_CONSTANT i32 0 %4:gpr(p0) = G_GLOBAL_VALUE @foo1 %3:gpr(p0) = COPY %4(p0) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-with-offset.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-with-offset.mir index 8b4ae941eb4d70..1713f36683ef93 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-with-offset.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-with-offset.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64 -code-model=large -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=LARGE +# RUN: llc -mtriple=aarch64 -code-model=large -relocation-model=pic -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=LARGE-PIC # RUN: llc -mtriple=aarch64 -code-model=small -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=SMALL # RUN: llc -mtriple=aarch64 -code-model=tiny -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=TINY @@ -17,22 +18,34 @@ body: | liveins: $x0 ; LARGE-LABEL: name: select_gv_with_offset ; LARGE: liveins: $x0 - ; LARGE: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @g + 1, 0 - ; LARGE: [[MOVKXi:%[0-9]+]]:gpr64 = 
MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) @g + 1, 16 - ; LARGE: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) @g + 1, 32 - ; LARGE: %g:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) @g + 1, 48 - ; LARGE: $x0 = COPY %g - ; LARGE: RET_ReallyLR implicit $x0 + ; LARGE-NEXT: {{ $}} + ; LARGE-NEXT: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @g + 1, 0 + ; LARGE-NEXT: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) @g + 1, 16 + ; LARGE-NEXT: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) @g + 1, 32 + ; LARGE-NEXT: %g:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) @g + 1, 48 + ; LARGE-NEXT: $x0 = COPY %g + ; LARGE-NEXT: RET_ReallyLR implicit $x0 + ; + ; LARGE-PIC-LABEL: name: select_gv_with_offset + ; LARGE-PIC: liveins: $x0 + ; LARGE-PIC-NEXT: {{ $}} + ; LARGE-PIC-NEXT: %g:gpr64common = MOVaddr target-flags(aarch64-page) @g + 1, target-flags(aarch64-pageoff, aarch64-nc) @g + 1 + ; LARGE-PIC-NEXT: $x0 = COPY %g + ; LARGE-PIC-NEXT: RET_ReallyLR implicit $x0 + ; ; SMALL-LABEL: name: select_gv_with_offset ; SMALL: liveins: $x0 - ; SMALL: %g:gpr64common = MOVaddr target-flags(aarch64-page) @g + 1, target-flags(aarch64-pageoff, aarch64-nc) @g + 1 - ; SMALL: $x0 = COPY %g - ; SMALL: RET_ReallyLR implicit $x0 + ; SMALL-NEXT: {{ $}} + ; SMALL-NEXT: %g:gpr64common = MOVaddr target-flags(aarch64-page) @g + 1, target-flags(aarch64-pageoff, aarch64-nc) @g + 1 + ; SMALL-NEXT: $x0 = COPY %g + ; SMALL-NEXT: RET_ReallyLR implicit $x0 + ; ; TINY-LABEL: name: select_gv_with_offset ; TINY: liveins: $x0 - ; TINY: %g:gpr64 = ADR @g + 1 - ; TINY: $x0 = COPY %g - ; TINY: RET_ReallyLR implicit $x0 + ; TINY-NEXT: {{ $}} + ; TINY-NEXT: %g:gpr64 = ADR @g + 1 + ; TINY-NEXT: $x0 = COPY %g + ; TINY-NEXT: RET_ReallyLR implicit $x0 %g:gpr(p0) = G_GLOBAL_VALUE @g + 1 $x0 = COPY %g(p0) RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/blockaddress.ll b/llvm/test/CodeGen/AArch64/blockaddress.ll index 732cfc23292d7e..b222d2f373192b 100644 --- a/llvm/test/CodeGen/AArch64/blockaddress.ll +++ b/llvm/test/CodeGen/AArch64/blockaddress.ll @@ -1,5 +1,6 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -code-model=large -relocation-model=pic -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 < %s | FileCheck %s ; RUN: llc -code-model=tiny -mtriple=aarch64-none-elf -aarch64-enable-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-TINY %s @addr = global ptr null diff --git a/llvm/test/CodeGen/AArch64/code-model-large-abs.ll b/llvm/test/CodeGen/AArch64/code-model-large-abs.ll deleted file mode 100644 index e6f1f28778df07..00000000000000 --- a/llvm/test/CodeGen/AArch64/code-model-large-abs.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -o - %s | FileCheck %s - -@var8 = dso_local global i8 0 -@var16 = dso_local global i16 0 -@var32 = dso_local global i32 0 -@var64 = dso_local global i64 0 - -define dso_local ptr @global_addr() { -; CHECK-LABEL: global_addr: - ret ptr @var8 - ; The movz/movk calculation should end up returned directly in x0. 
-; CHECK: movz x0, #:abs_g0_nc:var8
-; CHECK: movk x0, #:abs_g1_nc:var8
-; CHECK: movk x0, #:abs_g2_nc:var8
-; CHECK: movk x0, #:abs_g3:var8
-; CHECK-NEXT: ret
-}
-
-define dso_local i8 @global_i8() {
-; CHECK-LABEL: global_i8:
- %val = load i8, ptr @var8
- ret i8 %val
-; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g0_nc:var8
-; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var8
-; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var8
-; CHECK: movk x[[ADDR_REG]], #:abs_g3:var8
-; CHECK: ldrb w0, [x[[ADDR_REG]]]
-}
-
-define dso_local i16 @global_i16() {
-; CHECK-LABEL: global_i16:
- %val = load i16, ptr @var16
- ret i16 %val
-; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g0_nc:var16
-; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var16
-; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var16
-; CHECK: movk x[[ADDR_REG]], #:abs_g3:var16
-; CHECK: ldrh w0, [x[[ADDR_REG]]]
-}
-
-define dso_local i32 @global_i32() {
-; CHECK-LABEL: global_i32:
- %val = load i32, ptr @var32
- ret i32 %val
-; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g0_nc:var32
-; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var32
-; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var32
-; CHECK: movk x[[ADDR_REG]], #:abs_g3:var32
-; CHECK: ldr w0, [x[[ADDR_REG]]]
-}
-
-define dso_local i64 @global_i64() {
-; CHECK-LABEL: global_i64:
- %val = load i64, ptr @var64
- ret i64 %val
-; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g0_nc:var64
-; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var64
-; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var64
-; CHECK: movk x[[ADDR_REG]], #:abs_g3:var64
-; CHECK: ldr x0, [x[[ADDR_REG]]]
-}
-
-define dso_local <2 x i64> @constpool() {
-; CHECK-LABEL: constpool:
- ret <2 x i64>
-
-; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g0_nc:[[CPADDR:.LCPI[0-9]+_[0-9]+]]
-; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:[[CPADDR]]
-; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:[[CPADDR]]
-; CHECK: movk x[[ADDR_REG]], #:abs_g3:[[CPADDR]]
-; CHECK: ldr q0, [x[[ADDR_REG]]]
-}
diff --git a/llvm/test/CodeGen/AArch64/code-model-large.ll b/llvm/test/CodeGen/AArch64/code-model-large.ll
new file mode 100644
index 00000000000000..11133abcb8be35
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/code-model-large.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -o - %s | FileCheck %s --check-prefix=STATIC
+; RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -relocation-model=pic -o - %s | FileCheck %s --check-prefix=PIC
+
+@var8 = dso_local global i8 0
+@var16 = dso_local global i16 0
+@var32 = dso_local global i32 0
+@var64 = dso_local global i64 0
+
+define dso_local ptr @global_addr() {
+;
+; STATIC-LABEL: global_addr:
+; STATIC: // %bb.0:
+; STATIC-NEXT: movz x0, #:abs_g0_nc:var8
+; STATIC-NEXT: movk x0, #:abs_g1_nc:var8
+; STATIC-NEXT: movk x0, #:abs_g2_nc:var8
+; STATIC-NEXT: movk x0, #:abs_g3:var8
+; STATIC-NEXT: ret
+;
+; PIC-LABEL: global_addr:
+; PIC: .Lglobal_addr$local:
+; PIC-NEXT: .type .Lglobal_addr$local,@function
+; PIC-NEXT: .cfi_startproc
+; PIC-NEXT: // %bb.0:
+; PIC-NEXT: adrp x0, .Lvar8$local
+; PIC-NEXT: add x0, x0, :lo12:.Lvar8$local
+; PIC-NEXT: ret
+ ret ptr @var8
+ ; The address calculation (movz/movk for static, adrp/add for PIC) should be returned directly in x0.
+} + +define dso_local i8 @global_i8() { +; +; STATIC-LABEL: global_i8: +; STATIC: // %bb.0: +; STATIC-NEXT: movz x8, #:abs_g0_nc:var8 +; STATIC-NEXT: movk x8, #:abs_g1_nc:var8 +; STATIC-NEXT: movk x8, #:abs_g2_nc:var8 +; STATIC-NEXT: movk x8, #:abs_g3:var8 +; STATIC-NEXT: ldrb w0, [x8] +; STATIC-NEXT: ret +; +; PIC-LABEL: global_i8: +; PIC: .Lglobal_i8$local: +; PIC-NEXT: .type .Lglobal_i8$local,@function +; PIC-NEXT: .cfi_startproc +; PIC-NEXT: // %bb.0: +; PIC-NEXT: adrp x8, .Lvar8$local +; PIC-NEXT: ldrb w0, [x8, :lo12:.Lvar8$local] +; PIC-NEXT: ret + %val = load i8, ptr @var8 + ret i8 %val +} + +define dso_local i16 @global_i16() { +; +; STATIC-LABEL: global_i16: +; STATIC: // %bb.0: +; STATIC-NEXT: movz x8, #:abs_g0_nc:var16 +; STATIC-NEXT: movk x8, #:abs_g1_nc:var16 +; STATIC-NEXT: movk x8, #:abs_g2_nc:var16 +; STATIC-NEXT: movk x8, #:abs_g3:var16 +; STATIC-NEXT: ldrh w0, [x8] +; STATIC-NEXT: ret +; +; PIC-LABEL: global_i16: +; PIC: .Lglobal_i16$local: +; PIC-NEXT: .type .Lglobal_i16$local,@function +; PIC-NEXT: .cfi_startproc +; PIC-NEXT: // %bb.0: +; PIC-NEXT: adrp x8, .Lvar16$local +; PIC-NEXT: ldrh w0, [x8, :lo12:.Lvar16$local] +; PIC-NEXT: ret + %val = load i16, ptr @var16 + ret i16 %val +} + +define dso_local i32 @global_i32() { +; +; STATIC-LABEL: global_i32: +; STATIC: // %bb.0: +; STATIC-NEXT: movz x8, #:abs_g0_nc:var32 +; STATIC-NEXT: movk x8, #:abs_g1_nc:var32 +; STATIC-NEXT: movk x8, #:abs_g2_nc:var32 +; STATIC-NEXT: movk x8, #:abs_g3:var32 +; STATIC-NEXT: ldr w0, [x8] +; STATIC-NEXT: ret +; +; PIC-LABEL: global_i32: +; PIC: .Lglobal_i32$local: +; PIC-NEXT: .type .Lglobal_i32$local,@function +; PIC-NEXT: .cfi_startproc +; PIC-NEXT: // %bb.0: +; PIC-NEXT: adrp x8, .Lvar32$local +; PIC-NEXT: ldr w0, [x8, :lo12:.Lvar32$local] +; PIC-NEXT: ret + %val = load i32, ptr @var32 + ret i32 %val +} + +define dso_local i64 @global_i64() { +; +; STATIC-LABEL: global_i64: +; STATIC: // %bb.0: +; STATIC-NEXT: movz x8, #:abs_g0_nc:var64 +; STATIC-NEXT: movk x8, #:abs_g1_nc:var64 +; STATIC-NEXT: movk x8, #:abs_g2_nc:var64 +; STATIC-NEXT: movk x8, #:abs_g3:var64 +; STATIC-NEXT: ldr x0, [x8] +; STATIC-NEXT: ret +; +; PIC-LABEL: global_i64: +; PIC: .Lglobal_i64$local: +; PIC-NEXT: .type .Lglobal_i64$local,@function +; PIC-NEXT: .cfi_startproc +; PIC-NEXT: // %bb.0: +; PIC-NEXT: adrp x8, .Lvar64$local +; PIC-NEXT: ldr x0, [x8, :lo12:.Lvar64$local] +; PIC-NEXT: ret + %val = load i64, ptr @var64 + ret i64 %val +} + +define dso_local <2 x i64> @constpool() { +; +; STATIC-LABEL: constpool: +; STATIC: // %bb.0: +; STATIC-NEXT: movz x8, #:abs_g0_nc:.LCPI5_0 +; STATIC-NEXT: movk x8, #:abs_g1_nc:.LCPI5_0 +; STATIC-NEXT: movk x8, #:abs_g2_nc:.LCPI5_0 +; STATIC-NEXT: movk x8, #:abs_g3:.LCPI5_0 +; STATIC-NEXT: ldr q0, [x8] +; STATIC-NEXT: ret +; +; PIC-LABEL: constpool: +; PIC: .Lconstpool$local: +; PIC-NEXT: .type .Lconstpool$local,@function +; PIC-NEXT: .cfi_startproc +; PIC-NEXT: // %bb.0: +; PIC-NEXT: adrp x8, .LCPI5_0 +; PIC-NEXT: ldr q0, [x8, :lo12:.LCPI5_0] +; PIC-NEXT: ret + ret <2 x i64> +} diff --git a/llvm/test/CodeGen/AArch64/jump-table.ll b/llvm/test/CodeGen/AArch64/jump-table.ll index 5b414d5b5e33dd..6c32444657542d 100644 --- a/llvm/test/CodeGen/AArch64/jump-table.ll +++ b/llvm/test/CodeGen/AArch64/jump-table.ll @@ -1,5 +1,6 @@ ; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s ; RUN: llc -no-integrated-as -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu 
-aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -no-integrated-as -code-model=large -relocation-model=pic -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-PIC %s ; RUN: llc -no-integrated-as -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-enable-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s ; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-IOS %s ; RUN: llc -no-integrated-as -code-model=tiny -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-TINY %s diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll index 8573a8a2d2571d..42c0bf79e77897 100644 --- a/llvm/test/CodeGen/AArch64/zext.ll +++ b/llvm/test/CodeGen/AArch64/zext.ll @@ -1196,7 +1196,7 @@ define <16 x i64> @zext_v16i10_v16i64(<16 x i10> %a) { ; ; CHECK-GI-LABEL: zext_v16i10_v16i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: fmov s7, w0 +; CHECK-GI-NEXT: fmov s16, w0 ; CHECK-GI-NEXT: fmov s17, w2 ; CHECK-GI-NEXT: ldr s0, [sp] ; CHECK-GI-NEXT: fmov s18, w4 @@ -1207,33 +1207,33 @@ define <16 x i64> @zext_v16i10_v16i64(<16 x i10> %a) { ; CHECK-GI-NEXT: ldr s4, [sp, #32] ; CHECK-GI-NEXT: ldr s5, [sp, #40] ; CHECK-GI-NEXT: ldr s6, [sp, #48] -; CHECK-GI-NEXT: ldr s16, [sp, #56] -; CHECK-GI-NEXT: mov v7.s[1], w1 +; CHECK-GI-NEXT: ldr s7, [sp, #56] +; CHECK-GI-NEXT: mov v16.s[1], w1 ; CHECK-GI-NEXT: mov v17.s[1], w3 ; CHECK-GI-NEXT: mov v18.s[1], w5 ; CHECK-GI-NEXT: mov v19.s[1], w7 ; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] ; CHECK-GI-NEXT: mov v2.s[1], v3.s[0] ; CHECK-GI-NEXT: mov v4.s[1], v5.s[0] -; CHECK-GI-NEXT: mov v6.s[1], v16.s[0] +; CHECK-GI-NEXT: mov v6.s[1], v7.s[0] ; CHECK-GI-NEXT: adrp x8, .LCPI54_0 -; CHECK-GI-NEXT: ldr q16, [x8, :lo12:.LCPI54_0] -; CHECK-GI-NEXT: ushll v1.2d, v7.2s, #0 +; CHECK-GI-NEXT: ushll v1.2d, v16.2s, #0 ; CHECK-GI-NEXT: ushll v3.2d, v17.2s, #0 ; CHECK-GI-NEXT: ushll v5.2d, v18.2s, #0 ; CHECK-GI-NEXT: ushll v7.2d, v19.2s, #0 -; CHECK-GI-NEXT: ushll v17.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll v16.2d, v0.2s, #0 ; CHECK-GI-NEXT: ushll v18.2d, v2.2s, #0 ; CHECK-GI-NEXT: ushll v19.2d, v4.2s, #0 ; CHECK-GI-NEXT: ushll v20.2d, v6.2s, #0 -; CHECK-GI-NEXT: and v0.16b, v1.16b, v16.16b -; CHECK-GI-NEXT: and v1.16b, v3.16b, v16.16b -; CHECK-GI-NEXT: and v2.16b, v5.16b, v16.16b -; CHECK-GI-NEXT: and v3.16b, v7.16b, v16.16b -; CHECK-GI-NEXT: and v4.16b, v17.16b, v16.16b -; CHECK-GI-NEXT: and v5.16b, v18.16b, v16.16b -; CHECK-GI-NEXT: and v6.16b, v19.16b, v16.16b -; CHECK-GI-NEXT: and v7.16b, v20.16b, v16.16b +; CHECK-GI-NEXT: ldr q17, [x8, :lo12:.LCPI54_0] +; CHECK-GI-NEXT: and v0.16b, v1.16b, v17.16b +; CHECK-GI-NEXT: and v1.16b, v3.16b, v17.16b +; CHECK-GI-NEXT: and v2.16b, v5.16b, v17.16b +; CHECK-GI-NEXT: and v3.16b, v7.16b, v17.16b +; CHECK-GI-NEXT: and v4.16b, v16.16b, v17.16b +; CHECK-GI-NEXT: and v5.16b, v18.16b, v17.16b +; CHECK-GI-NEXT: and v6.16b, v19.16b, v17.16b +; CHECK-GI-NEXT: and v7.16b, v20.16b, v17.16b ; CHECK-GI-NEXT: ret entry: %c = zext <16 x i10> %a to <16 x i64> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir index b99b4cf54c5b3d..bd2f5181fd525d 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir @@ -50,8 +50,8 @@ body: | ; CHECK-NEXT: %and5:_(s1) = G_XOR %unmerge3_5, %negone ; CHECK-NEXT: %and6:_(s1) = G_XOR %unmerge3_6, %negone ; CHECK-NEXT: %and7:_(s1) = G_XOR %unmerge3_7, %negone - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT %and0(s1) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C10]] ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT %and1(s1) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT1]], [[C10]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir index 2ff6433b362d69..3b456ed248b3af 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir @@ -139,8 +139,7 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -549,11 +548,9 @@ body: | ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND]](s32), [[LSHR]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND1]](s32), [[LSHR1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64), implicit [[MV1]](s64) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 @@ -1305,8 +1302,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] @@ -1453,8 +1450,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]] ; 
CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir index 6f866ea4785696..7334ae71d63cf9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir @@ -31,10 +31,10 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) @@ -55,13 +55,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BITCAST]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -134,8 +134,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 @@ -158,8 +158,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 @@ -181,8 +181,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; 
CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 @@ -209,12 +209,12 @@ body: | ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -241,12 +241,12 @@ body: | ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -269,12 +269,12 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 8 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]] + ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll index 0f27f445fe569f..4a593d26f809c8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll @@ -44,7 +44,7 @@ define amdgpu_kernel void @call_debug_loc() { ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]], debug-location !6 ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]], debug-location !6 ; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !6 - ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def $scc, debug-location !6 + ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee, target-flags(amdgpu-gotprel32-hi) @callee, implicit-def $scc, debug-location !6 ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !6 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !6 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !6 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll index 056629ca354518..525075e516d21b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -1327,9 +1327,9 @@ define i32 @v_fshl_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2 ; GFX9-NEXT: v_and_b32_e32 v8, 7, v2 ; GFX9-NEXT: v_not_b32_e32 v2, v2 -; GFX9-NEXT: s_mov_b32 s5, 1 +; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 -; GFX9-NEXT: v_lshrrev_b16_sdwa v10, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshrrev_b16_sdwa v10, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_lshlrev_b16_e32 v8, v8, v0 ; GFX9-NEXT: v_lshrrev_b16_e32 v2, v2, v10 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 @@ -1338,7 +1338,7 @@ define i32 @v_fshl_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { ; GFX9-NEXT: v_not_b32_e32 v5, v5 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 -; GFX9-NEXT: v_lshrrev_b16_sdwa v4, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshrrev_b16_sdwa v4, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_mov_b32_e32 v9, 0xff ; GFX9-NEXT: v_lshlrev_b16_e32 v3, v8, v3 ; GFX9-NEXT: v_lshrrev_b16_e32 v4, v5, v4 @@ -1360,9 +1360,9 @@ define i32 
@v_fshl_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v6, v1 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX9-NEXT: v_mov_b32_e32 v1, 8 -; GFX9-NEXT: s_movk_i32 s4, 0xff +; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v1, v2, s4, v1 +; GFX9-NEXT: v_and_or_b32 v1, v2, s5, v1 ; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v4 ; GFX9-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 @@ -1807,48 +1807,47 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) { define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) { ; GFX6-LABEL: s_fshl_v2i24: ; GFX6: ; %bb.0: -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX6-NEXT: s_lshr_b32 s6, s0, 16 ; GFX6-NEXT: s_lshr_b32 s7, s0, 24 ; GFX6-NEXT: s_and_b32 s9, s0, 0xff ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008 +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 ; GFX6-NEXT: s_lshl_b32 s0, s0, 8 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff +; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX6-NEXT: s_or_b32 s0, s9, s0 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: s_lshr_b32 s8, s1, 8 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff -; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_or_b32 s0, s0, s6 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 ; GFX6-NEXT: s_and_b32 s6, s8, 0xff ; GFX6-NEXT: s_or_b32 s1, s7, s1 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8 +; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: s_or_b32 s1, s1, s6 ; GFX6-NEXT: s_lshr_b32 s6, s2, 16 ; GFX6-NEXT: s_lshr_b32 s7, s2, 24 ; GFX6-NEXT: s_and_b32 s9, s2, 0xff ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008 -; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX6-NEXT: s_lshl_b32 s2, s2, 8 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff ; GFX6-NEXT: s_or_b32 s2, s9, s2 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX6-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX6-NEXT: s_lshr_b32 s8, s3, 8 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff +; GFX6-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX6-NEXT: s_or_b32 s2, s2, s6 ; GFX6-NEXT: s_lshl_b32 s3, s3, 8 ; GFX6-NEXT: s_and_b32 s6, s8, 0xff -; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX6-NEXT: s_or_b32 s3, s7, s3 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 @@ -1858,78 +1857,77 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX6-NEXT: s_lshr_b32 s7, s4, 24 ; GFX6-NEXT: s_and_b32 s9, s4, 0xff ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008 +; GFX6-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX6-NEXT: s_lshl_b32 s4, s4, 8 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 ; GFX6-NEXT: s_or_b32 s4, s9, s4 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 ; GFX6-NEXT: s_or_b32 s4, s4, s6 -; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GFX6-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; 
GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_lshr_b32 s8, s5, 8 -; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX6-NEXT: s_and_b32 s5, s5, 0xff -; GFX6-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX6-NEXT: s_lshl_b32 s5, s5, 8 -; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX6-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX6-NEXT: s_and_b32 s6, s8, 0xff -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX6-NEXT: s_or_b32 s5, s7, s5 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX6-NEXT: s_or_b32 s5, s5, s6 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0 -; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v1 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 ; GFX6-NEXT: s_lshr_b32 s0, s2, 1 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v3 ; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1 -; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX6-NEXT: s_lshr_b32 s0, s3, 1 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX6-NEXT: v_lshl_b32_e32 v1, s1, v1 +; GFX6-NEXT: v_lshl_b32_e32 v0, s1, v0 ; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 -; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8 +; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 ; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 
8 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v1 +; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_bfe_u32 v2, v0, 8, 8 +; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_readfirstlane_b32 s0, v1 +; GFX6-NEXT: v_readfirstlane_b32 s1, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_fshl_v2i24: @@ -1942,9 +1940,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX8-NEXT: s_lshl_b32 s6, s6, 8 ; GFX8-NEXT: s_or_b32 s0, s0, s6 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s9, s1, 8 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16 @@ -1952,24 +1948,24 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX8-NEXT: s_or_b32 s0, s0, s6 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8 ; GFX8-NEXT: s_and_b32 s6, s9, 0xff +; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 ; GFX8-NEXT: s_or_b32 s1, s8, s1 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16 -; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX8-NEXT: s_or_b32 s1, s1, s6 ; GFX8-NEXT: s_lshr_b32 s6, s2, 8 -; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff ; GFX8-NEXT: s_lshr_b32 s7, s2, 16 ; GFX8-NEXT: s_lshr_b32 s8, s2, 24 ; GFX8-NEXT: s_and_b32 s2, s2, 0xff ; GFX8-NEXT: s_lshl_b32 s6, s6, 8 +; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX8-NEXT: s_or_b32 s2, s2, s6 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff -; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8 +; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX8-NEXT: s_lshr_b32 s9, s3, 8 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16 @@ -1977,11 +1973,12 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX8-NEXT: s_or_b32 s2, s2, s6 ; GFX8-NEXT: s_lshl_b32 s3, s3, 8 ; GFX8-NEXT: s_and_b32 s6, s9, 0xff +; GFX8-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX8-NEXT: s_or_b32 s3, s8, s3 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX8-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16 -; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX8-NEXT: s_or_b32 s3, s3, s6 ; GFX8-NEXT: s_lshr_b32 s6, s4, 8 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff @@ -1989,212 +1986,207 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX8-NEXT: s_lshr_b32 s8, s4, 24 ; GFX8-NEXT: s_and_b32 s4, s4, 0xff ; GFX8-NEXT: s_lshl_b32 s6, s6, 8 +; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX8-NEXT: s_or_b32 s4, s4, s6 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16 ; GFX8-NEXT: s_or_b32 s4, s4, s6 -; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2 +; 
GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 +; GFX8-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s9, s5, 8 -; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX8-NEXT: s_and_b32 s5, s5, 0xff -; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX8-NEXT: s_lshl_b32 s5, s5, 8 -; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 -; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX8-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s4, v1 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX8-NEXT: s_and_b32 s6, s9, 0xff -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX8-NEXT: s_or_b32 s5, s8, s5 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX8-NEXT: s_or_b32 s5, s5, s6 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 -; GFX8-NEXT: v_mul_hi_u32 v1, s5, v1 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s0 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v1 +; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0 ; GFX8-NEXT: s_lshr_b32 s0, s2, 1 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0 -; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s5, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1 -; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX8-NEXT: s_lshr_b32 s0, s3, 1 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s1 +; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s1 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX8-NEXT: v_mov_b32_e32 v2, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_mov_b32_e32 v4, 16 -; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; 
GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX8-NEXT: v_or_b32_e32 v0, v3, v0 -; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v1 +; GFX8-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX8-NEXT: v_readfirstlane_b32 s0, v1 +; GFX8-NEXT: v_readfirstlane_b32 s1, v0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_fshl_v2i24: ; GFX9: ; %bb.0: -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: s_lshr_b32 s7, s0, 8 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff ; GFX9-NEXT: s_lshr_b32 s9, s0, 16 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: s_lshr_b32 s10, s0, 24 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff ; GFX9-NEXT: s_lshl_b32 s7, s7, 8 ; GFX9-NEXT: s_or_b32 s0, s0, s7 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8 ; GFX9-NEXT: s_lshr_b32 s11, s1, 8 ; GFX9-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff -; GFX9-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX9-NEXT: s_or_b32 s0, s0, s7 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8 ; GFX9-NEXT: s_and_b32 s7, s11, 0xff +; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 ; GFX9-NEXT: s_or_b32 s1, s10, s1 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 ; GFX9-NEXT: s_or_b32 s1, s1, s7 ; GFX9-NEXT: s_lshr_b32 s7, s2, 8 -; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff ; GFX9-NEXT: s_lshr_b32 s9, s2, 16 ; GFX9-NEXT: s_lshr_b32 s10, s2, 24 ; GFX9-NEXT: s_and_b32 s2, s2, 0xff ; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX9-NEXT: s_or_b32 s2, s2, s7 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff +; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 ; GFX9-NEXT: s_lshr_b32 s11, s3, 8 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 ; GFX9-NEXT: s_and_b32 s3, s3, 0xff -; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX9-NEXT: s_or_b32 s2, s2, s7 ; GFX9-NEXT: s_lshl_b32 s3, s3, 8 ; GFX9-NEXT: s_and_b32 s7, s11, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX9-NEXT: s_or_b32 s3, s10, s3 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 +; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 ; GFX9-NEXT: s_or_b32 s3, s3, s7 ; GFX9-NEXT: s_lshr_b32 s7, s4, 8 -; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GFX9-NEXT: v_mul_f32_e32 v0, 
0x4f7ffffe, v0 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff -; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: s_lshr_b32 s9, s4, 16 ; GFX9-NEXT: s_lshr_b32 s10, s4, 24 ; GFX9-NEXT: s_and_b32 s4, s4, 0xff ; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX9-NEXT: s_or_b32 s4, s4, s7 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 ; GFX9-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 -; GFX9-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX9-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX9-NEXT: s_or_b32 s4, s4, s7 -; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1 ; GFX9-NEXT: s_lshr_b32 s11, s5, 8 ; GFX9-NEXT: s_and_b32 s5, s5, 0xff -; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8 ; GFX9-NEXT: s_and_b32 s7, s11, 0xff ; GFX9-NEXT: s_or_b32 s5, s10, s5 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX9-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 ; GFX9-NEXT: s_or_b32 s5, s5, s7 -; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 -; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 -; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 -; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0 -; GFX9-NEXT: s_lshr_b32 s2, s2, 1 -; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s2 -; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1 -; GFX9-NEXT: v_lshl_or_b32 v0, s0, v0, v2 -; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1 +; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1 -; GFX9-NEXT: s_lshr_b32 s0, s3, 1 +; GFX9-NEXT: s_lshr_b32 s2, s2, 1 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s2 +; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0 +; GFX9-NEXT: v_lshl_or_b32 v1, s0, v1, v2 +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0 +; GFX9-NEXT: s_lshr_b32 s0, s3, 1 +; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s0 ; GFX9-NEXT: s_mov_b32 s6, 8 -; GFX9-NEXT: v_lshl_or_b32 v1, s1, v1, v2 +; GFX9-NEXT: v_lshl_or_b32 v0, s1, v0, v2 ; GFX9-NEXT: s_mov_b32 s8, 16 ; GFX9-NEXT: s_movk_i32 s0, 0xff -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:DWORD src1_sel:BYTE_1 -; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v1 -; GFX9-NEXT: v_and_or_b32 v2, v0, s0, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v0 +; GFX9-NEXT: v_and_or_b32 v2, v1, s0, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8 -; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_or3_b32 v1, v2, v1, v3 +; GFX9-NEXT: v_bfe_u32 v2, v0, 8, 8 +; GFX9-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX9-NEXT: v_lshl_or_b32 v0, v0, 8, v2 +; GFX9-NEXT: v_readfirstlane_b32 s0, v1 +; GFX9-NEXT: v_readfirstlane_b32 s1, v0 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: s_fshl_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX10-NEXT: s_lshr_b32 s6, s0, 8 ; GFX10-NEXT: s_lshr_b32 s7, s0, 16 ; GFX10-NEXT: s_and_b32 s6, s6, 0xff -; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s6, s6, 8 ; GFX10-NEXT: s_and_b32 s7, s7, 0xff @@ -2202,244 +2194,251 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX10-NEXT: s_and_b32 s6, 0xffff, s7 ; GFX10-NEXT: s_lshr_b32 s7, s4, 8 ; GFX10-NEXT: s_lshr_b32 s10, s4, 16 -; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX10-NEXT: s_and_b32 s7, s7, 0xff ; GFX10-NEXT: s_lshr_b32 s11, s4, 24 +; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 +; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff -; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX10-NEXT: s_lshl_b32 s7, s7, 8 ; GFX10-NEXT: s_lshr_b32 s12, s5, 8 +; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX10-NEXT: s_or_b32 s4, s4, s7 -; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 -; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 ; GFX10-NEXT: s_and_b32 s7, s10, 0xff ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 +; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v1 +; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v0 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7 ; GFX10-NEXT: s_and_b32 s5, s5, 0xff ; GFX10-NEXT: s_lshl_b32 s7, s7, 16 ; GFX10-NEXT: s_lshl_b32 s5, s5, 8 -; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX10-NEXT: s_or_b32 s4, s4, s7 ; GFX10-NEXT: s_and_b32 s7, s12, 0xff +; GFX10-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX10-NEXT: v_mul_hi_u32 v3, v0, v3 ; GFX10-NEXT: s_or_b32 s5, s11, s5 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX10-NEXT: s_lshl_b32 s7, s7, 16 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 -; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 -; GFX10-NEXT: s_or_b32 s5, s5, s7 ; GFX10-NEXT: s_lshr_b32 s9, s1, 8 +; GFX10-NEXT: s_or_b32 s5, s5, s7 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v3 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff -; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX10-NEXT: 
v_mul_hi_u32 v1, s5, v1 -; GFX10-NEXT: s_lshl_b32 s1, s1, 8 ; GFX10-NEXT: s_and_b32 s7, s9, 0xff +; GFX10-NEXT: s_lshl_b32 s1, s1, 8 +; GFX10-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX10-NEXT: v_mul_hi_u32 v0, s5, v0 ; GFX10-NEXT: s_or_b32 s1, s8, s1 ; GFX10-NEXT: s_lshr_b32 s8, s2, 8 ; GFX10-NEXT: s_lshr_b32 s9, s2, 16 ; GFX10-NEXT: s_and_b32 s8, s8, 0xff -; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX10-NEXT: s_lshr_b32 s10, s2, 24 ; GFX10-NEXT: s_and_b32 s2, s2, 0xff +; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX10-NEXT: s_lshl_b32 s8, s8, 8 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7 ; GFX10-NEXT: s_or_b32 s2, s2, s8 ; GFX10-NEXT: s_and_b32 s0, 0xffff, s0 -; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0 -; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1 +; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX10-NEXT: s_lshl_b32 s6, s6, 16 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s4, v1 +; GFX10-NEXT: v_sub_nc_u32_e32 v0, s5, v0 ; GFX10-NEXT: s_lshr_b32 s4, s3, 8 ; GFX10-NEXT: s_and_b32 s5, s9, 0xff ; GFX10-NEXT: s_and_b32 s3, s3, 0xff -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX10-NEXT: s_lshl_b32 s3, s3, 8 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 -; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 ; GFX10-NEXT: s_lshl_b32 s5, s5, 16 ; GFX10-NEXT: s_or_b32 s3, s10, s3 -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX10-NEXT: s_or_b32 s2, s2, s5 ; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: s_lshr_b32 s2, s2, 1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 ; GFX10-NEXT: s_or_b32 s3, s3, s4 -; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX10-NEXT: s_lshl_b32 s6, s6, 16 +; GFX10-NEXT: s_lshr_b32 s2, s2, 1 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 -; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v1 +; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX10-NEXT: s_lshl_b32 s7, s7, 16 ; GFX10-NEXT: s_or_b32 s0, s0, s6 +; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 +; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX10-NEXT: s_or_b32 s1, s1, s7 ; GFX10-NEXT: v_lshrrev_b32_e64 v2, v2, s2 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 ; GFX10-NEXT: s_lshr_b32 s2, s3, 1 -; GFX10-NEXT: s_or_b32 s1, s1, s7 -; GFX10-NEXT: v_lshl_or_b32 v0, s0, v0, v2 +; GFX10-NEXT: v_lshl_or_b32 v1, s0, v1, v2 ; 
GFX10-NEXT: v_lshrrev_b32_e64 v3, v3, s2 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 -; GFX10-NEXT: v_lshl_or_b32 v1, s1, v1, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshl_or_b32 v0, s1, v0, v3 ; GFX10-NEXT: s_mov_b32 s0, 16 -; GFX10-NEXT: v_and_or_b32 v2, v0, 0xff, v2 -; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8 -; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX10-NEXT: v_and_or_b32 v2, v1, 0xff, v2 +; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_bfe_u32 v4, v0, 8, 8 +; GFX10-NEXT: v_bfe_u32 v0, v0, 16, 8 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4 -; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX10-NEXT: v_readfirstlane_b32 s1, v1 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_lshl_or_b32 v0, v0, 8, v4 +; GFX10-NEXT: v_or3_b32 v1, v2, v1, v3 +; GFX10-NEXT: v_readfirstlane_b32 s1, v0 +; GFX10-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: s_fshl_v2i24: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX11-NEXT: s_lshr_b32 s6, s0, 8 ; GFX11-NEXT: s_lshr_b32 s7, s0, 16 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff -; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX11-NEXT: s_lshr_b32 s8, s0, 24 +; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff ; GFX11-NEXT: s_lshl_b32 s6, s6, 8 -; GFX11-NEXT: s_lshr_b32 s10, s4, 24 +; GFX11-NEXT: s_and_b32 s7, s7, 0xff ; GFX11-NEXT: s_or_b32 s0, s0, s6 -; GFX11-NEXT: s_and_b32 s6, s7, 0xff +; GFX11-NEXT: s_and_b32 s6, 0xffff, s7 ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 -; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_mul_f32 v0, 0x4f7ffffe, v0 :: v_dual_mul_f32 v1, 0x4f7ffffe, v1 ; GFX11-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-NEXT: s_lshr_b32 s7, s4, 16 +; GFX11-NEXT: s_lshr_b32 s9, s4, 16 ; GFX11-NEXT: s_or_b32 s0, s0, s6 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 +; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX11-NEXT: s_lshr_b32 s6, s4, 8 -; GFX11-NEXT: s_and_b32 s4, s4, 0xff +; GFX11-NEXT: s_lshr_b32 s10, s4, 24 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff -; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 -; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 +; GFX11-NEXT: s_and_b32 s4, s4, 0xff +; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX11-NEXT: s_lshl_b32 s6, s6, 8 -; GFX11-NEXT: s_and_b32 s7, s7, 0xff -; GFX11-NEXT: s_or_b32 s4, s4, s6 -; GFX11-NEXT: s_and_b32 s6, 0xffff, s7 ; GFX11-NEXT: s_lshr_b32 s11, s5, 8 +; GFX11-NEXT: s_or_b32 s4, s4, s6 +; GFX11-NEXT: s_and_b32 s6, s9, 0xff +; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v0 +; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX11-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX11-NEXT: s_lshl_b32 s6, s6, 16 ; GFX11-NEXT: s_and_b32 s5, s5, 0xff ; GFX11-NEXT: s_or_b32 s4, s4, s6 ; GFX11-NEXT: s_lshl_b32 s5, s5, 8 ; 
GFX11-NEXT: s_and_b32 s6, s11, 0xff +; GFX11-NEXT: v_mul_hi_u32 v3, v0, v3 ; GFX11-NEXT: s_or_b32 s5, s10, s5 ; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2 -; GFX11-NEXT: v_mul_hi_u32 v2, v1, v3 ; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX11-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-NEXT: s_lshr_b32 s9, s1, 8 -; GFX11-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX11-NEXT: s_lshr_b32 s7, s1, 8 ; GFX11-NEXT: s_or_b32 s5, s5, s6 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff -; GFX11-NEXT: s_and_b32 s7, s9, 0xff -; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v3 +; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-NEXT: s_and_b32 s6, 0xffff, s7 +; GFX11-NEXT: s_and_b32 s6, s7, 0xff ; GFX11-NEXT: s_lshr_b32 s7, s2, 8 -; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX11-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX11-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v1 ; GFX11-NEXT: s_or_b32 s1, s8, s1 ; GFX11-NEXT: s_lshr_b32 s8, s2, 16 ; GFX11-NEXT: s_and_b32 s7, s7, 0xff -; GFX11-NEXT: s_lshr_b32 s9, s3, 8 -; GFX11-NEXT: s_lshl_b32 s7, s7, 8 -; GFX11-NEXT: s_and_b32 s3, s3, 0xff -; GFX11-NEXT: v_sub_nc_u32_e32 v0, s4, v0 -; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX11-NEXT: s_lshr_b32 s4, s2, 24 +; GFX11-NEXT: s_lshr_b32 s9, s2, 24 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_lshl_b32 s7, s7, 8 +; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX11-NEXT: v_mul_hi_u32 v2, v1, v2 ; GFX11-NEXT: s_or_b32 s2, s2, s7 -; GFX11-NEXT: s_or_b32 s3, s4, s3 -; GFX11-NEXT: v_sub_nc_u32_e32 v1, s5, v1 +; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_sub_nc_u32_e32 v0, s5, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX11-NEXT: s_and_b32 s5, s8, 0xff -; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 -; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_mul_hi_u32 v1, s4, v1 ; GFX11-NEXT: s_lshl_b32 s5, s5, 16 -; GFX11-NEXT: s_and_b32 s4, s9, 0xff ; GFX11-NEXT: s_or_b32 s2, s2, s5 -; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_lshr_b32 s2, s2, 1 -; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1 +; GFX11-NEXT: s_lshr_b32 s4, s3, 8 +; GFX11-NEXT: s_and_b32 s3, s3, 0xff +; GFX11-NEXT: s_and_b32 s4, s4, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 ; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX11-NEXT: s_or_b32 s3, s9, s3 +; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 +; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX11-NEXT: 
v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_or_b32 s3, s3, s4 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-NEXT: s_lshr_b32 s3, s3, 1 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 -; GFX11-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0 -; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v2 :: v_dual_and_b32 v0, 0xffffff, v0 -; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1 +; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v1 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1 -; GFX11-NEXT: v_lshrrev_b32_e64 v2, v2, s2 -; GFX11-NEXT: s_and_b32 s2, 0xffff, s3 -; GFX11-NEXT: s_lshl_b32 s3, s4, 16 +; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3 -; GFX11-NEXT: s_or_b32 s2, s2, s3 -; GFX11-NEXT: v_lshl_or_b32 v0, s0, v0, v2 -; GFX11-NEXT: s_lshr_b32 s0, s2, 1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_lshrrev_b32_e64 v2, v3, s0 -; GFX11-NEXT: s_or_b32 s0, s1, s6 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_lshrrev_b32_e64 v2, v2, s2 +; GFX11-NEXT: s_lshl_b32 s2, s6, 16 +; GFX11-NEXT: v_lshrrev_b32_e64 v3, v3, s3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_lshl_or_b32 v1, s0, v1, v2 +; GFX11-NEXT: s_or_b32 s0, s1, s2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_lshl_or_b32 v0, s0, v0, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v3 -; GFX11-NEXT: v_bfe_u32 v3, v0, 16, 8 -; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v1 +; GFX11-NEXT: v_bfe_u32 v2, v1, 8, 8 +; GFX11-NEXT: v_bfe_u32 v3, v1, 16, 8 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v2 -; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_lshlrev_b32_e32 v3, 24, v4 -; GFX11-NEXT: v_bfe_u32 v4, v1, 8, 8 -; GFX11-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX11-NEXT: v_or3_b32 v0, v0, v2, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX11-NEXT: v_and_or_b32 v1, v1, 0xff, v2 +; GFX11-NEXT: v_bfe_u32 v2, v0, 8, 8 +; GFX11-NEXT: v_bfe_u32 v0, v0, 16, 8 +; 
GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or3_b32 v1, v1, v3, v4 +; GFX11-NEXT: v_lshl_or_b32 v0, v0, 8, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_lshl_or_b32 v1, v1, 8, v4 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s0, v1 +; GFX11-NEXT: v_readfirstlane_b32 s1, v0 ; GFX11-NEXT: ; return to shader part epilog %lhs = bitcast i48 %lhs.arg to <2 x i24> %rhs = bitcast i48 %rhs.arg to <2 x i24> @@ -2455,37 +2454,35 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8 -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v9, v9 -; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX6-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX6-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX6-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 23 -; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7 +; GFX6-NEXT: v_mul_lo_u32 v9, v7, v8 +; GFX6-NEXT: v_mul_lo_u32 v8, v6, v8 +; GFX6-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GFX6-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX6-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v9 -; GFX6-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX6-NEXT: v_mul_lo_u32 v7, v8, v7 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GFX6-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX6-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v7 +; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX6-NEXT: v_mul_hi_u32 v7, v8, v7 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX6-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GFX6-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 23, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 +; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0 -; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v6 -; GFX6-NEXT: v_mul_lo_u32 v6, v7, 24 +; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v7 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 @@ -2509,37 +2506,35 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8 -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 -; GFX8-NEXT: v_rcp_iflag_f32_e32 
v9, v9 -; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX8-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX8-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX8-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX8-NEXT: v_bfe_u32 v2, v2, 1, 23 -; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7 +; GFX8-NEXT: v_mul_lo_u32 v9, v7, v8 +; GFX8-NEXT: v_mul_lo_u32 v8, v6, v8 +; GFX8-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 -; GFX8-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX8-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v9 -; GFX8-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX8-NEXT: v_mul_lo_u32 v7, v8, v7 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v6 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 +; GFX8-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX8-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v7 +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX8-NEXT: v_mul_hi_u32 v7, v8, v7 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7 -; GFX8-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 23, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 +; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 +; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v4, v0 -; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v6 -; GFX8-NEXT: v_mul_lo_u32 v6, v7, 24 +; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v7 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6 @@ -2563,37 +2558,35 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9 -; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 -; GFX9-NEXT: v_mul_lo_u32 v8, v6, v7 ; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX9-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX9-NEXT: v_bfe_u32 v2, v2, 1, 23 -; GFX9-NEXT: v_mul_lo_u32 v7, v9, v7 -; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_lo_u32 v9, v7, v8 ; GFX9-NEXT: v_bfe_u32 v3, v3, 1, 23 -; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 +; GFX9-NEXT: v_mul_lo_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_hi_u32 v9, v7, v9 +; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_add_u32_e32 v7, v7, v9 +; GFX9-NEXT: v_mul_hi_u32 v7, v4, v7 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 -; GFX9-NEXT: 
v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_add_u32_e32 v7, v9, v7 +; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 -; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_sub_u32_e32 v4, v4, v7 +; GFX9-NEXT: v_subrev_u32_e32 v7, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX9-NEXT: v_subrev_u32_e32 v7, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_mul_hi_u32 v6, v5, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc ; GFX9-NEXT: v_sub_u32_e32 v7, 23, v4 ; GFX9-NEXT: v_and_b32_e32 v7, 0xffffff, v7 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v7, v2 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v2 ; GFX9-NEXT: v_sub_u32_e32 v2, v5, v6 @@ -2614,29 +2607,27 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 -; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX10-NEXT: v_bfe_u32 v2, v2, 1, 23 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v7, v7 ; GFX10-NEXT: v_bfe_u32 v3, v3, 1, 23 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 ; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 -; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 -; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 -; GFX10-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX10-NEXT: v_mul_hi_u32 v9, v7, v9 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v8 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v9 -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v7 +; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v6 +; GFX10-NEXT: v_mul_hi_u32 v8, v7, v8 +; GFX10-NEXT: v_mul_hi_u32 v9, v6, v9 +; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v8 +; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v9 +; GFX10-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX10-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6 -; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7 +; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v7 +; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v6 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 @@ -2665,64 +2656,63 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 -; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX11-NEXT: v_bfe_u32 v2, v2, 1, 23 ; GFX11-NEXT: v_bfe_u32 v3, v3, 1, 23 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX11-NEXT: v_rcp_iflag_f32_e32 v7, v7 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_mul_f32 
v6, 0x4f7ffffe, v6 :: v_dual_mul_f32 v7, 0x4f7ffffe, v7 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX11-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v6 +; GFX11-NEXT: v_mul_hi_u32 v9, v6, v9 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v9 ; GFX11-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX11-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 -; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX11-NEXT: v_mul_hi_u32 v9, v7, v9 +; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v7 +; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v8 -; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v9 +; GFX11-NEXT: v_mul_hi_u32 v8, v7, v8 +; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v8 +; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 +; GFX11-NEXT: v_mul_hi_u32 v7, v4, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mul_hi_u32 v7, v5, v7 ; GFX11-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v7 +; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v7 ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 -; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4 -; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4 -; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6 +; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_lshrrev_b32_e32 v2, v6, v2 ; GFX11-NEXT: v_and_b32_e32 v7, 0xffffff, v7 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_lshrrev_b32_e32 v2, v6, v2 +; GFX11-NEXT: v_lshrrev_b32_e32 v3, v7, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_lshl_or_b32 v0, v0, v4, v2 -; GFX11-NEXT: v_lshrrev_b32_e32 v3, v7, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_lshl_or_b32 v1, v1, v5, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll index 88fa7a8406475f..3098ce672bd1cf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -1371,48 +1371,48 @@ define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { ; GFX10-LABEL: v_fshr_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_not_b32_e32 v5, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX10-NEXT: v_not_b32_e32 v8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2 -; GFX10-NEXT: v_not_b32_e32 v12, v7 -; GFX10-NEXT: v_lshlrev_b16 v3, 1, v3 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX10-NEXT: v_and_b32_e32 v12, 7, v12 -; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v0 +; GFX10-NEXT: v_and_b32_e32 v5, 7, v5 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 -; GFX10-NEXT: v_not_b32_e32 v13, v10 +; GFX10-NEXT: v_not_b32_e32 v10, v7 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v1 +; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v2 +; GFX10-NEXT: v_lshlrev_b16 v0, v5, v0 +; GFX10-NEXT: v_and_b32_e32 v5, 7, v10 +; GFX10-NEXT: v_lshlrev_b16 v3, 1, v3 +; GFX10-NEXT: v_not_b32_e32 v13, v11 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_lshlrev_b16 v3, v12, v3 -; GFX10-NEXT: v_not_b32_e32 v12, v11 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0 -; GFX10-NEXT: v_and_b32_e32 v8, 0xff, v1 +; GFX10-NEXT: v_and_b32_e32 v10, 0xff, v1 +; GFX10-NEXT: v_lshlrev_b16 v3, v5, v3 +; GFX10-NEXT: v_and_b32_e32 v5, 0xff, v8 +; GFX10-NEXT: v_not_b32_e32 v8, v12 ; GFX10-NEXT: v_and_b32_e32 v7, 7, v7 -; GFX10-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 +; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 ; GFX10-NEXT: v_and_b32_e32 v13, 7, v13 ; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 ; GFX10-NEXT: v_and_b32_sdwa v1, v1, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 +; GFX10-NEXT: v_lshlrev_b16 v6, 1, v6 ; GFX10-NEXT: v_and_b32_e32 v12, 7, v12 -; GFX10-NEXT: v_lshlrev_b16 v5, 1, v5 -; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 ; GFX10-NEXT: v_and_b32_e32 v2, 
7, v2 -; GFX10-NEXT: v_lshrrev_b16 v6, v7, v6 +; GFX10-NEXT: v_lshrrev_b16 v5, v7, v5 ; GFX10-NEXT: v_lshlrev_b16 v4, v13, v4 -; GFX10-NEXT: v_lshrrev_b16 v1, v10, v1 -; GFX10-NEXT: v_lshlrev_b16 v5, v12, v5 -; GFX10-NEXT: v_lshrrev_b16 v7, v11, v9 -; GFX10-NEXT: v_lshrrev_b16 v2, v2, v8 -; GFX10-NEXT: v_or_b32_e32 v3, v3, v6 -; GFX10-NEXT: v_mov_b32_e32 v6, 8 +; GFX10-NEXT: v_lshrrev_b16 v1, v11, v1 +; GFX10-NEXT: v_lshlrev_b16 v6, v8, v6 +; GFX10-NEXT: v_lshrrev_b16 v7, v12, v9 +; GFX10-NEXT: v_lshrrev_b16 v2, v2, v10 +; GFX10-NEXT: v_or_b32_e32 v3, v3, v5 +; GFX10-NEXT: v_mov_b32_e32 v5, 8 ; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 -; GFX10-NEXT: v_or_b32_e32 v4, v5, v7 +; GFX10-NEXT: v_or_b32_e32 v4, v6, v7 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v4 ; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v2 @@ -1820,14 +1820,13 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX6-NEXT: s_lshr_b32 s6, s0, 16 ; GFX6-NEXT: s_lshr_b32 s7, s0, 24 ; GFX6-NEXT: s_lshr_b32 s8, s1, 8 -; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_and_b32 s9, s0, 0xff ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX6-NEXT: s_lshl_b32 s0, s0, 8 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 -; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8 +; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: s_or_b32 s0, s9, s0 ; GFX6-NEXT: s_or_b32 s1, s7, s1 ; GFX6-NEXT: s_and_b32 s7, s8, 0xff @@ -1835,19 +1834,19 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX6-NEXT: s_lshr_b32 s9, s2, 24 ; GFX6-NEXT: s_and_b32 s11, s2, 0xff ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008 -; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX6-NEXT: s_lshl_b32 s2, s2, 8 ; GFX6-NEXT: s_and_b32 s8, s8, 0xff ; GFX6-NEXT: s_or_b32 s2, s11, s2 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 +; GFX6-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX6-NEXT: s_lshr_b32 s10, s3, 8 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff +; GFX6-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX6-NEXT: s_or_b32 s2, s2, s8 ; GFX6-NEXT: s_lshl_b32 s3, s3, 8 ; GFX6-NEXT: s_and_b32 s8, s10, 0xff -; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX6-NEXT: s_or_b32 s3, s9, s3 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 @@ -1857,103 +1856,100 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX6-NEXT: s_lshr_b32 s9, s4, 24 ; GFX6-NEXT: s_and_b32 s11, s4, 0xff ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008 +; GFX6-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX6-NEXT: s_lshl_b32 s4, s4, 8 ; GFX6-NEXT: s_and_b32 s8, s8, 0xff -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 ; GFX6-NEXT: s_or_b32 s4, s11, s4 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 ; GFX6-NEXT: s_or_b32 s4, s4, s8 -; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GFX6-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX6-NEXT: v_mul_f32_e32 v0, 
0x4f7ffffe, v0 +; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_lshr_b32 s10, s5, 8 -; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX6-NEXT: s_and_b32 s5, s5, 0xff -; GFX6-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX6-NEXT: s_lshl_b32 s5, s5, 8 -; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX6-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX6-NEXT: s_and_b32 s8, s10, 0xff -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX6-NEXT: s_or_b32 s5, s9, s5 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX6-NEXT: s_or_b32 s5, s5, s8 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v1 ; GFX6-NEXT: s_lshl_b32 s4, s6, 17 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1 ; GFX6-NEXT: s_or_b32 s0, s4, s0 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v3 -; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_lshr_b32_e32 v0, s2, v0 -; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX6-NEXT: v_lshr_b32_e32 v1, s2, v1 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0 +; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX6-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0 ; GFX6-NEXT: s_lshl_b32 s0, s7, 17 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_lshr_b32_e32 v1, s3, v1 -; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 -; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_lshr_b32_e32 v0, s3, v0 +; 
GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 ; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v1 +; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_bfe_u32 v2, v0, 8, 8 +; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_readfirstlane_b32 s0, v1 +; GFX6-NEXT: v_readfirstlane_b32 s1, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_fshr_v2i24: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s9, s1, 8 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s6, s0, 8 ; GFX8-NEXT: s_lshr_b32 s8, s0, 24 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8 -; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff ; GFX8-NEXT: s_or_b32 s1, s8, s1 ; GFX8-NEXT: s_lshr_b32 s8, s2, 8 -; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s7, s0, 16 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff ; GFX8-NEXT: s_lshl_b32 s6, s6, 8 @@ -1965,11 +1961,11 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX8-NEXT: s_lshr_b32 s10, s2, 24 ; GFX8-NEXT: s_and_b32 s2, s2, 0xff ; GFX8-NEXT: s_lshl_b32 s8, s8, 8 +; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX8-NEXT: s_or_b32 s2, s2, s8 ; GFX8-NEXT: s_and_b32 s8, s9, 0xff -; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8 +; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX8-NEXT: s_lshr_b32 s11, s3, 8 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16 @@ -1977,11 +1973,12 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX8-NEXT: s_or_b32 s2, s2, s8 ; GFX8-NEXT: s_lshl_b32 s3, s3, 8 ; GFX8-NEXT: s_and_b32 s8, s11, 0xff +; GFX8-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX8-NEXT: s_or_b32 s3, s10, s3 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 +; GFX8-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16 -; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX8-NEXT: s_or_b32 s3, s3, s8 ; GFX8-NEXT: s_lshr_b32 s8, s4, 8 ; GFX8-NEXT: s_and_b32 s8, s8, 0xff @@ -1989,101 +1986,95 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX8-NEXT: s_lshr_b32 s10, s4, 24 ; GFX8-NEXT: s_and_b32 s4, s4, 0xff ; GFX8-NEXT: s_lshl_b32 s8, s8, 8 +; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX8-NEXT: s_or_b32 s4, s4, s8 ; GFX8-NEXT: s_and_b32 s8, s9, 0xff -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16 ; GFX8-NEXT: s_or_b32 s4, s4, s8 -; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 +; GFX8-NEXT: v_mul_hi_u32 v1, s4, v1 
+; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s11, s5, 8 -; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX8-NEXT: s_and_b32 s5, s5, 0xff -; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX8-NEXT: s_lshl_b32 s5, s5, 8 -; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 -; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX8-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s4, v1 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX8-NEXT: s_and_b32 s8, s11, 0xff -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX8-NEXT: s_or_b32 s5, s10, s5 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX8-NEXT: s_or_b32 s5, s5, s8 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 -; GFX8-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v0 -; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v1 +; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX8-NEXT: s_lshl_b32 s4, s6, 17 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1 ; GFX8-NEXT: s_or_b32 s0, s4, s0 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3 -; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s2 -; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s5, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s2 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0 +; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX8-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v0 ; GFX8-NEXT: s_lshl_b32 s0, s7, 17 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1 ; GFX8-NEXT: s_or_b32 s0, s0, s1 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s3 -; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s3 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-NEXT: v_mov_b32_e32 v2, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v1 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_mov_b32_e32 v4, 16 -; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX8-NEXT: v_or_b32_e32 v0, v3, v0 -; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v1 +; GFX8-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX8-NEXT: v_readfirstlane_b32 s0, v1 +; GFX8-NEXT: v_readfirstlane_b32 s1, v0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_fshr_v2i24: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8 ; GFX9-NEXT: s_lshr_b32 s11, s1, 8 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: s_lshr_b32 s7, s0, 8 ; GFX9-NEXT: s_lshr_b32 s10, s0, 24 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8 -; GFX9-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff ; GFX9-NEXT: s_or_b32 s1, s10, s1 ; GFX9-NEXT: s_lshr_b32 s10, s2, 8 -; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX9-NEXT: s_lshr_b32 s9, s0, 16 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff ; GFX9-NEXT: s_lshl_b32 s7, s7, 8 @@ -2095,12 +2086,11 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX9-NEXT: s_lshr_b32 s12, s2, 24 ; GFX9-NEXT: s_and_b32 s2, s2, 0xff ; GFX9-NEXT: s_lshl_b32 s10, s10, 8 +; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX9-NEXT: s_or_b32 s2, s2, s10 ; GFX9-NEXT: s_and_b32 s10, s11, 0xff -; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 +; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX9-NEXT: s_lshr_b32 s13, s3, 8 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16 @@ -2108,101 +2098,103 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX9-NEXT: s_or_b32 s2, s2, s10 ; GFX9-NEXT: s_lshl_b32 s3, s3, 8 ; GFX9-NEXT: s_and_b32 s10, s13, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX9-NEXT: s_or_b32 s3, s12, s3 ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10 +; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16 -; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: s_or_b32 s3, s3, s10 ; GFX9-NEXT: s_lshr_b32 s10, s4, 8 -; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: 
s_and_b32 s10, s10, 0xff ; GFX9-NEXT: s_lshr_b32 s11, s4, 16 ; GFX9-NEXT: s_lshr_b32 s12, s4, 24 ; GFX9-NEXT: s_and_b32 s4, s4, 0xff ; GFX9-NEXT: s_lshl_b32 s10, s10, 8 +; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX9-NEXT: s_or_b32 s4, s4, s10 ; GFX9-NEXT: s_and_b32 s10, s11, 0xff ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10 -; GFX9-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX9-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX9-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16 ; GFX9-NEXT: s_or_b32 s4, s4, s10 -; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1 ; GFX9-NEXT: s_lshr_b32 s13, s5, 8 ; GFX9-NEXT: s_and_b32 s5, s5, 0xff -; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8 ; GFX9-NEXT: s_and_b32 s10, s13, 0xff ; GFX9-NEXT: s_or_b32 s5, s12, s5 ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10 ; GFX9-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16 -; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX9-NEXT: s_or_b32 s5, s5, s10 -; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 -; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 -; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 -; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: s_or_b32 s5, s5, s10 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1 +; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX9-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_sub_u32_e32 v3, 23, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_sub_u32_e32 v3, 23, v1 ; GFX9-NEXT: s_lshl_b32 s4, s7, 17 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1 -; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX9-NEXT: s_or_b32 s0, s4, s0 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v3 -; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s2 -; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1 -; GFX9-NEXT: v_lshl_or_b32 v0, s0, v2, v0 -; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s2 +; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0 +; GFX9-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX9-NEXT: s_and_b32 s9, 0xffff, s9 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0 ; GFX9-NEXT: s_lshl_b32 s0, s9, 17 ; GFX9-NEXT: s_lshl_b32 s1, s1, 1 -; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX9-NEXT: s_or_b32 s0, s0, s1 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s3 +; GFX9-NEXT: 
v_lshrrev_b32_e64 v0, v0, s3 ; GFX9-NEXT: s_mov_b32 s6, 8 -; GFX9-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX9-NEXT: v_lshl_or_b32 v0, s0, v2, v0 ; GFX9-NEXT: s_mov_b32 s8, 16 ; GFX9-NEXT: s_movk_i32 s0, 0xff -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 -; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v1 -; GFX9-NEXT: v_and_or_b32 v2, v0, s0, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v0 +; GFX9-NEXT: v_and_or_b32 v2, v1, s0, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8 -; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_or3_b32 v1, v2, v1, v3 +; GFX9-NEXT: v_bfe_u32 v2, v0, 8, 8 +; GFX9-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX9-NEXT: v_lshl_or_b32 v0, v0, 8, v2 +; GFX9-NEXT: v_readfirstlane_b32 s0, v1 +; GFX9-NEXT: v_readfirstlane_b32 s1, v0 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: s_fshr_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX10-NEXT: s_lshr_b32 s9, s1, 8 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff ; GFX10-NEXT: s_lshr_b32 s6, s0, 8 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: s_lshl_b32 s1, s1, 8 ; GFX10-NEXT: s_and_b32 s6, s6, 0xff ; GFX10-NEXT: s_or_b32 s1, s8, s1 @@ -2210,123 +2202,121 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX10-NEXT: s_lshr_b32 s7, s0, 16 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s6, s6, 8 -; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX10-NEXT: s_and_b32 s8, s8, 0xff ; GFX10-NEXT: s_or_b32 s0, s0, s6 +; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 +; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX10-NEXT: s_and_b32 s6, s7, 0xff -; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX10-NEXT: s_and_b32 s7, s9, 0xff ; GFX10-NEXT: s_lshr_b32 s9, s4, 16 +; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX10-NEXT: s_lshr_b32 s10, s4, 24 -; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 -; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff ; GFX10-NEXT: s_lshl_b32 s8, s8, 8 -; GFX10-NEXT: s_lshr_b32 s11, s5, 8 +; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v1 +; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v0 ; GFX10-NEXT: s_or_b32 s4, s4, s8 ; GFX10-NEXT: s_and_b32 s8, s9, 0xff -; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 +; GFX10-NEXT: s_lshr_b32 s11, s5, 8 ; GFX10-NEXT: s_and_b32 s8, 0xffff, s8 +; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX10-NEXT: s_and_b32 s5, s5, 0xff +; GFX10-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX10-NEXT: v_mul_hi_u32 v3, v0, v3 ; GFX10-NEXT: s_lshl_b32 s8, s8, 16 ; GFX10-NEXT: s_lshl_b32 s5, s5, 8 ; GFX10-NEXT: s_or_b32 s4, s4, s8 ; GFX10-NEXT: s_and_b32 s8, s11, 0xff -; GFX10-NEXT: 
v_add_nc_u32_e32 v0, v0, v2 ; GFX10-NEXT: s_or_b32 s5, s10, s5 ; GFX10-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v3 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 -; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0 ; GFX10-NEXT: s_lshl_b32 s8, s8, 16 ; GFX10-NEXT: s_lshr_b32 s9, s2, 8 +; GFX10-NEXT: v_mul_hi_u32 v1, s4, v1 ; GFX10-NEXT: s_or_b32 s5, s5, s8 ; GFX10-NEXT: s_lshr_b32 s8, s2, 16 -; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX10-NEXT: v_mul_hi_u32 v0, s5, v0 ; GFX10-NEXT: s_and_b32 s9, s9, 0xff ; GFX10-NEXT: s_lshr_b32 s10, s2, 24 -; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX10-NEXT: s_lshr_b32 s11, s3, 8 ; GFX10-NEXT: s_and_b32 s2, s2, 0xff +; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX10-NEXT: s_lshl_b32 s9, s9, 8 ; GFX10-NEXT: s_and_b32 s8, s8, 0xff -; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX10-NEXT: s_and_b32 s3, s3, 0xff ; GFX10-NEXT: s_or_b32 s2, s2, s9 -; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0 -; GFX10-NEXT: s_and_b32 s4, 0xffff, s8 ; GFX10-NEXT: s_lshl_b32 s3, s3, 8 ; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 -; GFX10-NEXT: s_and_b32 s5, s11, 0xff +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s4, v1 +; GFX10-NEXT: s_and_b32 s4, 0xffff, s8 ; GFX10-NEXT: s_or_b32 s3, s10, s3 -; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 -; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v0, s5, v0 +; GFX10-NEXT: s_and_b32 s5, s11, 0xff ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX10-NEXT: s_lshl_b32 s4, s4, 16 +; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 +; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX10-NEXT: s_lshl_b32 s5, s5, 16 -; GFX10-NEXT: s_or_b32 s2, s2, s4 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 -; GFX10-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX10-NEXT: s_or_b32 s2, s2, s4 +; GFX10-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 ; GFX10-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX10-NEXT: s_or_b32 s3, s3, s5 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7 +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 ; GFX10-NEXT: s_lshl_b32 s4, s6, 17 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1 -; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo -; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX10-NEXT: s_or_b32 s0, s4, s0 ; GFX10-NEXT: s_lshl_b32 s1, s1, 1 -; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 -; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v1 +; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 -; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s2 +; GFX10-NEXT: s_or_b32 s0, s4, s0 +; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 +; GFX10-NEXT: v_sub_nc_u32_e32 v2, 
23, v0 +; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s2 ; GFX10-NEXT: s_lshl_b32 s2, s7, 17 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s3 -; GFX10-NEXT: v_lshl_or_b32 v0, s0, v3, v0 +; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s3 +; GFX10-NEXT: v_lshl_or_b32 v1, s0, v3, v1 ; GFX10-NEXT: s_or_b32 s0, s2, s1 -; GFX10-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX10-NEXT: v_lshl_or_b32 v0, s0, v2, v0 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: s_mov_b32 s0, 16 -; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v1 -; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8 -; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX10-NEXT: v_and_or_b32 v2, v0, 0xff, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v0 +; GFX10-NEXT: v_bfe_u32 v4, v0, 8, 8 +; GFX10-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX10-NEXT: v_and_or_b32 v2, v1, 0xff, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4 -; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX10-NEXT: v_readfirstlane_b32 s1, v1 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_lshl_or_b32 v0, v0, 8, v4 +; GFX10-NEXT: v_or3_b32 v1, v2, v1, v3 +; GFX10-NEXT: v_readfirstlane_b32 s1, v0 +; GFX10-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: s_fshr_v2i24: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX11-NEXT: s_lshr_b32 s6, s0, 8 ; GFX11-NEXT: s_lshr_b32 s7, s0, 16 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff -; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX11-NEXT: s_lshr_b32 s8, s0, 24 +; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff ; GFX11-NEXT: s_lshl_b32 s6, s6, 8 ; GFX11-NEXT: s_lshr_b32 s9, s1, 8 @@ -2334,122 +2324,124 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX11-NEXT: s_and_b32 s6, s7, 0xff ; GFX11-NEXT: s_and_b32 s7, s9, 0xff ; GFX11-NEXT: s_lshr_b32 s9, s4, 8 -; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_mul_f32 v0, 0x4f7ffffe, v0 :: v_dual_mul_f32 v1, 0x4f7ffffe, v1 ; GFX11-NEXT: s_lshr_b32 s10, s4, 16 ; GFX11-NEXT: s_and_b32 s9, s9, 0xff +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX11-NEXT: s_and_b32 s11, s4, 0xff -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX11-NEXT: s_lshl_b32 s9, s9, 8 ; GFX11-NEXT: s_and_b32 s10, s10, 0xff ; GFX11-NEXT: s_or_b32 s9, s11, s9 -; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 -; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 +; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX11-NEXT: s_and_b32 s10, 0xffff, s10 ; GFX11-NEXT: s_and_b32 s9, 0xffff, s9 ; GFX11-NEXT: s_lshl_b32 s10, s10, 16 ; GFX11-NEXT: s_lshr_b32 s11, s5, 8 -; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v1 ; GFX11-NEXT: s_and_b32 s5, s5, 0xff -; GFX11-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX11-NEXT: s_lshr_b32 s4, s4, 24 +; GFX11-NEXT: s_or_b32 s9, s9, s10 ; GFX11-NEXT: 
s_lshl_b32 s5, s5, 8 ; GFX11-NEXT: s_and_b32 s10, s11, 0xff ; GFX11-NEXT: s_or_b32 s4, s4, s5 ; GFX11-NEXT: s_and_b32 s5, 0xffff, s10 +; GFX11-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX11-NEXT: s_lshl_b32 s5, s5, 16 -; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2 -; GFX11-NEXT: v_mul_hi_u32 v2, v1, v3 -; GFX11-NEXT: s_or_b32 s4, s4, s5 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff -; GFX11-NEXT: s_lshr_b32 s10, s2, 16 -; GFX11-NEXT: v_mul_hi_u32 v0, s9, v0 +; GFX11-NEXT: s_or_b32 s4, s4, s5 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-NEXT: s_lshr_b32 s5, s2, 24 -; GFX11-NEXT: s_or_b32 s1, s8, s1 +; GFX11-NEXT: s_lshr_b32 s10, s2, 16 ; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-NEXT: s_or_b32 s1, s8, s1 ; GFX11-NEXT: s_lshr_b32 s8, s2, 8 -; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshr_b32 s5, s2, 24 +; GFX11-NEXT: v_mul_hi_u32 v1, s9, v1 +; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v0 ; GFX11-NEXT: s_and_b32 s8, s8, 0xff -; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX11-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff ; GFX11-NEXT: s_lshl_b32 s8, s8, 8 ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX11-NEXT: s_or_b32 s2, s2, s8 ; GFX11-NEXT: s_and_b32 s8, s10, 0xff -; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX11-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX11-NEXT: v_sub_nc_u32_e32 v0, s9, v0 ; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX11-NEXT: v_mul_hi_u32 v2, v0, v3 +; GFX11-NEXT: s_and_b32 s8, 0xffff, s8 +; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX11-NEXT: s_or_b32 s2, s2, s8 +; GFX11-NEXT: s_lshl_b32 s0, s0, 1 +; GFX11-NEXT: v_sub_nc_u32_e32 v1, s9, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2 ; GFX11-NEXT: s_lshr_b32 s9, s3, 8 ; GFX11-NEXT: s_and_b32 s3, s3, 0xff -; GFX11-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX11-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX11-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-NEXT: s_or_b32 s2, s2, s8 -; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1 -; GFX11-NEXT: s_and_b32 s4, s9, 0xff -; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX11-NEXT: s_and_b32 s7, 0xffff, s7 ; GFX11-NEXT: s_or_b32 s3, s5, s3 -; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 -; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 -; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX11-NEXT: s_lshl_b32 s4, s4, 16 ; GFX11-NEXT: s_lshl_b32 s5, s6, 17 -; GFX11-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX11-NEXT: s_or_b32 s0, s5, s0 -; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 -; GFX11-NEXT: s_and_b32 s7, 0xffff, s7 +; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX11-NEXT: s_lshl_b32 s1, s1, 1 -; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_sub_nc_u32_e32 v0, s4, v0 +; GFX11-NEXT: s_and_b32 s4, s9, 0xff +; GFX11-NEXT: 
s_and_b32 s4, 0xffff, s4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 -; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v1 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1 -; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v0 -; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2 -; GFX11-NEXT: s_or_b32 s2, s3, s4 ; GFX11-NEXT: v_lshrrev_b32_e64 v1, v1, s2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_lshl_or_b32 v0, s0, v2, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX11-NEXT: s_or_b32 s2, s3, s4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_lshl_or_b32 v1, s0, v2, v1 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3 +; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2 ; GFX11-NEXT: s_lshl_b32 s0, s7, 17 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: s_or_b32 s0, s0, s1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_bfe_u32 v3, v0, 8, 8 -; GFX11-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX11-NEXT: v_bfe_u32 v3, v1, 8, 8 +; GFX11-NEXT: v_lshl_or_b32 v0, s0, v2, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v3 -; GFX11-NEXT: v_bfe_u32 v3, v0, 16, 8 -; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v1 +; GFX11-NEXT: v_bfe_u32 v3, v1, 16, 8 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v2 +; GFX11-NEXT: v_and_or_b32 v1, v1, 0xff, v2 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 24, v4 -; GFX11-NEXT: v_bfe_u32 v4, v1, 8, 8 -; GFX11-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX11-NEXT: v_or3_b32 v0, v0, v2, v3 +; GFX11-NEXT: v_bfe_u32 v4, v0, 8, 8 +; GFX11-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX11-NEXT: v_or3_b32 v1, v1, v2, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_lshl_or_b32 v1, v1, 8, v4 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_lshl_or_b32 v0, v0, 8, v4 +; GFX11-NEXT: v_readfirstlane_b32 s0, v1 ; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s1, v0 ; GFX11-NEXT: ; return to shader part epilog %lhs = bitcast i48 %lhs.arg to <2 x i24> %rhs = bitcast i48 %rhs.arg to <2 x i24> @@ -2465,42 +2457,40 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8 +; GFX6-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 +; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX6-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX6-NEXT: v_cvt_u32_f32_e32 v7, v7 ; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_mul_lo_u32 v9, v7, v8 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7 +; GFX6-NEXT: v_mul_lo_u32 v8, v6, v8 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3 ; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GFX6-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v8, v9 -; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX6-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 -; GFX6-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GFX6-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX6-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v7 +; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX6-NEXT: v_mul_lo_u32 v6, v8, v7 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4 ; GFX6-NEXT: v_and_b32_e32 v7, 0xffffff, v7 -; GFX6-NEXT: v_mul_hi_u32 v6, v8, v6 +; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v7, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; GFX6-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 @@ -2521,42 +2511,40 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8 +; GFX8-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 +; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX8-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX8-NEXT: v_cvt_u32_f32_e32 v7, v7 ; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX8-NEXT: v_and_b32_e32 v5, 
0xffffff, v5 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX8-NEXT: v_mul_lo_u32 v9, v7, v8 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7 +; GFX8-NEXT: v_mul_lo_u32 v8, v6, v8 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX8-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3 ; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 -; GFX8-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v8, v9 -; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX8-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 -; GFX8-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v6 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 +; GFX8-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX8-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v7 +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, v8, v7 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 +; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4 ; GFX8-NEXT: v_and_b32_e32 v7, 0xffffff, v7 -; GFX8-NEXT: v_mul_hi_u32 v6, v8, v6 +; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v7, v0 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6 -; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6 ; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 @@ -2577,42 +2565,40 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9 -; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 -; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX9-NEXT: v_mul_lo_u32 v8, v6, v7 ; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5 -; GFX9-NEXT: v_mul_lo_u32 v7, v9, v7 +; GFX9-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_lo_u32 v9, v7, v8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 +; GFX9-NEXT: v_mul_lo_u32 v8, v6, v8 ; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v3 -; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 -; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_add_u32_e32 v7, v9, v7 -; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX9-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1 -; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_add_u32_e32 v7, v7, v9 +; GFX9-NEXT: 
v_mul_hi_u32 v7, v4, v7 +; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 +; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 -; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX9-NEXT: v_sub_u32_e32 v4, v4, v7 +; GFX9-NEXT: v_subrev_u32_e32 v7, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX9-NEXT: v_subrev_u32_e32 v7, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX9-NEXT: v_sub_u32_e32 v7, 23, v4 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX9-NEXT: v_and_b32_e32 v6, 0xffffff, v6 +; GFX9-NEXT: v_and_b32_e32 v7, 0xffffff, v7 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2 -; GFX9-NEXT: v_sub_u32_e32 v2, v5, v7 +; GFX9-NEXT: v_lshl_or_b32 v0, v0, v7, v2 +; GFX9-NEXT: v_sub_u32_e32 v2, v5, v6 ; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc @@ -2630,31 +2616,29 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 -; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v7, v7 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 ; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 -; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 -; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 -; GFX10-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX10-NEXT: v_mul_hi_u32 v9, v7, v9 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v8 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v9 -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v7 +; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v6 +; GFX10-NEXT: v_mul_hi_u32 v8, v7, v8 +; GFX10-NEXT: v_mul_hi_u32 v9, v6, v9 +; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v8 +; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v9 +; GFX10-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX10-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6 -; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7 +; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v7 +; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v6 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 @@ -2683,40 +2667,37 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 -; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX11-NEXT: v_and_b32_e32 v5, 
0xffffff, v5 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 1, v1 ; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX11-NEXT: v_rcp_iflag_f32_e32 v7, v7 -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_lshlrev_b32 v1, 1, v1 -; GFX11-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX11-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_hi_u32 v9, v6, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v9 ; GFX11-NEXT: v_cvt_u32_f32_e32 v7, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 -; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 +; GFX11-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX11-NEXT: v_mul_hi_u32 v9, v7, v9 +; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX11-NEXT: v_mul_hi_u32 v8, v7, v8 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v8 -; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v9 +; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v8 +; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX11-NEXT: v_mul_hi_u32 v7, v4, v7 ; GFX11-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v7 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v7 ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 -; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v6 ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) @@ -2729,20 +2710,20 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4 ; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6 +; 
GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v7 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, v5, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_lshl_or_b32 v0, v0, v6, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_lshl_or_b32 v1, v1, v4, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll index b98b366b01a468..da17977602cb1d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -17,7 +17,7 @@ define ptr addrspace(4) @external_constant_got() { ; GCN-LABEL: name: external_constant_got ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 12, implicit-def $scc + ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant, target-flags(amdgpu-gotprel32-hi) @external_constant, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p4) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -38,7 +38,7 @@ define ptr addrspace(1) @external_global_got() { ; GCN-LABEL: name: external_global_got ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 12, implicit-def $scc + ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global, target-flags(amdgpu-gotprel32-hi) @external_global, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p1) from got, addrspace 4) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -59,7 +59,7 @@ define ptr addrspace(999) @external_other_got() { ; GCN-LABEL: name: external_other_got ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 12, implicit-def $scc + ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other, target-flags(amdgpu-gotprel32-hi) @external_other, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p999) from got, addrspace 4) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[LOAD]](p999) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -80,7 +80,7 @@ define ptr addrspace(4) @internal_constant_pcrel() { ; GCN-LABEL: name: internal_constant_pcrel ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 12, implicit-def $scc + ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant, target-flags(amdgpu-rel32-hi) @internal_constant, implicit-def $scc ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -100,7 +100,7 @@ define ptr addrspace(1) @internal_global_pcrel() { ; GCN-LABEL: name: internal_global_pcrel ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 12, implicit-def $scc + ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global, target-flags(amdgpu-rel32-hi) @internal_global, implicit-def $scc ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -120,7 +120,7 @@ define ptr addrspace(999) @internal_other_pcrel() { ; GCN-LABEL: name: internal_other_pcrel ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 12, implicit-def $scc + ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other, target-flags(amdgpu-rel32-hi) @internal_other, implicit-def $scc ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -140,7 +140,7 @@ define ptr addrspace(6) @external_constant32_got() { ; GCN-LABEL: name: external_constant32_got ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 12, implicit-def $scc + ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32, target-flags(amdgpu-gotprel32-hi) @external_constant32, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4) ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0 ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) @@ -158,7 +158,7 @@ define ptr addrspace(6) @internal_constant32_pcrel() { ; GCN-LABEL: name: internal_constant32_pcrel ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 12, implicit-def $scc + ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32, 
target-flags(amdgpu-rel32-hi) @internal_constant32, implicit-def $scc ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) ; GCN-NEXT: SI_RETURN implicit $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir index 4bf6b6a623d826..ebd5e2e085632c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir @@ -25,12 +25,14 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -63,12 +65,14 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -101,12 +105,14 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -139,12 +145,14 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_p3_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) ; GFX9-NEXT: $vgpr0 
= COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_p3_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -177,12 +185,14 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_p5_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_p5_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -216,12 +226,14 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_v2s16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -260,12 +272,14 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -296,14 +310,6 @@ body: | bb.0: liveins: $vgpr0 - ; GFX6-LABEL: name: load_private_s32_from_1_gep_2047_known_bits - ; GFX6: liveins: $vgpr0 - ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047_known_bits ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -312,6 +318,7 @@ body: | ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX9-NEXT: 
[[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047_known_bits ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -353,12 +360,14 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -397,6 +406,7 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -405,6 +415,7 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -443,6 +454,7 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -451,6 +463,7 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -489,12 +502,14 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: 
[[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -533,6 +548,7 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -541,6 +557,7 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -579,6 +596,7 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -587,6 +605,7 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -625,6 +644,7 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -633,6 +653,7 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], 
$sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -671,6 +692,7 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -679,6 +701,7 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_8191 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -717,6 +740,7 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -725,6 +749,7 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -763,6 +788,7 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -771,6 +797,7 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -809,6 +836,7 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: 
[[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -817,6 +845,7 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -849,9 +878,11 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_4_constant_0 ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; ; GFX9-LABEL: name: load_private_s32_from_4_constant_0 ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; ; GFX11-LABEL: name: load_private_s32_from_4_constant_0 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) @@ -878,9 +909,11 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; ; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; ; GFX11-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX11: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 16 ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) @@ -907,9 +940,11 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) @@ -937,10 +972,12 @@ body: | ; GFX6: 
[[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) @@ -969,9 +1006,11 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_fi ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_fi ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_fi ; GFX11: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]] @@ -998,9 +1037,11 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] @@ -1030,9 +1071,11 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: 
(load (s8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] @@ -1066,12 +1109,14 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE_SVS:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SVS [[V_MOV_B32_e32_]], %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) @@ -1102,10 +1147,12 @@ body: | ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_neg1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_neg1 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir index 98b29199dcc6d4..7faf05e0aaf309 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir @@ -18,6 +18,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX8-LABEL: name: test_add_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -25,6 +26,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX9-LABEL: name: test_add_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -55,6 +57,7 @@ body: | ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; 
GFX8-LABEL: name: test_add_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -66,6 +69,7 @@ body: | ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_add_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -98,6 +102,7 @@ body: | ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX8-LABEL: name: test_add_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -108,6 +113,7 @@ body: | ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; ; GFX9-LABEL: name: test_add_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -152,6 +158,7 @@ body: | ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: test_add_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -174,6 +181,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_add_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -208,6 +216,7 @@ body: | ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY5]] ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ADD2]](s32) ; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16) + ; ; GFX8-LABEL: name: test_add_v3s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX8-NEXT: {{ $}} @@ -227,6 +236,7 @@ body: | ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] ; GFX8-NEXT: S_ENDPGM 0, implicit [[ADD]](s16), implicit [[ADD1]](s16), implicit [[ADD2]](s16) + ; ; GFX9-LABEL: name: test_add_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} @@ -315,6 +325,7 @@ body: | ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: test_add_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -355,6 +366,7 @@ body: | ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_add_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -389,6 +401,7 @@ body: | ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX8-LABEL: name: test_add_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -400,6 +413,7 @@ body: | ; GFX8-NEXT: 
[[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_add_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -432,6 +446,7 @@ body: | ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX8-LABEL: name: test_add_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -440,10 +455,11 @@ body: | ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] ; GFX8-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_add_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -452,8 +468,8 @@ body: | ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 @@ -478,6 +494,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX8-LABEL: name: test_add_s24 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -485,6 +502,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX9-LABEL: name: test_add_s24 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -517,6 +535,7 @@ body: | ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX8-LABEL: name: test_add_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -528,6 +547,7 @@ body: | ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_add_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -566,6 +586,7 @@ body: | ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; ; GFX8-LABEL: name: test_add_s96 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} @@ -578,6 +599,7 @@ body: | ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES 
[[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; ; GFX9-LABEL: name: test_add_s96 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir index 60f626491c6b4c..66e95921679e50 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -469,10 +469,9 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) @@ -531,10 +530,9 @@ body: | ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -543,38 +541,34 @@ body: | ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL1]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL3]] ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x 
s16>), [[BITCAST9]](<2 x s16>) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND5]](<4 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND3]](<4 x s16>) ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL5]] ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir index 023617350e52f8..59adc212386a20 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir @@ -620,8 +620,8 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT 
i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] @@ -631,13 +631,12 @@ body: | ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C4]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[SHL2]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) @@ -685,8 +684,7 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir index 68ce57cf51c6f2..f4aaab745e03b5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -18,6 +18,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s32_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s32_s32 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -51,6 +53,7 @@ body: | ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[TRUNC]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; ; VI-LABEL: name: test_ashr_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -59,6 +62,7 @@ body: | ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[TRUNC]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; ; GFX9PLUS-LABEL: name: test_ashr_s64_s64 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -85,6 +89,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s32) 
; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; ; VI-LABEL: name: test_ashr_s64_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -92,6 +97,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; ; GFX9PLUS-LABEL: name: test_ashr_s64_s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9PLUS-NEXT: {{ $}} @@ -119,6 +125,7 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; ; VI-LABEL: name: test_ashr_s64_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -128,6 +135,7 @@ body: | ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[AND]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; ; GFX9PLUS-LABEL: name: test_ashr_s64_s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9PLUS-NEXT: {{ $}} @@ -158,6 +166,7 @@ body: | ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s16_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -168,6 +177,7 @@ body: | ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s16_s32 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -202,6 +212,7 @@ body: | ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s16_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -212,6 +223,7 @@ body: | ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s16_s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -247,26 +259,28 @@ body: | ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s16_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s16_i8 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9PLUS-NEXT: 
[[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[AND]](s16) ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) @@ -296,13 +310,14 @@ body: | ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_i8_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 @@ -311,13 +326,14 @@ body: | ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_i8_i8 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) @@ -349,13 +365,14 @@ body: | ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s7_s7 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 @@ -364,13 +381,14 @@ body: | ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s7_s7 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) @@ -402,6 +420,7 @@ body: | ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) ; SI-NEXT: $vgpr0 = 
COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s24_s24 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -412,6 +431,7 @@ body: | ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) ; VI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s24_s24 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -446,6 +466,7 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s32_s24 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -455,6 +476,7 @@ body: | ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[AND]](s32) ; VI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s32_s24 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -488,6 +510,7 @@ body: | ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[UV3]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_ashr_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -499,6 +522,7 @@ body: | ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[UV3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v2s32_v2s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -534,6 +558,7 @@ body: | ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[UV5]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_ashr_v3s32_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -546,6 +571,7 @@ body: | ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[UV5]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v3s32_v3s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} @@ -581,6 +607,7 @@ body: | ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV3]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_ashr_v2s64_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -592,6 +619,7 @@ body: | ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v2s64_v2s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} @@ -629,6 +657,7 @@ body: | ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), 
[[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_ashr_v3s64_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -643,6 +672,7 @@ body: | ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v3s64_v3s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9PLUS-NEXT: {{ $}} @@ -686,15 +716,15 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[LSHR1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_ashr_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -717,6 +747,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v2s16_v2s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -756,6 +787,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; VI-LABEL: name: test_ashr_v2s16_v2s32 ; VI: liveins: $vgpr0, $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -777,6 +809,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v2s16_v2s32 ; GFX9PLUS: liveins: $vgpr0, $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -823,34 +856,33 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: 
[[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[LSHR1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND2]](s32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND1]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL1]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL2]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_ashr_v3s16_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -890,13 +922,13 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS 
[[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} @@ -967,27 +999,26 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[LSHR2]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND1]](s32) ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[AND3]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[LSHR3]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_ashr_v4s16_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1028,6 +1059,7 @@ body: | ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v4s16_v4s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -1056,13 +1088,13 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 64 ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32) ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) @@ -1075,18 +1107,19 @@ body: | ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s128 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32) ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) @@ -1099,18 +1132,19 @@ body: | ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s128 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]] - ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), 
[[COPY1]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32) ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) @@ -1160,6 +1194,7 @@ body: | ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s132 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1184,6 +1219,7 @@ body: | ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s132 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1225,11 +1261,13 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s32_0 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_0 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1261,6 +1299,7 @@ body: | ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s32_23 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1274,6 +1313,7 @@ body: | ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_23 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1312,6 +1352,7 @@ body: | ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s32_31 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1325,6 +1366,7 @@ body: | ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_31 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1362,6 +1404,7 @@ body: | ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; SI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s32_32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1374,6 +1417,7 @@ body: | ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1411,6 +1455,7 @@ body: | ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s32_33 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1424,6 +1469,7 @@ body: | ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_33 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1459,6 +1505,7 @@ body: | ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[ASHR]](s64), [[ASHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s32_127 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1469,6 +1516,7 @@ body: | ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[ASHR]](s64), [[ASHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_127 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1496,71 +1544,71 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; SI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: 
[[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[COPY1]](s32) ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C4]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32) ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32) ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]] ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]] ; SI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], 
[[C1]] + ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]] ; SI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; SI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; SI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; SI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; SI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) - ; SI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) + ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) + ; SI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) ; SI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; SI-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32) ; SI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; SI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C4]](s32) + ; SI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32) ; SI-NEXT: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32) ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]] ; SI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]] @@ -1576,76 +1624,77 @@ body: | ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) ; SI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; ; VI-LABEL: name: test_ashr_s256_s256 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; VI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; 
VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[COPY1]](s32) ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C4]](s32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32) ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32) ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]] ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] + ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]] ; VI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), 
[[SUB1]](s32), [[C2]] + ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] + ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]] ; VI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; VI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; VI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; VI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; VI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) - ; VI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) + ; VI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) + ; VI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) ; VI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; VI-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32) ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32) ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; VI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C4]](s32) + ; VI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32) ; VI-NEXT: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32) ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]] ; VI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]] @@ -1661,76 +1710,77 @@ body: | ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) ; VI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; ; GFX9PLUS-LABEL: name: test_ashr_s256_s256 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 0 - ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9PLUS-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; GFX9PLUS-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; GFX9PLUS-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; GFX9PLUS-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9PLUS-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; GFX9PLUS-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; GFX9PLUS-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; GFX9PLUS-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[COPY1]](s32) ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C4]](s32) + ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32) ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32) ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]] ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]] ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9PLUS-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; GFX9PLUS-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; GFX9PLUS-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; GFX9PLUS-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9PLUS-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; GFX9PLUS-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; GFX9PLUS-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; GFX9PLUS-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) ; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) ; GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; GFX9PLUS-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = 
G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; GFX9PLUS-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; GFX9PLUS-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] + ; GFX9PLUS-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]] ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9PLUS-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; GFX9PLUS-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; GFX9PLUS-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; GFX9PLUS-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; GFX9PLUS-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; GFX9PLUS-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; GFX9PLUS-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; GFX9PLUS-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; GFX9PLUS-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] ; GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; GFX9PLUS-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] + ; GFX9PLUS-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]] ; GFX9PLUS-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; GFX9PLUS-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; GFX9PLUS-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9PLUS-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) - ; GFX9PLUS-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) + ; GFX9PLUS-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) + ; GFX9PLUS-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) ; GFX9PLUS-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9PLUS-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; GFX9PLUS-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; GFX9PLUS-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; GFX9PLUS-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; GFX9PLUS-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; GFX9PLUS-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; GFX9PLUS-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; GFX9PLUS-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; GFX9PLUS-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32) ; GFX9PLUS-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32) ; GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32) ; GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; GFX9PLUS-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C4]](s32) + ; GFX9PLUS-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32) ; GFX9PLUS-NEXT: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32) ; GFX9PLUS-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = 
G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]] ; GFX9PLUS-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]] @@ -1801,6 +1851,7 @@ body: | ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; ; VI-LABEL: name: test_ashr_v2s128_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1843,6 +1894,7 @@ body: | ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v2s128_v2s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} @@ -1927,6 +1979,7 @@ body: | ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; VI-LABEL: name: test_ashr_s65_s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} @@ -1957,6 +2010,7 @@ body: | ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; GFX9PLUS-LABEL: name: test_ashr_s65_s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -2030,6 +2084,7 @@ body: | ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; VI-LABEL: name: test_ashr_s65_s32_constant8 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -2059,6 +2114,7 @@ body: | ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; GFX9PLUS-LABEL: name: test_ashr_s65_s32_constant8 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9PLUS-NEXT: {{ $}} @@ -2133,6 +2189,7 @@ body: | ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; ; VI-LABEL: name: test_ashr_s65_s32_known_pow2 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} @@ -2164,6 +2221,7 @@ body: | ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; ; GFX9PLUS-LABEL: name: test_ashr_s65_s32_known_pow2 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9PLUS-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir index 18a7032300afe4..8b5c27288453e6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -440,8 +440,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -529,8 +529,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -968,8 +968,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -1645,8 +1645,8 @@ body: | ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] @@ -2018,8 +2018,8 @@ body: | ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] @@ -2285,60 +2285,54 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; 
CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16) ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C2]] ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR5]](s16) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL3]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL3]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C2]] ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR8]](s16) ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR9]](s16) ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL5]] + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL5]] ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C2]] ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR10]](s16) ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT6]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL6]] + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]] ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR11]](s16) ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]] + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[SHL7]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C2]] ; CHECK-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR14]](s16) ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT8]], [[C]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL8]] + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CHECK-NEXT: 
[[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C2]] ; CHECK-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR15]](s16) ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL9]] + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[LSHR12]], [[SHL9]] ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C2]] ; CHECK-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR16]](s16) ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT10]], [[C]](s32) - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL10]] + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL10]] ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C2]] ; CHECK-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR17]](s16) ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL11]] + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[LSHR13]], [[SHL11]] ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<24 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir index 5a147ca27d3400..92eb440fbd8ea6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir @@ -98,11 +98,8 @@ body: | ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITREVERSE]], [[C]](s32) ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(s32) = G_BITREVERSE [[LSHR]] ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITREVERSE1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir index 63235842de57bf..2794a3fa31daa9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir @@ -23,6 +23,7 @@ body: | ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: bswap_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -50,6 +51,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: 
[[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]] ; GFX7-NEXT: $vgpr0 = COPY [[BSWAP]](s32) + ; ; GFX8-LABEL: name: bswap_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -87,8 +89,7 @@ body: | ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY2]](s32) ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -97,6 +98,7 @@ body: | ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX8-LABEL: name: bswap_v2s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -151,6 +153,7 @@ body: | ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX7-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) ; GFX7-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; ; GFX8-LABEL: name: bswap_v3s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} @@ -224,24 +227,22 @@ body: | ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY2]](s32) ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[COPY4]](s32) ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]] ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX7-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY6]](s32) ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY7]](s32) ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; GFX7-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC6]] ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -256,6 +257,7 @@ body: | ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: bswap_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; 
GFX8-NEXT: {{ $}} @@ -285,6 +287,7 @@ body: | ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: bswap_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -314,6 +317,7 @@ body: | ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] ; GFX7-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX8-LABEL: name: bswap_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -349,6 +353,7 @@ body: | ; GFX7-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP2]](s32), [[BSWAP3]](s32) ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: bswap_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir index c82de08bb6e50d..e9f8180aae66bb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir @@ -156,14 +156,13 @@ body: | ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND1]](s32) + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[LSHR]](s32) ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF1]], [[C]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -214,8 +213,8 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[CTLZ_ZERO_UNDEF]], [[UV]] - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 %2:_(s33) = G_CTLZ_ZERO_UNDEF %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir index 318960e578f447..dd116927779b5f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir @@ -72,9 +72,7 @@ 
body: | ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_CTLZ %0 %2:_(s32) = G_ZEXT %1 @@ -174,15 +172,14 @@ body: | ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C2]] ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND1]](s32) + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR]](s32) ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C2]] ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UMIN1]], [[C]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -237,8 +234,8 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UMIN]], [[UV]] - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 %2:_(s33) = G_CTLZ %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir index cacf2f449a6233..f183271cd5f279 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir @@ -14,8 +14,7 @@ body: | ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s8) = G_TRUNC %0 %2:_(s8) = G_CTPOP %1 @@ -36,8 +35,7 @@ body: | ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s9) = G_TRUNC %0 
%2:_(s9) = G_CTPOP %1 @@ -108,9 +106,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[CTPOP]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[CTPOP]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_CTPOP %0 %2:_(s32) = G_ZEXT %1 @@ -131,8 +127,7 @@ body: | ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_CTPOP %1 @@ -197,13 +192,10 @@ body: | ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[AND1]](s32) + ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[LSHR]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -226,8 +218,7 @@ body: | ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 %2:_(s7) = G_CTPOP %1 @@ -267,12 +258,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s64) @@ -295,12 +286,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s64) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir index 39d51cdd6a7431..0ef31a602961cb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -72,9 +72,7 @@ body: | ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_CTTZ %0 %2:_(s32) = G_ZEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir index 4a85edd75a9dc5..801d2e918f0877 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -18,12 +18,14 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; ; VI-LABEL: name: test_fabs_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; ; GFX9-LABEL: name: test_fabs_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -47,12 +49,14 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FABS]](s64) + ; ; VI-LABEL: name: test_fabs_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FABS]](s64) + ; ; GFX9-LABEL: name: test_fabs_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -77,6 +81,7 @@ body: | ; SI-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fabs_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -85,6 +90,7 @@ body: | ; VI-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fabs_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -115,6 +121,7 @@ body: | ; SI-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[UV1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fabs_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -124,6 +131,7 @@ body: | ; VI-NEXT: 
[[FABS1:%[0-9]+]]:_(s32) = G_FABS [[UV1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fabs_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -154,6 +162,7 @@ body: | ; SI-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[UV2]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32), [[FABS2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fabs_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -164,6 +173,7 @@ body: | ; VI-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[UV2]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32), [[FABS2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fabs_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -194,6 +204,7 @@ body: | ; SI-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[UV1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FABS]](s64), [[FABS1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fabs_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -203,6 +214,7 @@ body: | ; VI-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[UV1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FABS]](s64), [[FABS1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_fabs_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -229,12 +241,14 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; ; VI-LABEL: name: test_fabs_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fabs_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -268,19 +282,19 @@ body: | ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL3]] ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 
x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; SI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fabs_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) @@ -298,19 +312,19 @@ body: | ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL3]] ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; VI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fabs_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) @@ -353,6 +367,7 @@ body: | ; SI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV1]] ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fabs_v4s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -362,6 +377,7 @@ body: | ; VI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV1]] ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fabs_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir index 92cde9de38cb79..8b6e53cb782677 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -18,6 +18,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; VI-LABEL: name: test_fadd_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; GFX9-LABEL: name: test_fadd_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -51,6 +53,7 @@ body: | ; SI-NEXT: 
[[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; VI-LABEL: name: test_fadd_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -58,6 +61,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; GFX9-LABEL: name: test_fadd_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -90,6 +94,7 @@ body: | ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fadd_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -100,6 +105,7 @@ body: | ; VI-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fadd_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -137,6 +143,7 @@ body: | ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fadd_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -148,6 +155,7 @@ body: | ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fadd_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -182,6 +190,7 @@ body: | ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fadd_v2s32_flags ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -193,6 +202,7 @@ body: | ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fadd_v2s32_flags ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -228,6 +238,7 @@ body: | ; SI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fadd_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -240,6 +251,7 @@ body: | ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fadd_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -275,6 +287,7 @@ body: | ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: 
test_fadd_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} @@ -286,6 +299,7 @@ body: | ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_fadd_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -337,6 +351,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_fadd_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -359,6 +374,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fadd_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -425,13 +441,13 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fadd_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -471,13 +487,13 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fadd_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -583,6 +599,7 @@ body: | ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fadd_v4s16 ; VI: liveins: 
$vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -623,6 +640,7 @@ body: | ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fadd_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir index 6f086c613eba8e..ac72bf1dbbb6fa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir @@ -25,6 +25,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_copysign_s16_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -39,6 +40,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_copysign_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -79,6 +81,7 @@ body: | ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_copysign_s32_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -90,6 +93,7 @@ body: | ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_copysign_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -124,6 +128,7 @@ body: | ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; VI-LABEL: name: test_copysign_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -135,6 +140,7 @@ body: | ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; GFX9-LABEL: name: test_copysign_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -172,6 +178,7 @@ body: | ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; VI-LABEL: name: test_copysign_s64_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -186,6 +193,7 @@ body: | ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; GFX9-LABEL: name: test_copysign_s64_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -226,6 +234,7 @@ body: | ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_copysign_s32_s64 ; VI: liveins: $vgpr0, $vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -240,6 +249,7 @@ body: | ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_copysign_s32_s64 ; GFX9: liveins: $vgpr0, 
$vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -282,6 +292,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_copysign_s16_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -298,6 +309,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_copysign_s16_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -343,6 +355,7 @@ body: | ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND2]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_copysign_s32_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -358,6 +371,7 @@ body: | ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND2]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_copysign_s32_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -394,14 +408,15 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[C3]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; VI-LABEL: name: test_copysign_s64_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -410,14 +425,15 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[C3]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; GFX9-LABEL: name: test_copysign_s64_s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -426,8 +442,8 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[C3]](s32) @@ -463,6 +479,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; SI-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_copysign_s16_s64 ; VI: liveins: $vgpr0, $vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -479,6 +496,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_copysign_s16_s64 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -527,6 +545,7 @@ body: | ; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY1]], [[BITCAST]] ; SI-NEXT: [[OR2:%[0-9]+]]:_(<2 x s16>) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[OR2]](<2 x s16>) + ; ; VI-LABEL: name: test_copysign_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -545,6 +564,7 @@ body: | ; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY1]], [[BITCAST]] ; VI-NEXT: [[OR2:%[0-9]+]]:_(<2 x s16>) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_copysign_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -583,6 +603,7 @@ body: | ; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY1]], [[BUILD_VECTOR]] ; SI-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) + ; ; VI-LABEL: name: test_copysign_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -596,6 +617,7 @@ body: | ; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY1]], [[BUILD_VECTOR]] ; VI-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_copysign_v2s32_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -638,6 +660,7 @@ body: | ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_copysign_v2s64_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} @@ -655,6 +678,7 @@ body: | ; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_copysign_v2s64_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -707,6 +731,7 @@ body: | ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_copysign_v2s64_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -730,6 +755,7 @@ body: | ; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_copysign_v2s64_v2s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -787,6 +813,7 @@ body: | ; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]] ; SI-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) + ; ; VI-LABEL: name: test_copysign_v2s32_v2s64 ; VI: 
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -808,6 +835,7 @@ body: | ; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]] ; VI-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_copysign_v2s32_v2s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -852,6 +880,7 @@ body: | ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = nnan G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_copysign_s32_s32_flagss ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -863,6 +892,7 @@ body: | ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = nnan G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_copysign_s32_s32_flagss ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -901,6 +931,7 @@ body: | ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = nnan G_OR [[AND]], [[AND2]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_copysign_s32_s16_flags ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -916,6 +947,7 @@ body: | ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = nnan G_OR [[AND]], [[AND2]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_copysign_s32_s16_flags ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -961,6 +993,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = nnan G_OR [[AND]], [[AND1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_copysign_s16_s32_flags ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -977,6 +1010,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = nnan G_OR [[AND]], [[AND1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_copysign_s16_s32_flags ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir index 77a3e47d6deee6..2ccdc5d9c6b73d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -19,6 +19,7 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; SI-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; VI-LABEL: name: test_fma_s32 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -27,6 +28,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; VI-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; GFX9-LABEL: name: test_fma_s32 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -55,6 +57,7 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64) + ; ; VI-LABEL: name: test_fma_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -63,6 +66,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64) + ; ; GFX9-LABEL: name: test_fma_s64 ; 
GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -100,6 +104,7 @@ body: | ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fma_s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -112,6 +117,7 @@ body: | ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fma_s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -155,6 +161,7 @@ body: | ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fma_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -168,6 +175,7 @@ body: | ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fma_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -208,6 +216,7 @@ body: | ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fma_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; VI-NEXT: {{ $}} @@ -222,6 +231,7 @@ body: | ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fma_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX9-NEXT: {{ $}} @@ -264,6 +274,7 @@ body: | ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_fma_v4s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; VI-NEXT: {{ $}} @@ -279,6 +290,7 @@ body: | ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_fma_v4s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX9-NEXT: {{ $}} @@ -320,6 +332,7 @@ body: | ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fma_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; VI-NEXT: {{ $}} @@ -333,6 +346,7 @@ body: | ; VI-NEXT: 
[[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_fma_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX9-NEXT: {{ $}} @@ -394,6 +408,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_fma_v2s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -421,6 +436,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fma_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -501,13 +517,13 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fma_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; VI-NEXT: {{ $}} @@ -555,13 +571,13 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fma_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX9-NEXT: {{ $}} @@ -688,6 +704,7 @@ body: | ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fma_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ 
-738,6 +755,7 @@ body: | ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fma_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir index 2b42ee80b45e61..78bed9e19c65e9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -23,6 +23,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -32,6 +33,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -63,6 +65,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -70,6 +73,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -96,6 +100,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s32_nnan ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -103,6 +108,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s32_nnan ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -131,6 +137,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -139,6 +146,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -168,6 +176,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s32_nnan_rhs ; VI: liveins: $vgpr0, 
$vgpr1 ; VI-NEXT: {{ $}} @@ -176,6 +185,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_rhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -203,6 +213,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -210,6 +221,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -238,6 +250,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; ; VI-LABEL: name: test_fmaxnum_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -247,6 +260,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; ; GFX9-LABEL: name: test_fmaxnum_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -281,6 +295,7 @@ body: | ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -293,6 +308,7 @@ body: | ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -335,6 +351,7 @@ body: | ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fmaxnum_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -350,6 +367,7 @@ body: | ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fmaxnum_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -405,6 +423,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_fmaxnum_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -431,6 +450,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fmaxnum_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -499,13 +519,13 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fmaxnum_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -551,13 +571,13 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fmaxnum_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -667,6 +687,7 @@ body: | ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fmaxnum_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -715,6 +736,7 @@ body: | ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fmaxnum_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -758,6 +780,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; ; VI-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -771,6 +794,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE 
[[COPY2]] ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -812,6 +836,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; ; VI-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -823,6 +848,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -864,6 +890,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -877,6 +904,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -918,6 +946,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -929,6 +958,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -965,6 +995,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -973,6 +1004,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1020,6 +1052,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; VI-LABEL: name: test_fmaxnum_with_constant_vector_argument_v2s16_ieee_mode_on 
; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -1042,6 +1075,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fmaxnum_with_constant_vector_argument_v2s16_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir index d32be74e2c9963..a20c2fa21eb1e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -23,6 +23,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -32,6 +33,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -63,6 +65,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) + ; ; VI-LABEL: name: test_fminnum_s32_ieee_mode_off ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -70,6 +73,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_off ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -96,6 +100,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_s32_nnan ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -103,6 +108,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s32_nnan ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -131,6 +137,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_s32_nnan_lhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -139,6 +146,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -168,6 +176,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_s32_nnan_rhs ; VI: liveins: $vgpr0, 
$vgpr1 ; VI-NEXT: {{ $}} @@ -176,6 +185,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s32_nnan_rhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -203,6 +213,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_s32_nnan_lhs_rhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -210,6 +221,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs_rhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -238,6 +250,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; ; VI-LABEL: name: test_fminnum_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -247,6 +260,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; ; GFX9-LABEL: name: test_fminnum_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -281,6 +295,7 @@ body: | ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fminnum_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -293,6 +308,7 @@ body: | ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -335,6 +351,7 @@ body: | ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fminnum_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -350,6 +367,7 @@ body: | ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fminnum_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -405,6 +423,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_fminnum_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -431,6 +450,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fminnum_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -499,13 +519,13 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fminnum_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -551,13 +571,13 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fminnum_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -667,6 +687,7 @@ body: | ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fminnum_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -715,6 +736,7 @@ body: | ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fminnum_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -758,6 +780,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; ; VI-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -771,6 +794,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE 
[[COPY2]] ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; ; GFX9-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -812,6 +836,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; ; VI-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -823,6 +848,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; ; GFX9-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -864,6 +890,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -877,6 +904,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -918,6 +946,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -929,6 +958,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -965,6 +995,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -973,6 +1004,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1020,6 +1052,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; VI-LABEL: name: test_fminnum_with_constant_vector_argument_v2s16_ieee_mode_on 
; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -1042,6 +1075,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fminnum_with_constant_vector_argument_v2s16_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir index 74c8ff83f318e1..acfc6f69d8f796 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -18,6 +18,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMUL]](s32) + ; ; VI-LABEL: name: test_fmul_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMUL]](s32) + ; ; GFX9PLUS-LABEL: name: test_fmul_s32 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -50,6 +52,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMUL]](s64) + ; ; VI-LABEL: name: test_fmul_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -57,6 +60,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMUL]](s64) + ; ; GFX9PLUS-LABEL: name: test_fmul_s64 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -89,6 +93,7 @@ body: | ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fmul_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -99,6 +104,7 @@ body: | ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9PLUS-LABEL: name: test_fmul_s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -136,6 +142,7 @@ body: | ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fmul_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -147,6 +154,7 @@ body: | ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v2s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -181,6 +189,7 @@ body: | ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fmul_v2s32_flags ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -192,6 +201,7 @@ body: | ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[UV3]] ; 
VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v2s32_flags ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -227,6 +237,7 @@ body: | ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fmul_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -239,6 +250,7 @@ body: | ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v3s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} @@ -274,6 +286,7 @@ body: | ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMUL]](s64), [[FMUL1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fmul_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} @@ -285,6 +298,7 @@ body: | ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMUL]](s64), [[FMUL1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v2s64 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9PLUS-NEXT: {{ $}} @@ -336,6 +350,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_fmul_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -358,6 +373,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v2s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -425,13 +441,13 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fmul_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -471,13 +487,13 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = 
G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v3s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} @@ -583,6 +599,7 @@ body: | ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fmul_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -623,6 +640,7 @@ body: | ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v4s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir index eddc0cdbc61224..49cd1f79861290 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir @@ -17,12 +17,14 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; ; VI-LABEL: name: test_fneg_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; ; GFX9-LABEL: name: test_fneg_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -45,12 +47,14 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FNEG]](s64) + ; ; VI-LABEL: name: test_fneg_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FNEG]](s64) + ; ; GFX9-LABEL: name: test_fneg_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -75,6 +79,7 @@ body: | ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fneg_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -83,6 +88,7 @@ body: | ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fneg_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ 
$}} @@ -113,6 +119,7 @@ body: | ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[UV1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fneg_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -122,6 +129,7 @@ body: | ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[UV1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fneg_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -152,6 +160,7 @@ body: | ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[UV2]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32), [[FNEG2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fneg_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -162,6 +171,7 @@ body: | ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[UV2]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32), [[FNEG2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fneg_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -192,6 +202,7 @@ body: | ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FNEG]](s64), [[FNEG1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fneg_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -201,6 +212,7 @@ body: | ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FNEG]](s64), [[FNEG1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_fneg_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -227,12 +239,14 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; ; VI-LABEL: name: test_fneg_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fneg_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -266,10 +280,10 @@ body: | ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[LSHR]](s32), [[AND2]](s32) ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fneg_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) @@ -287,10 +301,10 @@ body: | ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], 
[[C]](s32) ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[LSHR]](s32), [[AND2]](s32) ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fneg_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) @@ -306,9 +320,8 @@ body: | ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32) ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FNEG %0 @@ -331,6 +344,7 @@ body: | ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV1]] ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fneg_v4s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -340,6 +354,7 @@ body: | ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV1]] ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fneg_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir index a6ebd27c6b03b1..70ed6ffcd2aa91 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir @@ -256,22 +256,19 @@ body: | ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[ZEXT1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT 
[[AND3]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR5]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC4]], [[TRUNC5]] ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -441,15 +438,12 @@ body: | ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY3]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C2]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) ; @@ -464,20 +458,19 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C2]] ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[AND3]](s16) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[C3]](s16) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND6]], [[AND5]](s16) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C2]] + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR 
[[AND5]], [[AND4]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] @@ -494,20 +487,19 @@ body: | ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C2]] ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[AND3]](s16) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[C3]](s16) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C2]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND6]], [[AND5]](s16) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C2]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] @@ -539,33 +531,31 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; 
SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -582,33 +572,31 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; VI-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -625,33 +613,31 @@ body: | ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -709,36 +695,32 @@ body: | ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR 
[[LSHR3]], [[ZEXT1]](s32) ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC4]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY6]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY6]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR5]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC6]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY7]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY7]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR7]], [[ZEXT5]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC8]] ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -752,14 +734,13 @@ body: | ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL3]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] - ; SI-NEXT: 
[[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL5]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL5]] ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) @@ -833,10 +814,9 @@ body: | ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] - ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL5]] ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) @@ -957,50 +937,44 @@ body: | ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[ZEXT1]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC4]], [[TRUNC5]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; 
SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY4]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY4]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR10]], [[ZEXT5]](s32) ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC3]], [[C2]] - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY5]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY5]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[ZEXT7]](s32) ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir index f3d284c7e00f50..4502629afd421f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -215,22 +215,19 @@ body: | ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; 
SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) @@ -249,34 +246,32 @@ body: | ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[ZEXT5]](s32) ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT 
[[AND13]](s16) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY9]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR10]], [[ZEXT7]](s32) ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) @@ -472,14 +467,11 @@ body: | ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY3]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) ; @@ -494,19 +486,18 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C2]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND2]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND3]](s16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND2]](s16) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[AND3]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] @@ -523,19 +514,18 @@ body: | ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; GFX9-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C2]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND2]](s16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND3]](s16) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND2]](s16) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[AND3]](s16) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] @@ -567,30 +557,28 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), 
[[SELECT]](s32), [[C1]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C5]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] @@ -609,30 +597,28 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C5]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] @@ -651,30 +637,28 @@ body: | ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP 
[[C1]](s32) ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C5]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] @@ -729,22 +713,19 @@ body: | ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY6]](s32) - ; SI-NEXT: 
[[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY6]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) @@ -761,48 +742,45 @@ body: | ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY11]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY11]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR7]], [[ZEXT5]](s32) ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY12]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY12]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[ZEXT7]](s32) ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR10]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; SI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16) ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT8]](s32) ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY13]](s32) - ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) - ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C5]] - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[ZEXT9]](s32) + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY13]](s32) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LSHR11]], [[ZEXT9]](s32) ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) ; SI-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C4]](s32) @@ -815,19 +793,18 @@ body: | ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BITCAST6]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C2]] + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C2]] ; SI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC12]], [[C3]] - ; SI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C2]] - ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) + ; SI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C2]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND14]](s16) ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR5]](s16) ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT10]](s32) ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[SHL8]], [[C5]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[COPY17]](s32) - ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND21]](s16) - ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C5]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[ZEXT11]](s32) + ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[SHL8]], [[C5]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[COPY17]](s32) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR13]], [[ZEXT11]](s32) ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR14]](s32) ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC13]], [[TRUNC14]] ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -841,14 +818,13 @@ body: | ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL11]] ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; SI-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]] - 
; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C]](s32) + ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]] + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL12]] ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C5]] - ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]] - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL13]] + ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]] + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C]](s32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR15]], [[SHL13]] ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x s16>) @@ -957,10 +933,9 @@ body: | ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL12]] ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C4]] - ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C4]] - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL13]] + ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C4]] + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR15]], [[SHL13]] ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x s16>) @@ -1070,22 +1045,19 @@ body: | ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32) ; SI-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) @@ -1104,34 +1076,32 @@ body: | ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[ZEXT5]](s32) ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY9]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR10]], [[ZEXT7]](s32) ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) @@ -1143,32 +1113,29 @@ body: | ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) 
- ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16) ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST6]], [[ZEXT10]](s32) ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY10]](s32) - ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) - ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C5]] - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[ZEXT11]](s32) + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY10]](s32) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LSHR14]], [[ZEXT11]](s32) ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] - ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] ; SI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] - ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) + ; SI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] + ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND14]](s16) ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR12]], [[ZEXT12]](s32) ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C5]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[COPY11]](s32) - ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND21]](s16) - ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C5]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[ZEXT13]](s32) + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LSHR13]], [[COPY11]](s32) + ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR16]], [[ZEXT13]](s32) ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) @@ -1187,34 +1154,32 @@ body: | ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) - ; SI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] + ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] ; SI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] - ; SI-NEXT: [[AND25:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] - ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) + ; SI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] + ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], 
[[ZEXT14]](s32) ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C5]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[COPY16]](s32) - ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND25]](s16) - ; SI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C5]] - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[AND27]], [[ZEXT15]](s32) + ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C5]] + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY16]](s32) + ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LSHR20]], [[ZEXT15]](s32) ; SI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC16]], [[TRUNC17]] - ; SI-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] + ; SI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] ; SI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] - ; SI-NEXT: [[AND29:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] - ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND28]](s16) + ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] + ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16) ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16) ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[SHL11]], [[C5]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND30]], [[COPY17]](s32) - ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND29]](s16) - ; SI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C5]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND31]], [[ZEXT17]](s32) + ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[SHL11]], [[C5]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY17]](s32) + ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR22]], [[ZEXT17]](s32) ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR23]](s32) ; SI-NEXT: [[OR10:%[0-9]+]]:_(s16) = G_OR [[TRUNC18]], [[TRUNC19]] ; SI-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir index 8e999e400a6665..206ad3e24ee023 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -18,6 +18,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FSUB]](s32) + ; ; VI-LABEL: name: test_fsub_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FSUB]](s32) + ; ; GFX9-LABEL: name: test_fsub_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -51,6 +53,7 @@ body: | ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; VI-LABEL: name: test_fsub_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -59,6 +62,7 @@ body: | ; VI-NEXT: 
[[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] ; VI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; GFX9-LABEL: name: test_fsub_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -87,6 +91,7 @@ body: | ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; VI-LABEL: name: test_fsub_s64_fmf ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -95,6 +100,7 @@ body: | ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] ; VI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; GFX9-LABEL: name: test_fsub_s64_fmf ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -129,6 +135,7 @@ body: | ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fsub_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -139,6 +146,7 @@ body: | ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSUB]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fsub_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -176,6 +184,7 @@ body: | ; SI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fsub_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -187,6 +196,7 @@ body: | ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fsub_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -221,6 +231,7 @@ body: | ; SI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = nnan G_FSUB [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fsub_v2s32_flags ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -232,6 +243,7 @@ body: | ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = nnan G_FSUB [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fsub_v2s32_flags ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -267,6 +279,7 @@ body: | ; SI-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[UV2]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fsub_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -279,6 +292,7 @@ body: | ; VI-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[UV2]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fsub_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, 
$vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -316,6 +330,7 @@ body: | ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fsub_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} @@ -329,6 +344,7 @@ body: | ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_fsub_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -383,6 +399,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_fsub_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -406,6 +423,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fsub_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -476,13 +494,13 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fsub_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -522,13 +540,13 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fsub_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -638,6 +656,7 @@ 
body: | ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fsub_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -678,6 +697,7 @@ body: | ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fsub_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir index a166c2d45abbca..a655f47a8969ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir @@ -18,6 +18,7 @@ body: | ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; ; GFX8-LABEL: name: test_icmp_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -26,6 +27,7 @@ body: | ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; ; GFX9-LABEL: name: test_icmp_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -54,6 +56,7 @@ body: | ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[COPY]] ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX8-LABEL: name: test_icmp_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -62,6 +65,7 @@ body: | ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[COPY]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: test_icmp_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -95,6 +99,7 @@ body: | ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[TRUNC]] ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -105,6 +110,7 @@ body: | ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[TRUNC]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -135,37 +141,36 @@ body: | ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] ; GFX7-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], 
[[TRUNC]] ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_s8 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_s8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] @@ -191,31 +196,30 @@ body: | ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; ; GFX8-LABEL: name: test_icmp_s24 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; ; GFX9-LABEL: name: test_icmp_s24 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX9-NEXT: 
$vgpr0 = COPY [[SELECT]](s32) %0:_(s24) = G_CONSTANT i24 0 @@ -247,6 +251,7 @@ body: | ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_icmp_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -262,6 +267,7 @@ body: | ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_icmp_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -270,8 +276,8 @@ body: | ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV]] ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] @@ -310,6 +316,7 @@ body: | ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX8-LABEL: name: test_icmp_v3s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -329,6 +336,7 @@ body: | ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_icmp_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -339,8 +347,8 @@ body: | ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] @@ -384,6 +392,7 @@ body: | ; GFX7-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C]] ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX8-LABEL: name: test_icmp_v4s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -407,6 +416,7 @@ body: | ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_icmp_v4s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -419,8 +429,8 @@ body: | ; GFX9-NEXT: 
[[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV5]] ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV6]] ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV3]](s32), [[UV7]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] @@ -452,6 +462,7 @@ body: | ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p0 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -460,6 +471,7 @@ body: | ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_p0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -488,6 +500,7 @@ body: | ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p1), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -496,6 +509,7 @@ body: | ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p1), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_p1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -525,6 +539,7 @@ body: | ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p2), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p2 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -533,6 +548,7 @@ body: | ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p2), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_p2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -562,6 +578,7 @@ body: | ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p3 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -570,6 +587,7 @@ body: | ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_p3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -598,6 +616,7 @@ body: | ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p4), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -606,6 +625,7 @@ body: | ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p4), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) 
+ ; ; GFX9-LABEL: name: test_icmp_p4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -635,6 +655,7 @@ body: | ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p5 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -643,6 +664,7 @@ body: | ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_p5 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -671,6 +693,7 @@ body: | ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p999), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p999 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -679,6 +702,7 @@ body: | ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p999), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_p999 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -714,6 +738,7 @@ body: | ; GFX7-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_icmp_v2p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -729,6 +754,7 @@ body: | ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_icmp_v2p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -771,6 +797,7 @@ body: | ; GFX7-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_icmp_v2p999 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -786,6 +813,7 @@ body: | ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_icmp_v2p999 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -829,15 +857,14 @@ body: | ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND]](s32), [[AND1]] - ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX7-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND2]](s32), [[AND3]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LSHR]](s32), [[LSHR1]] ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; GFX7-NEXT: 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] ; GFX7-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV1]], [[UV3]] ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_icmp_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} @@ -862,6 +889,7 @@ body: | ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV1]], [[UV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_icmp_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -904,25 +932,21 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[C]] ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) + ; ; GFX8-LABEL: name: test_icmp_s33 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[C]] ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) + ; ; GFX9-LABEL: name: test_icmp_s33 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[C]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir index ff2e6296aa4b2c..b9edfbfa6d0a95 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -441,14 +441,12 @@ body: | ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -490,19 +488,16 @@ body: | ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[UV6]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir index 90c2b0a69b1f8f..d8d0f9b9cd898d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir @@ -890,8 +890,7 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -977,14 +976,12 @@ body: | ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1023,8 +1020,7 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) @@ -1057,8 +1053,7 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) @@ -1094,14 +1089,12 @@ body: | ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), 
[[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1139,10 +1132,8 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1175,14 +1166,12 @@ body: | ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1219,10 +1208,8 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1251,8 +1238,7 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = 
G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[UV1]](<2 x s16>) @@ -1308,8 +1294,7 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) @@ -1365,8 +1350,7 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[UV1]](<2 x s16>) @@ -1400,10 +1384,8 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1429,8 +1411,7 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) @@ -1459,8 +1440,7 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; 
CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST2]](<2 x s16>) @@ -1495,10 +1475,9 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1524,8 +1503,7 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[UV1]](<2 x s16>) @@ -1558,10 +1536,8 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1586,8 +1562,7 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST]](<2 x s16>) diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir index b97d04e809a6f7..79c726fc800493 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir @@ -811,10 +811,9 @@ body: | ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) ; GFX6-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C6]] - ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C6]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; GFX6-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL2]] + ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C6]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; GFX6-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) @@ -879,10 +878,9 @@ body: | ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) ; GFX8-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C6]] - ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C6]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; GFX8-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL2]] + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C6]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; GFX8-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll index 29bf366d13587f..155cc06a43ea35 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -235,14 +235,13 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX81-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX81-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX81-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX81-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX81-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; GFX81-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX81-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX81-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; GFX81-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; GFX81-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX81-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; GFX81-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX81-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; GFX81-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX81-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] ; GFX81-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir index a5a58bb01df958..420c55f8f6da21 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir @@ -165,30 +165,24 @@ body: | ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GCN-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; GCN-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C2]](s32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]] ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GCN-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]] - ; GCN-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GCN-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; GCN-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GCN-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]] + ; GCN-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GCN-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GCN-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GCN-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]] - ; GCN-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] - ; GCN-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) - ; GCN-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GCN-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C2]](s32) + ; GCN-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL3]] ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GCN-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]] - ; GCN-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] - ; GCN-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) - ; GCN-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; GCN-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]] + ; GCN-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] + ; GCN-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; GCN-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] ; GCN-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GCN-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]] - ; GCN-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] - ; GCN-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) - ; GCN-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; GCN-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR8]], [[C2]](s32) + ; GCN-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[SHL5]] ; GCN-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; 
GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GCN-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index 2ce09043b15b64..a63df136e003c3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -19,6 +19,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_constant_s1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -27,6 +28,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -55,6 +57,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_constant_s2_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -63,6 +66,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s2_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -89,12 +93,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_constant_s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -119,12 +125,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_constant_s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -149,12 +157,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_constant_s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s16_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -179,12 +189,14 @@ body: | ; CI-NEXT: 
[[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_constant_s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s16_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -215,6 +227,7 @@ body: | ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_constant_s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -227,6 +240,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s16_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -257,12 +271,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_constant_s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s32_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -292,6 +308,7 @@ body: | ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_constant_s32_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -304,6 +321,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s32_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -349,6 +367,7 @@ body: | ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_constant_s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -371,6 +390,7 @@ body: | ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s32_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -410,12 +430,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_constant_s24_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s24_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -440,12 +462,14 @@ body: | ; CI-NEXT: 
[[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_constant_s24_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s24_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -476,6 +500,7 @@ body: | ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_constant_s24_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -488,6 +513,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s24_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -530,6 +556,7 @@ body: | ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; VI-LABEL: name: test_load_constant_s24_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -548,6 +575,7 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; GFX9-LABEL: name: test_load_constant_s24_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -586,6 +614,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; CI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] ; CI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; ; VI-LABEL: name: test_load_constant_s48_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -594,6 +623,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; ; GFX9-LABEL: name: test_load_constant_s48_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -620,12 +650,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; VI-LABEL: name: test_load_constant_s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-LABEL: name: test_load_constant_s64_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -649,12 +681,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; VI-LABEL: name: test_load_constant_s64_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-LABEL: name: test_load_constant_s64_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -696,6 +730,7 @@ body: | 
 ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32)
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64)
+ ;
 ; VI-LABEL: name: test_load_constant_s64_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -720,6 +755,7 @@ body: |
 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s64_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -797,6 +833,7 @@ body: |
 ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
 ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64)
+ ;
 ; VI-LABEL: name: test_load_constant_s64_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -839,6 +876,7 @@ body: |
 ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
 ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s64_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -899,6 +937,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_constant_s96_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -906,6 +945,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s96_align16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -931,6 +971,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_constant_s96_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -938,6 +979,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s96_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -963,6 +1005,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_constant_s96_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -970,6 +1013,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s96_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1016,6 +1060,7 @@ body: |
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_constant_s96_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1044,6 +1089,7 @@ body: |
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s96_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1137,6 +1183,7 @@ body: |
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_constant_s96_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1191,6 +1238,7 @@ body: |
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s96_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1268,6 +1316,7 @@ body: |
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
 ; CI-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
+ ;
 ; VI-LABEL: name: test_load_constant_s160_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1280,6 +1329,7 @@ body: |
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
 ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s160_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1318,6 +1368,7 @@ body: |
 ; CI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
 ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+ ;
 ; VI-LABEL: name: test_load_constant_s224_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1333,6 +1384,7 @@ body: |
 ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
 ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s224_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1369,6 +1421,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; VI-LABEL: name: test_load_constant_s128_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1376,6 +1429,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s128_align16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1401,6 +1455,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; VI-LABEL: name: test_load_constant_s128_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1408,6 +1463,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s128_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1495,6 +1551,7 @@ body: |
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; VI-LABEL: name: test_load_constant_s128_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1564,6 +1621,7 @@ body: |
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s128_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1651,6 +1709,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), align 16, addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+ ;
 ; VI-LABEL: name: test_load_constant_s256_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1658,6 +1717,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), align 16, addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s256_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1682,12 +1742,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ;
 ; VI-LABEL: name: test_load_constant_p1_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p1_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1711,12 +1773,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ;
 ; VI-LABEL: name: test_load_constant_p1_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p1_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1777,6 +1841,7 @@ body: |
 ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1)
+ ;
 ; VI-LABEL: name: test_load_constant_p1_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1820,6 +1885,7 @@ body: |
 ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p1_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1880,12 +1946,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
+ ;
 ; VI-LABEL: name: test_load_constant_p3_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p3_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1909,12 +1977,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+ ;
 ; VI-LABEL: name: test_load_constant_p4_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p4_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1938,12 +2008,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+ ;
 ; VI-LABEL: name: test_load_constant_p4_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p4_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1986,6 +2058,7 @@ body: |
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4)
+ ;
 ; VI-LABEL: name: test_load_constant_p4_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2011,6 +2084,7 @@ body: |
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p4_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2090,6 +2164,7 @@ body: |
 ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4)
+ ;
 ; VI-LABEL: name: test_load_constant_p4_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2133,6 +2208,7 @@ body: |
 ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p4_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2193,12 +2269,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
+ ;
 ; VI-LABEL: name: test_load_constant_p5_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p5_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2229,6 +2307,7 @@ body: |
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
 ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
+ ;
 ; VI-LABEL: name: test_load_constant_p5_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2242,6 +2321,7 @@ body: |
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p5_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2289,6 +2369,7 @@ body: |
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
 ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
+ ;
 ; VI-LABEL: name: test_load_constant_p5_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2312,6 +2393,7 @@ body: |
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p5_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2352,12 +2434,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s8_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s8_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2383,12 +2467,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s8_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s8_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2420,6 +2506,7 @@ body: |
 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s8_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2432,6 +2519,7 @@ body: |
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s8_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2467,8 +2555,8 @@ body: |
 ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
 ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
 ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -2488,6 +2576,7 @@ body: |
 ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
 ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s8_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2498,8 +2587,8 @@ body: |
 ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
 ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -2517,6 +2606,7 @@ body: |
 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
 ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s8_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2527,8 +2617,8 @@ body: |
 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -2579,8 +2669,8 @@ body: |
 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
 ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
 ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
 ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -2600,6 +2690,7 @@ body: |
 ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
 ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
 ; CI-NEXT: $vgpr0 = COPY [[OR4]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s8_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2620,8 +2711,8 @@ body: |
 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
 ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
@@ -2639,6 +2730,7 @@ body: |
 ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
 ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
 ; VI-NEXT: $vgpr0 = COPY [[OR4]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s8_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2659,8 +2751,8 @@ body: |
 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+ ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
@@ -2697,12 +2789,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s8_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s8_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2733,6 +2827,7 @@ body: |
 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s8_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2745,6 +2840,7 @@ body: |
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s8_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2791,6 +2887,7 @@ body: |
 ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s8_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2813,6 +2910,7 @@ body: |
 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s8_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2853,12 +2951,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v8s8_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v8s8_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2908,6 +3008,7 @@ body: |
 ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+ ;
 ; VI-LABEL: name: test_load_constant_v16s8_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2939,6 +3040,7 @@ body: |
 ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v16s8_align16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3056,6 +3158,7 @@ body: |
 ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
+ ;
 ; VI-LABEL: name: test_load_constant_v32s8_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3107,6 +3210,7 @@ body: |
 ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v32s8_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3224,12 +3328,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s16_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s16_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3263,6 +3369,7 @@ body: |
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s16_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3279,6 +3386,7 @@ body: |
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s16_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3328,6 +3436,7 @@ body: |
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s16_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3354,6 +3463,7 @@ body: |
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s16_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3406,13 +3516,13 @@ body: |
 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
 ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s16_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3432,13 +3542,13 @@ body: |
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
 ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s16_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3501,13 +3611,13 @@ body: |
 ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
 ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
 ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s16_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3536,13 +3646,13 @@ body: |
 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
 ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s16_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3612,13 +3722,13 @@ body: |
 ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
 ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
 ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s16_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3647,13 +3757,13 @@ body: |
 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
 ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s16_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3737,13 +3847,13 @@ body: |
 ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
 ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
 ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
- ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
- ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]]
+ ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
+ ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
 ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s16_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3786,13 +3896,13 @@ body: |
 ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
 ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]]
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
 ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s16_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3854,12 +3964,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s16_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s16_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3883,12 +3995,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s16_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s16_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3934,6 +4048,7 @@ body: |
 ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s16_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3962,6 +4077,7 @@ body: |
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s16_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4041,6 +4157,7 @@ body: |
 ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s16_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -4087,6 +4204,7 @@ body: |
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s16_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4145,6 +4263,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v8s16_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -4152,6 +4271,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v8s16_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4176,12 +4296,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s32_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s32_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4205,12 +4327,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s32_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s32_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4248,6 +4372,7 @@ body: |
 ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s32_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -4268,6 +4393,7 @@ body: |
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s32_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4337,6 +4463,7 @@ body: |
 ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s32_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -4375,6 +4502,7 @@ body: |
 ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s32_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4430,12 +4558,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s32_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s32_align16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4461,12 +4591,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s32_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s32_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4490,12 +4622,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s32_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s32_align16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4519,12 +4653,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s32_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s32_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4548,12 +4684,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s32_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s32_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4577,12 +4715,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v8s32_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v8s32_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4606,12 +4746,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v16s32_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v16s32_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4635,12 +4777,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v16s32_align32_extload_from_v16s16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v16s32_align32_extload_from_v16s16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4664,12 +4808,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s64_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s64_align16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4693,12 +4839,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s64_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s64_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4722,12 +4870,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s64_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s64_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4788,6 +4938,7 @@ body: |
 ; CI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s64_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -4831,6 +4982,7 @@ body: |
 ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s64_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4962,6 +5114,7 @@ body: |
 ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s64_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -5039,6 +5192,7 @@ body: |
 ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s64_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -5137,6 +5291,7 @@ body: |
 ; CI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s64_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -5147,6 +5302,7 @@ body: |
 ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s64_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -5183,6 +5339,7 @@ body: |
 ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s64_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -5196,6 +5353,7 @@ body: |
 ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s64_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -5335,6 +5493,7 @@ body: |
 ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s64_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -5448,6 +5607,7 @@ body: |
 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s64_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -5580,12 +5740,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s64_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s64_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -5609,12 +5771,14 @@ body: |
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s64_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s64_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -5777,6 +5941,7 @@ body: |
 ; CI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s64_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -5922,6 +6087,7 @@ body: |
 ; VI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s64_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -6085,6 +6251,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s128_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -6092,6 +6259,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s128_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -6117,6 +6285,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2p1_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -6124,6 +6293,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2p1_align16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -6149,6 +6319,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2p1_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -6156,6 +6327,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2p1_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -6181,6 +6353,7 @@ body: |
s32>), align 4, addrspace 4) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_constant_v2p1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6188,6 +6361,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_constant_v2p1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6275,6 +6449,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_constant_v2p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6344,6 +6519,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_constant_v2p1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6430,12 +6606,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; VI-LABEL: name: test_load_constant_v2p3_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_constant_v2p3_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6459,12 +6637,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; VI-LABEL: name: test_load_constant_v2p3_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_constant_v2p3_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6522,6 +6702,7 @@ body: | ; CI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; VI-LABEL: name: test_load_constant_v2p3_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6562,6 +6743,7 @@ body: | ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_constant_v2p3_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6619,12 +6801,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_constant_s32_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_ext_load_constant_s32_from_1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6648,12 +6832,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_constant_s32_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_ext_load_constant_s32_from_2_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6679,6 +6865,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6686,6 +6873,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6711,6 +6899,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6718,6 +6907,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6743,6 +6933,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_constant_s64_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6750,6 +6941,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6778,6 +6970,7 @@ body: | ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; VI-LABEL: name: test_ext_load_constant_s128_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ 
-6788,6 +6981,7 @@ body: | ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX9-LABEL: name: test_ext_load_constant_s128_from_4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6816,6 +7010,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6823,6 +7018,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6848,6 +7044,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6855,6 +7052,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6911,6 +7109,7 @@ body: | ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6949,6 +7148,7 @@ body: | ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7018,6 +7218,7 @@ body: | ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7038,6 +7239,7 @@ body: | ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7075,12 +7277,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; 
VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7104,12 +7308,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; VI-LABEL: name: test_extload_constant_v3s32_from_6_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-LABEL: name: test_extload_constant_v3s32_from_6_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7133,12 +7339,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_extload_constant_v4s32_from_8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-LABEL: name: test_extload_constant_v4s32_from_8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7258,6 +7466,7 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7360,6 +7569,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7528,6 +7738,7 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7580,6 +7791,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7660,6 +7872,7 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7674,6 +7887,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7716,6 +7930,7 @@ body: 
| ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7730,6 +7945,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7765,6 +7981,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512) + ; ; VI-LABEL: name: test_load_constant_s512_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7772,6 +7989,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512) + ; ; GFX9-LABEL: name: test_load_constant_s512_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7797,6 +8015,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>) + ; ; VI-LABEL: name: test_load_constant_v4s128_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7804,6 +8023,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>) + ; ; GFX9-LABEL: name: test_load_constant_v4s128_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index d8600768ebb6bd..324839482976e9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -19,6 +19,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_flat_s1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -27,6 +28,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -55,6 +57,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_flat_s2_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ 
-63,6 +66,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s2_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -89,12 +93,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -119,12 +125,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -149,12 +157,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s16_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -179,12 +189,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s16_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -215,6 +227,7 @@ body: | ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_flat_s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -227,6 +240,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s16_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -257,12 +271,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; 
; GFX9-LABEL: name: test_load_flat_s32_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -292,6 +308,7 @@ body: | ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_flat_s32_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -304,6 +321,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s32_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -349,6 +367,7 @@ body: | ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_flat_s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -371,6 +390,7 @@ body: | ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s32_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -416,17 +436,17 @@ body: | ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C4]] - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[AND3]](s64) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C4]] + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[AND2]](s64) + ; ; VI-LABEL: name: test_load_flat_s48_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -439,17 +459,17 @@ body: | ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C4]] - ; 
VI-NEXT: $vgpr0_vgpr1 = COPY [[AND3]](s64) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C4]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND2]](s64) + ; ; GFX9-LABEL: name: test_load_flat_s48_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -480,6 +500,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_flat_s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -490,6 +511,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_flat_s64_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -517,6 +539,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_flat_s64_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -527,6 +550,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_flat_s64_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -564,6 +588,7 @@ body: | ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_flat_s64_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -584,6 +609,7 @@ body: | ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_flat_s64_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -657,6 +683,7 @@ body: | ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_flat_s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -695,6 +722,7 @@ body: | ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_flat_s64_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -762,6 +790,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_flat_s96_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -776,6 +805,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: 
test_load_flat_s96_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -808,6 +838,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_flat_s96_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -822,6 +853,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_flat_s96_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -854,6 +886,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_flat_s96_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -868,6 +901,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_flat_s96_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -914,6 +948,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_flat_s96_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -942,6 +977,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_flat_s96_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1035,6 +1071,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_flat_s96_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1089,6 +1126,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_flat_s96_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1174,6 +1212,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; CI-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; ; VI-LABEL: name: test_load_flat_s160_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1194,6 +1233,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), 
[[LOAD3]](s32), [[LOAD4]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; ; GFX9-LABEL: name: test_load_flat_s160_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1245,6 +1285,7 @@ body: | ; CI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; ; VI-LABEL: name: test_load_flat_s224_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1273,6 +1314,7 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; ; GFX9-LABEL: name: test_load_flat_s224_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1319,6 +1361,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_flat_s128_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1336,6 +1379,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_flat_s128_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1371,6 +1415,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_flat_s128_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1388,6 +1433,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_flat_s128_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1475,6 +1521,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_flat_s128_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1544,6 +1591,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_flat_s128_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1653,6 +1701,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: 
[[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; ; VI-LABEL: name: test_load_flat_s256_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1682,6 +1731,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; ; GFX9-LABEL: name: test_load_flat_s256_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1714,6 +1764,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_flat_p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1724,6 +1775,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX9-LABEL: name: test_load_flat_p1_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1751,6 +1803,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_flat_p1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1761,6 +1814,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX9-LABEL: name: test_load_flat_p1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1816,6 +1870,7 @@ body: | ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_flat_p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1854,6 +1909,7 @@ body: | ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX9-LABEL: name: test_load_flat_p1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1914,12 +1970,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; VI-LABEL: name: test_load_flat_p3_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX9-LABEL: name: test_load_flat_p3_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1947,6 +2005,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; 
VI-LABEL: name: test_load_flat_p4_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1957,6 +2016,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; GFX9-LABEL: name: test_load_flat_p4_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1984,6 +2044,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; VI-LABEL: name: test_load_flat_p4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1994,6 +2055,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; GFX9-LABEL: name: test_load_flat_p4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2031,6 +2093,7 @@ body: | ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; VI-LABEL: name: test_load_flat_p4_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2051,6 +2114,7 @@ body: | ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; GFX9-LABEL: name: test_load_flat_p4_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2125,6 +2189,7 @@ body: | ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; VI-LABEL: name: test_load_flat_p4_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2163,6 +2228,7 @@ body: | ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; GFX9-LABEL: name: test_load_flat_p4_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2223,12 +2289,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; VI-LABEL: name: test_load_flat_p5_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX9-LABEL: name: test_load_flat_p5_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2259,6 +2327,7 @@ body: | ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_flat_p5_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2272,6 +2341,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-LABEL: name: test_load_flat_p5_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2319,6 +2389,7 @@ body: | ; CI-NEXT: 
[[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_flat_p5_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2342,6 +2413,7 @@ body: | ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-LABEL: name: test_load_flat_p5_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2382,12 +2454,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_v2s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v2s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2413,12 +2487,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_v2s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v2s8_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2450,6 +2526,7 @@ body: | ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_flat_v2s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2462,6 +2539,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v2s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2497,8 +2575,8 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -2518,6 +2596,7 @@ body: | ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_flat_v3s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2528,8 +2607,8 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; VI-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -2547,6 +2626,7 @@ body: | ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v3s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2557,8 +2637,8 @@ body: | ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -2609,8 +2689,8 @@ body: | ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -2630,6 +2710,7 @@ body: | ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; CI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; VI-LABEL: name: test_load_flat_v3s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2650,8 +2731,8 @@ body: | ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] @@ -2669,6 +2750,7 @@ body: | ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; VI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v3s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2689,8 +2771,8 @@ body: | ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] @@ -2727,12 +2809,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_v4s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: 
{{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v4s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2763,6 +2847,7 @@ body: | ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_flat_v4s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2775,6 +2860,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v4s8_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2821,6 +2907,7 @@ body: | ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_flat_v4s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2843,6 +2930,7 @@ body: | ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v4s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2887,6 +2975,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_flat_v8s8_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2897,6 +2986,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v8s8_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2931,6 +3021,7 @@ body: | ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_flat_v16s8_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2947,6 +3038,7 @@ body: | ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v16s8_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2993,6 +3085,7 @@ body: | ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; ; VI-LABEL: name: test_load_flat_v32s8_align32 ; VI: liveins: 
$vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3021,6 +3114,7 @@ body: | ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v32s8_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3050,12 +3144,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; VI-LABEL: name: test_load_flat_v2s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v2s16_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3089,6 +3185,7 @@ body: | ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; VI-LABEL: name: test_load_flat_v2s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3105,6 +3202,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v2s16_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3154,6 +3252,7 @@ body: | ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; VI-LABEL: name: test_load_flat_v2s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3180,6 +3279,7 @@ body: | ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v2s16_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3231,22 +3331,21 @@ body: | ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], 
[[C2]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_flat_v3s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3265,22 +3364,21 @@ body: | ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v3s16_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3333,22 +3431,21 @@ body: | ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_flat_v3s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3367,22 +3464,21 @@ body: | ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v3s16_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3452,13 +3548,13 @@ body: | ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-NEXT: 
[[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_flat_v3s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3487,13 +3583,13 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v3s16_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3577,13 +3673,13 @@ body: | ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_flat_v3s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3626,13 +3722,13 @@ body: | ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = 
G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v3s16_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3698,6 +3794,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s16>) from unknown-address + 4) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_flat_v4s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3708,6 +3805,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s16>) from unknown-address + 4) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v4s16_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3735,6 +3833,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s16>) from unknown-address + 4) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_flat_v4s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3745,6 +3844,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s16>) from unknown-address + 4) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v4s16_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3789,6 +3889,7 @@ body: | ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_flat_v4s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3816,6 +3917,7 @@ body: | ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v4s16_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3894,6 +3996,7 @@ body: | ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_flat_v4s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3939,6 +4042,7 @@ body: | ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v4s16_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4007,6 +4111,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), 
[[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; VI-LABEL: name: test_load_flat_v8s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4024,6 +4129,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v8s16_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4052,6 +4158,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_flat_v2s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4062,6 +4169,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v2s32_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4089,6 +4197,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_flat_v2s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4099,6 +4208,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v2s32_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4127,6 +4237,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_flat_v2s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4137,6 +4248,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v2s32_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4167,6 +4279,7 @@ body: | ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_load_flat_v3s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4180,6 +4293,7 @@ body: | ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from 
unknown-address + 8, align 8) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v3s32_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4212,6 +4326,7 @@ body: | ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_load_flat_v3s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4225,6 +4340,7 @@ body: | ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v3s32_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4258,6 +4374,7 @@ body: | ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_flat_v4s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4274,6 +4391,7 @@ body: | ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v4s32_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4307,6 +4425,7 @@ body: | ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_flat_v4s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4323,6 +4442,7 @@ body: | ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v4s32_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4356,6 +4476,7 @@ body: | ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_flat_v4s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4372,6 +4493,7 @@ body: | ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; 
VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v4s32_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4417,6 +4539,7 @@ body: | ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; ; VI-LABEL: name: test_load_flat_v8s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4445,6 +4568,7 @@ body: | ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v8s32_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4518,6 +4642,7 @@ body: | ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s32) from unknown-address + 60) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; ; VI-LABEL: name: test_load_flat_v16s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4570,6 +4695,7 @@ body: | ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s32) from unknown-address + 60) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v16s32_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4614,6 +4740,7 @@ body: | ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_flat_v2s64_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4631,6 +4758,7 @@ body: | ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v2s64_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4665,6 +4793,7 @@ body: | ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = 
G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_flat_v2s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4682,6 +4811,7 @@ body: | ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v2s64_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4716,6 +4846,7 @@ body: | ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_flat_v2s64_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4733,6 +4864,7 @@ body: | ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v2s64_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4785,6 +4917,7 @@ body: | ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_flat_v2s64_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4820,6 +4953,7 @@ body: | ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v2s64_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4943,6 +5077,7 @@ body: | ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_flat_v2s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5012,6 +5147,7 @@ body: | ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v2s64_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -5125,6 +5261,7 @@ body: | ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_flat_v3s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5150,6 +5287,7 @@ body: | ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), 
[[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v3s64_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -5201,6 +5339,7 @@ body: | ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_flat_v3s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5226,6 +5365,7 @@ body: | ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v3s64_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -5353,6 +5493,7 @@ body: | ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_flat_v3s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5454,6 +5595,7 @@ body: | ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v3s64_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -5609,6 +5751,7 @@ body: | ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_flat_v4s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5638,6 +5781,7 @@ body: | ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v4s64_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -5688,6 +5832,7 @@ body: | ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_flat_v4s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5717,6 +5862,7 @@ body: | ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v4s64_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -5867,6 +6013,7 @@ body: | ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR20]](s32), [[OR23]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_flat_v4s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5996,6 +6143,7 @@ body: | ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR20]](s32), [[OR23]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v4s64_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6182,6 +6330,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; VI-LABEL: name: test_load_flat_v2s128_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6211,6 +6360,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; GFX9-LABEL: name: test_load_flat_v2s128_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6250,6 +6400,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_flat_v2p1_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6267,6 +6418,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_flat_v2p1_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6302,6 +6454,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_flat_v2p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6319,6 +6472,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), 
[[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_flat_v2p1_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6354,6 +6508,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_flat_v2p1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6371,6 +6526,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_flat_v2p1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6458,6 +6614,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_flat_v2p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6527,6 +6684,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_flat_v2p1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6617,6 +6775,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; VI-LABEL: name: test_load_flat_v2p3_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6627,6 +6786,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_flat_v2p3_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6654,6 +6814,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; VI-LABEL: name: test_load_flat_v2p3_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6664,6 +6825,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_flat_v2p3_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6721,6 +6883,7 @@ body: | ; CI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR 
[[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; VI-LABEL: name: test_load_flat_v2p3_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6761,6 +6924,7 @@ body: | ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_flat_v2p3_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6818,12 +6982,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6847,12 +7013,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6878,6 +7046,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6885,6 +7054,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6910,6 +7080,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6917,6 +7088,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6942,6 +7114,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6949,6 +7122,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: 
$vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6977,6 +7151,7 @@ body: | ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; VI-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6987,6 +7162,7 @@ body: | ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX9-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7015,6 +7191,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7022,6 +7199,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7047,6 +7225,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7054,6 +7233,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7079,12 +7259,14 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_s32_align536870912 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s32_align536870912 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir index fcbcfddef1580a..c0fb02fb184cf9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -39,6 +39,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s1_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -47,6 +48,7 @@ body: | ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-HSA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-MESA-LABEL: name: 
test_load_global_s1_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -55,6 +57,7 @@ body: | ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-MESA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_global_s1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -63,6 +66,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s1_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -71,6 +75,7 @@ body: | ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-HSA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s1_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -99,6 +104,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s2_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -107,6 +113,7 @@ body: | ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-HSA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s2_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -115,6 +122,7 @@ body: | ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-MESA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_global_s2_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -123,6 +131,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s2_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -131,6 +140,7 @@ body: | ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-HSA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s2_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -157,30 +167,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s8_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s8_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; 
GFX9-HSA-LABEL: name: test_load_global_s8_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s8_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -205,30 +220,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s8_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s8_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s8_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s8_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -253,30 +273,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -301,30 +326,35 @@ body: | ; SI-NEXT: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -355,12 +385,14 @@ body: | ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -373,6 +405,7 @@ body: | ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_global_s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -385,12 +418,14 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -421,30 +456,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s32_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: 
test_load_global_s32_align4
 ; CI-MESA: liveins: $vgpr0_vgpr1
 ; CI-MESA-NEXT: {{ $}}
 ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
 ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_global_s32_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-HSA-LABEL: name: test_load_global_s32_align4
 ; GFX9-HSA: liveins: $vgpr0_vgpr1
 ; GFX9-HSA-NEXT: {{ $}}
 ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
 ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-MESA-LABEL: name: test_load_global_s32_align4
 ; GFX9-MESA: liveins: $vgpr0_vgpr1
 ; GFX9-MESA-NEXT: {{ $}}
[Hunks @@ -474,12 +514,14 @@ through @@ -1337,12 +1430,14 @@, flattened in extraction: the regenerated FileCheck output repeats the pattern above, inserting a lone "+ ;" separator line between each pair of adjacent check-prefix blocks (SI, CI-HSA, CI-MESA, VI, GFX9-HSA, GFX9-MESA) in test_load_global_s32_align2, s32_align1, s24_align8/4/2/1, s48_align8, and s64_align8/4/2/1.]
[Hunks @@ -1405,6 +1500,7 @@ through @@ -3708,6 +3925,7 @@, flattened in extraction: the same "+ ;" separator insertions continue through the rest of test_load_global_s64_align1 and through s96_align16/8/4/2/1, s160_align4, s224_align4, s128_align16/4/1, s256_align32, p1_align8/4/1, p3_align4, p4_align8/4/2/1, p5_align4/2/1, and v2s8_align4/2/1.]
[The v2s8_align1 hunks finish in the same way; in test_load_global_v3s8_align4 and v3s8_align1 the regenerated checks additionally move each "G_CONSTANT i16 255" line to after the corresponding G_TRUNC, as in this hunk:]
@@ -3761,8 +3981,8 @@ body: |
 ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
 ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
[The same reorder, plus the "+ ;" separators, repeats for the CI-HSA, CI-MESA, VI, GFX9-HSA, and GFX9-MESA blocks of both v3s8 tests, and the separator insertions continue through test_load_global_v4s8_align4 and v4s8_align2, ending mid-hunk at:]
@@ -4366,6 +4604,7 @@ body: |
 ; VI-NEXT:
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -4386,6 +4625,7 @@ body: | ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x s8>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4450,6 +4690,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s8_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -4464,6 +4705,7 @@ body: | ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s8_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -4492,6 +4734,7 @@ body: | ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; VI-LABEL: name: test_load_global_v4s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4520,6 +4763,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -4540,6 +4784,7 @@ body: | ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x s8>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4591,30 +4836,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v8s8_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v8s8_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) 
= G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_load_global_v8s8_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v8s8_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v8s8_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4639,30 +4889,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v16s8_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v16s8_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_load_global_v16s8_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v16s8_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v16s8_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4687,30 +4942,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v32s8_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v32s8_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 
= COPY [[LOAD]](<8 x s32>) + ; ; VI-LABEL: name: test_load_global_v32s8_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v32s8_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v32s8_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4736,30 +4996,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v2s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4793,12 +5058,14 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -4815,6 +5082,7 @@ body: | ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v2s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4831,12 +5099,14 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4886,12 +5156,14 @@ body: | ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -4918,6 +5190,7 @@ body: | ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v2s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4944,12 +5217,14 @@ body: | ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5002,13 +5277,13 @@ body: | ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -5028,13 +5303,13 @@ body: | ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] 
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -5054,13 +5329,13 @@ body: | ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v3s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5080,13 +5355,13 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -5108,6 +5383,7 @@ body: | ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5170,13 +5446,13 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], 
[[C2]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -5205,13 +5481,13 @@ body: | ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -5240,13 +5516,13 @@ body: | ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v3s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5275,13 +5551,13 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; VI-NEXT: 
[[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -5310,6 +5586,7 @@ body: | ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5379,13 +5656,13 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -5414,13 +5691,13 @@ body: | ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -5449,13 +5726,13 @@ body: | ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], 
[[C2]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v3s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5484,13 +5761,13 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -5519,6 +5796,7 @@ body: | ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5602,13 +5880,13 @@ body: | ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x 
s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -5637,13 +5915,13 @@ body: | ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -5686,13 +5964,13 @@ body: | ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v3s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5735,13 +6013,13 @@ body: | ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -5770,6 +6048,7 @@ body: | ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5831,30 +6110,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; VI-LABEL: name: test_load_global_v4s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5878,30 +6162,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; VI-LABEL: name: test_load_global_v4s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) 
; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5947,12 +6236,14 @@ body: | ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -5981,6 +6272,7 @@ body: | ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_global_v4s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6009,12 +6301,14 @@ body: | ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -6094,12 +6388,14 @@ body: | ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -6146,6 +6442,7 @@ body: | ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_global_v4s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6192,12 +6489,14 @@ body: | ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ 
$}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -6270,6 +6569,7 @@ body: | ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -6291,6 +6591,7 @@ body: | ; CI-HSA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; CI-HSA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -6312,6 +6613,7 @@ body: | ; CI-MESA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; CI-MESA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v5s16_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6333,6 +6635,7 @@ body: | ; VI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -6352,6 +6655,7 @@ body: | ; GFX9-HSA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -6410,6 +6714,7 @@ body: | ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST1]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -6450,6 +6755,7 @@ body: | ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -6490,6 +6796,7 @@ body: | ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v5s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6530,6 +6837,7 @@ body: | ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -6562,6 +6870,7 @@ body: | ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -6633,6 +6942,7 @@ body: | ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST1]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ 
$}} @@ -6673,6 +6983,7 @@ body: | ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -6713,6 +7024,7 @@ body: | ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v5s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6753,6 +7065,7 @@ body: | ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -6785,6 +7098,7 @@ body: | ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -6874,6 +7188,7 @@ body: | ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -6914,6 +7229,7 @@ body: | ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -6954,6 +7270,7 @@ body: | ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v5s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6994,6 +7311,7 @@ body: | ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7026,6 +7344,7 @@ body: | ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7137,6 +7456,7 @@ body: | ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7177,6 +7497,7 @@ body: | ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -7239,6 +7560,7 @@ body: | ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v5s16_align1 ; 
VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7301,6 +7623,7 @@ body: | ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7333,6 +7656,7 @@ body: | ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7413,6 +7737,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7420,6 +7745,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -7427,6 +7753,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v6s16_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7434,6 +7761,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7441,6 +7769,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7471,6 +7800,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7478,6 +7808,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -7485,6 +7816,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x 
s32>), align 8, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v6s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7492,6 +7824,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7499,6 +7832,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7529,6 +7863,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7536,6 +7871,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -7543,6 +7879,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v6s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7550,6 +7887,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7557,6 +7895,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7603,6 +7942,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7610,6 +7950,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x 
s32>), align 2, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -7638,6 +7979,7 @@ body: | ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v6s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7666,6 +8008,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7673,6 +8016,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7766,6 +8110,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7773,6 +8118,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -7827,6 +8173,7 @@ body: | ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v6s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7881,6 +8228,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7888,6 +8236,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7975,6 +8324,7 @@ body: | ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; 
SI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7997,6 +8347,7 @@ body: | ; CI-HSA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -8019,6 +8370,7 @@ body: | ; CI-MESA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v7s16_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -8041,6 +8393,7 @@ body: | ; VI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -8062,6 +8415,7 @@ body: | ; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[UV14]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -8153,6 +8507,7 @@ body: | ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -8205,6 +8560,7 @@ body: | ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -8257,6 +8613,7 @@ body: | ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v7s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -8309,6 +8666,7 @@ body: | ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -8351,6 +8709,7 @@ body: | ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -8463,6 +8822,7 @@ body: | ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -8515,6 +8875,7 @@ body: | ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -8567,6 +8928,7 @@ body: | ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; 
CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v7s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -8619,6 +8981,7 @@ body: | ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -8661,6 +9024,7 @@ body: | ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -8773,6 +9137,7 @@ body: | ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -8825,6 +9190,7 @@ body: | ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -8877,6 +9243,7 @@ body: | ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v7s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -8929,6 +9296,7 @@ body: | ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -8971,6 +9339,7 @@ body: | ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9113,6 +9482,7 @@ body: | ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -9165,6 +9535,7 @@ body: | ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -9247,6 +9618,7 @@ body: | ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v7s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -9329,6 +9701,7 @@ body: | ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -9371,6 +9744,7 @@ body: | ; GFX9-HSA-NEXT: $vgpr1 = 
COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9468,6 +9842,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v8s16_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -9475,6 +9850,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v8s16_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -9482,6 +9858,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; VI-LABEL: name: test_load_global_v8s16_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -9489,6 +9866,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v8s16_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -9496,6 +9874,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v8s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9521,6 +9900,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v8s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -9528,6 +9908,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v8s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -9535,6 +9916,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; VI-LABEL: name: test_load_global_v8s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -9542,6 +9924,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; 
VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v8s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -9549,6 +9932,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v8s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9573,30 +9957,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s32_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s32_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_load_global_v2s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9620,30 +10009,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s32_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s32_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_load_global_v2s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; 
GFX9-HSA-LABEL: name: test_load_global_v2s32_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9681,12 +10075,14 @@ body: | ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s32_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s32_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -9707,6 +10103,7 @@ body: | ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_global_v2s32_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -9727,12 +10124,14 @@ body: | ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9802,12 +10201,14 @@ body: | ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s32_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s32_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -9846,6 +10247,7 @@ body: | ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_global_v2s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -9884,12 +10286,14 @@ body: | ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: 
test_load_global_v2s32_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9947,30 +10351,35 @@ body: | ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s32_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s32_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; VI-LABEL: name: test_load_global_v3s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s32_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s32_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9999,30 +10408,35 @@ body: | ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s32_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s32_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; VI-LABEL: name: test_load_global_v3s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[LOAD]](<3 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s32_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s32_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10046,30 +10460,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s32_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s32_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_load_global_v4s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10093,30 +10512,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s32_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s32_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_load_global_v4s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align8 ; GFX9-HSA: 
liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10140,30 +10564,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s32_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s32_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_load_global_v4s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10187,30 +10616,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v8s32_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v8s32_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; VI-LABEL: name: test_load_global_v8s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; GFX9-HSA-LABEL: name: 
test_load_global_v8s32_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v8s32_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10234,30 +10668,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v16s32_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v16s32_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; ; VI-LABEL: name: test_load_global_v16s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v16s32_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v16s32_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10281,30 +10720,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[LOAD]](<2 x s64>) + ; ; VI-LABEL: name: test_load_global_v2s64_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10328,30 +10772,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; VI-LABEL: name: test_load_global_v2s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10375,30 +10824,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; VI-LABEL: name: 
test_load_global_v2s64_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10459,12 +10913,14 @@ body: | ; SI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -10508,6 +10964,7 @@ body: | ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_global_v2s64_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -10551,12 +11008,14 @@ body: | ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10688,12 +11147,14 @@ body: | ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -10771,6 +11232,7 @@ body: | ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-MESA-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_global_v2s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -10848,12 +11310,14 @@ body: | ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10949,6 +11413,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-HSA-LABEL: name: test_load_global_v2sp1_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -10956,6 +11421,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-MESA-LABEL: name: test_load_global_v2sp1_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -10963,6 +11429,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_global_v2sp1_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -10970,6 +11437,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2sp1_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -10977,6 +11445,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2sp1_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -11005,6 +11474,7 @@ body: | ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s64_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -11015,6 +11485,7 @@ body: | ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-HSA-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s64_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -11025,6 +11496,7 @@ body: | ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_global_v3s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -11035,6 +11507,7 @@ body: | ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -11045,6 +11518,7 @@ body: | ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -11081,6 +11555,7 @@ body: | ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s64_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -11094,6 +11569,7 @@ body: | ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s64_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -11107,6 +11583,7 @@ body: | ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_global_v3s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -11120,6 +11597,7 @@ body: | ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES 
[[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -11133,6 +11611,7 @@ body: | ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -11272,6 +11751,7 @@ body: | ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s64_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -11285,6 +11765,7 @@ body: | ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s64_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -11398,6 +11879,7 @@ body: | ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_global_v3s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -11511,6 +11993,7 @@ body: | ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -11524,6 +12007,7 @@ body: | ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -11656,30 +12140,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: 
[[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s64_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s64_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; VI-LABEL: name: test_load_global_v4s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -11703,30 +12192,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s64_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s64_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; VI-LABEL: name: test_load_global_v4s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align8 ; 
GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -11889,12 +12383,14 @@ body: | ; SI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s64_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s64_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12040,6 +12536,7 @@ body: | ; CI-MESA-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_global_v4s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12185,12 +12682,14 @@ body: | ; VI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12354,6 +12853,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s128_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -12361,6 +12861,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s128_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12368,6 +12869,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; VI-LABEL: name: test_load_global_v2s128_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12375,6 +12877,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x 
s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s128_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -12382,6 +12885,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s128_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12407,6 +12911,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p1_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -12414,6 +12919,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-MESA-LABEL: name: test_load_global_v2p1_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12421,6 +12927,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_global_v2p1_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12428,6 +12935,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -12435,6 +12943,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12460,6 +12969,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p1_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -12467,6 +12977,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-MESA-LABEL: name: test_load_global_v2p1_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12474,6 +12985,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-MESA-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_global_v2p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12481,6 +12993,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -12488,6 +13001,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12513,6 +13027,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p1_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -12520,6 +13035,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-MESA-LABEL: name: test_load_global_v2p1_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12527,6 +13043,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_global_v2p1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12534,6 +13051,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -12541,6 +13059,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12628,6 +13147,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p1_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -12635,6 +13155,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 
x s32>), align 1, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-MESA-LABEL: name: test_load_global_v2p1_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12704,6 +13225,7 @@ body: | ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_global_v2p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12773,6 +13295,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -12780,6 +13303,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12867,6 +13391,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; CI-HSA-LABEL: name: test_load_global_v4p1_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -12874,6 +13399,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; CI-MESA-LABEL: name: test_load_global_v4p1_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12881,6 +13407,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; VI-LABEL: name: test_load_global_v4p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12888,6 +13415,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4p1_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -12895,6 +13423,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; GFX9-MESA-LABEL: 
name: test_load_global_v4p1_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12919,30 +13448,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p3_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-MESA-LABEL: name: test_load_global_v2p3_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; VI-LABEL: name: test_load_global_v2p3_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12966,30 +13500,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p3_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-MESA-LABEL: name: test_load_global_v2p3_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; VI-LABEL: name: test_load_global_v2p3_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13047,12 +13586,14 @@ body: | ; SI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = 
G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p3_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-MESA-LABEL: name: test_load_global_v2p3_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13093,6 +13634,7 @@ body: | ; CI-MESA-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; VI-LABEL: name: test_load_global_v2p3_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13133,12 +13675,14 @@ body: | ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13196,30 +13740,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_global_s32_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13243,30 +13792,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; CI-HSA: 
liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_global_s32_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13302,6 +13856,7 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; SI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13314,6 +13869,7 @@ body: | ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13332,6 +13888,7 @@ body: | ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; CI-MESA-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13350,6 +13907,7 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13362,6 +13920,7 @@ body: | ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13402,6 +13961,7 @@ body: | ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13414,6 +13974,7 @@ body: | ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; CI-MESA: 
liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13426,6 +13987,7 @@ body: | ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13438,6 +14000,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13450,6 +14013,7 @@ body: | ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13479,30 +14043,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13528,6 +14097,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13535,6 +14105,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13542,6 +14113,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA-NEXT: 
$vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_global_s64_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13549,6 +14121,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13556,6 +14129,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13581,6 +14155,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13588,6 +14163,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13595,6 +14171,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_global_s64_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13602,6 +14179,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13609,6 +14187,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13634,6 +14213,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13641,6 +14221,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13648,6 +14229,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; 
CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_global_s64_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13655,6 +14237,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13662,6 +14245,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13690,6 +14274,7 @@ body: | ; SI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13700,6 +14285,7 @@ body: | ; CI-HSA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13710,6 +14296,7 @@ body: | ; CI-MESA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; VI-LABEL: name: test_ext_load_global_s128_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13720,6 +14307,7 @@ body: | ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13730,6 +14318,7 @@ body: | ; GFX9-HSA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13758,6 +14347,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13765,6 +14355,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13772,6 +14363,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 
1) ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_global_s64_from_2_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13779,6 +14371,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13786,6 +14379,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13811,6 +14405,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13818,6 +14413,7 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13825,6 +14421,7 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_global_s64_from_1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13832,6 +14429,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13839,6 +14437,7 @@ body: | ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13863,30 +14462,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = 
COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13910,30 +14514,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13957,30 +14566,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: 
[[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14004,30 +14618,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-HSA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-MESA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; VI-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-HSA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-MESA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14051,30 +14670,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-HSA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-MESA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; 
CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-HSA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-MESA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14194,6 +14818,7 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-HSA-LABEL: name: test_global_v2s96_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14208,6 +14833,7 @@ body: | ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-MESA-LABEL: name: test_global_v2s96_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14310,6 +14936,7 @@ body: | ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_global_v2s96_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14412,6 +15039,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-HSA-LABEL: name: test_global_v2s96_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14426,6 +15054,7 @@ body: | ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-MESA-LABEL: name: test_global_v2s96_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14594,6 +15223,7 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-HSA-LABEL: name: test_global_v2s96_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14608,6 +15238,7 @@ body: | ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-MESA-LABEL: name: test_global_v2s96_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14660,6 +15291,7 @@ body: | ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_global_v2s96_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14712,6 +15344,7 @@ body: | ; VI-NEXT: 
[[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-HSA-LABEL: name: test_global_v2s96_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14726,6 +15359,7 @@ body: | ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-MESA-LABEL: name: test_global_v2s96_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14815,6 +15449,7 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-HSA-LABEL: name: test_global_v2s96_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14829,6 +15464,7 @@ body: | ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-MESA-LABEL: name: test_global_v2s96_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14843,6 +15479,7 @@ body: | ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_global_v2s96_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14857,6 +15494,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-HSA-LABEL: name: test_global_v2s96_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14871,6 +15509,7 @@ body: | ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-MESA-LABEL: name: test_global_v2s96_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14920,6 +15559,7 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-HSA-LABEL: name: test_global_v2s96_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14934,6 +15574,7 @@ body: | ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-MESA-LABEL: name: test_global_v2s96_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14948,6 +15589,7 @@ body: | ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_global_v2s96_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14962,6 +15604,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-HSA-LABEL: name: test_global_v2s96_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14976,6 +15619,7 @@ body: | ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; 
GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-MESA-LABEL: name: test_global_v2s96_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -15074,6 +15718,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; ; CI-HSA-LABEL: name: test_load_global_v32s1_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -15144,6 +15789,7 @@ body: | ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32) ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; ; CI-MESA-LABEL: name: test_load_global_v32s1_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -15214,6 +15860,7 @@ body: | ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32) ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; ; VI-LABEL: name: test_load_global_v32s1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -15284,6 +15931,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; ; 
GFX9-HSA-LABEL: name: test_load_global_v32s1_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -15402,6 +16050,7 @@ body: | ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>), [[BUILD_VECTOR8]](<2 x s16>), [[BUILD_VECTOR9]](<2 x s16>), [[BUILD_VECTOR10]](<2 x s16>), [[BUILD_VECTOR11]](<2 x s16>), [[BUILD_VECTOR12]](<2 x s16>), [[BUILD_VECTOR13]](<2 x s16>), [[BUILD_VECTOR14]](<2 x s16>), [[BUILD_VECTOR15]](<2 x s16>) ; GFX9-HSA-NEXT: [[TRUNC32:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x s16>) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC32]](<32 x s1>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v32s1_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -15553,6 +16202,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; ; CI-HSA-LABEL: name: test_load_global_v8s4_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -15575,6 +16225,7 @@ body: | ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32) ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; ; CI-MESA-LABEL: name: test_load_global_v8s4_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -15597,6 +16248,7 @@ body: | ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32) ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; ; VI-LABEL: name: test_load_global_v8s4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -15619,6 +16271,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v8s4_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -15653,6 +16306,7 @@ body: | ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-HSA-NEXT: [[TRUNC8:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC8]](<8 x s4>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v8s4_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -15705,30 +16359,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: 
test_load_global_s32_align536870912 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s32_align536870912 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_s32_align536870912 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s32_align536870912 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s32_align536870912 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index 46a6125225dfb1..a94df99206304a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -24,6 +24,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-LABEL: name: test_load_local_s1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -32,6 +33,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_s1_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -40,6 +42,7 @@ body: | ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-DS128-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_local_s1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -48,6 +51,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_load_local_s1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -56,6 +60,7 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s1_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -64,6 +69,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX10-LABEL: name: test_load_local_s1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -72,6 +78,7 @@ body: | ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX10-NEXT:
$vgpr0 = COPY [[AND]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s1_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -80,6 +87,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX11-LABEL: name: test_load_local_s1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -88,6 +96,7 @@ body: | ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX11-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s1_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -116,6 +125,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-LABEL: name: test_load_local_s2_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -124,6 +134,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_s2_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -132,6 +143,7 @@ body: | ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-DS128-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_local_s2_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -140,6 +152,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_load_local_s2_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -148,6 +161,7 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s2_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -156,6 +170,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX10-LABEL: name: test_load_local_s2_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -164,6 +179,7 @@ body: | ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s2_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -172,6 +188,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX11-LABEL: name: test_load_local_s2_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -180,6 +197,7 @@ body: | ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX11-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s2_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -206,54 +224,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: 
(load (s8), align 4, addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_local_s8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_s8_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_local_s8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_local_s8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s8_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_local_s8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s8_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_local_s8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s8_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -278,54 +305,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_local_s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_s8_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_local_s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), 
addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_local_s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s8_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_local_s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s8_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_local_s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s8_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -350,54 +386,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_local_s16_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_s16_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_local_s16_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_local_s16_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_local_s16_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_local_s16_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s16_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -422,54 +467,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_local_s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_s16_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_local_s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_local_s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_local_s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_local_s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s16_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -500,6 +554,7 @@ body: | 
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-LABEL: name: test_load_local_s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -512,6 +567,7 @@ body: | ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_s16_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -524,6 +580,7 @@ body: | ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-DS128-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_local_s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -536,6 +593,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_local_s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -548,12 +606,14 @@ body: | ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_local_s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -566,12 +626,14 @@ body: | ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_local_s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -584,6 +646,7 @@ body: | ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s16_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -608,54 +671,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_local_s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_s32_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: 
test_load_local_s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_local_s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_local_s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_local_s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s32_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -685,6 +757,7 @@ body: | ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-LABEL: name: test_load_local_s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -697,6 +770,7 @@ body: | ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_s32_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -709,6 +783,7 @@ body: | ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-DS128-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_local_s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -721,6 +796,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_local_s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -733,12 +809,14 @@ body: | ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: 
$vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_local_s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -751,12 +829,14 @@ body: | ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_local_s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -769,6 +849,7 @@ body: | ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s32_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -808,6 +889,7 @@ body: | ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; CI-LABEL: name: test_load_local_s32_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -830,6 +912,7 @@ body: | ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_s32_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -852,6 +935,7 @@ body: | ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_local_s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -874,6 +958,7 @@ body: | ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_load_local_s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -896,12 +981,14 @@ body: | ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_local_s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -924,12 +1011,14 @@ body: | ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_local_s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -952,6 +1041,7 
@@ body: | ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s32_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -975,54 +1065,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_local_s24_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_s24_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_local_s24_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_local_s24_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_local_s24_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_local_s24_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -1047,54 +1146,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_local_s24_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-DS128-LABEL: name: 
test_load_local_s24_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_local_s24_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_local_s24_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_local_s24_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_local_s24_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -1125,6 +1233,7 @@ body: | ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-LABEL: name: test_load_local_s24_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1137,6 +1246,7 @@ body: | ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_s24_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -1149,6 +1259,7 @@ body: | ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-DS128-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_local_s24_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1161,6 +1272,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_local_s24_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1173,6 +1285,7 @@ body: | ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; 
GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -1185,6 +1298,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX10-LABEL: name: test_load_local_s24_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1197,6 +1311,7 @@ body: | ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -1209,6 +1324,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX11-LABEL: name: test_load_local_s24_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1221,6 +1337,7 @@ body: | ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -1263,6 +1380,7 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; SI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; CI-LABEL: name: test_load_local_s24_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1281,6 +1399,7 @@ body: | ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_s24_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -1299,6 +1418,7 @@ body: | ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; CI-DS128-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; VI-LABEL: name: test_load_local_s24_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1317,6 +1437,7 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; GFX9-LABEL: name: test_load_local_s24_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1335,6 +1456,7 @@ body: | ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; GFX9-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -1347,6 +1469,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX10-LABEL: name: test_load_local_s24_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1365,6 +1488,7 @@ body: | ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; GFX10-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; 
GFX10-UNALIGNED-NEXT: {{ $}} @@ -1377,6 +1501,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX11-LABEL: name: test_load_local_s24_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1395,6 +1520,7 @@ body: | ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; GFX11-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -1425,54 +1551,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; CI-LABEL: name: test_load_local_s48_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; CI-DS128-LABEL: name: test_load_local_s48_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; VI-LABEL: name: test_load_local_s48_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-LABEL: name: test_load_local_s48_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s48_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX10-LABEL: name: test_load_local_s48_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s48_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX11-LABEL: name: test_load_local_s48_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s48_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -1497,54 +1632,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 
(s64), addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; CI-LABEL: name: test_load_local_s64_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; CI-DS128-LABEL: name: test_load_local_s64_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; VI-LABEL: name: test_load_local_s64_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-LABEL: name: test_load_local_s64_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX10-LABEL: name: test_load_local_s64_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX11-LABEL: name: test_load_local_s64_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -1568,54 +1712,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; CI-LABEL: name: test_load_local_s64_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; CI-DS128-LABEL: name: test_load_local_s64_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; VI-LABEL: name: test_load_local_s64_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = 
G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-LABEL: name: test_load_local_s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX10-LABEL: name: test_load_local_s64_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX11-LABEL: name: test_load_local_s64_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -1657,6 +1810,7 @@ body: | ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; ; CI-LABEL: name: test_load_local_s64_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1681,6 +1835,7 @@ body: | ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; ; CI-DS128-LABEL: name: test_load_local_s64_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -1705,6 +1860,7 @@ body: | ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; ; VI-LABEL: name: test_load_local_s64_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1729,6 +1885,7 @@ body: | ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; ; GFX9-LABEL: name: test_load_local_s64_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1753,12 +1910,14 @@ body: | ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = 
COPY [[LOAD]](s64) + ; ; GFX10-LABEL: name: test_load_local_s64_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1783,6 +1942,7 @@ body: | ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -1797,6 +1957,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32) ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; GFX11-LABEL: name: test_load_local_s64_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1821,6 +1982,7 @@ body: | ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -1880,6 +2042,7 @@ body: | ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; ; CI-LABEL: name: test_load_local_s64_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1922,6 +2085,7 @@ body: | ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; ; CI-DS128-LABEL: name: test_load_local_s64_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -1964,6 +2128,7 @@ body: | ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; ; VI-LABEL: name: test_load_local_s64_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2006,6 +2171,7 @@ body: | ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; ; GFX9-LABEL: name: test_load_local_s64_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2048,12 +2214,14 @@ body: | ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX10-LABEL: name: test_load_local_s64_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2096,6 +2264,7 @@ body: | ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -2110,6 +2279,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32) ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY 
[[OR]](s64) + ; ; GFX11-LABEL: name: test_load_local_s64_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -2152,6 +2322,7 @@ body: | ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -2222,6 +2393,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_local_s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2275,6 +2447,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-DS128-LABEL: name: test_load_local_s96_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -2328,6 +2501,7 @@ body: | ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_local_s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2381,6 +2555,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_local_s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2434,6 +2609,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -2441,6 +2617,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_local_s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2494,6 +2671,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -2508,6 +2686,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_local_s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -2561,6 
+2740,7 @@ body: | ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -2591,6 +2771,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_local_s96_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2603,6 +2784,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-DS128-LABEL: name: test_load_local_s96_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -2617,6 +2799,7 @@ body: | ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_local_s96_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2631,6 +2814,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_local_s96_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2645,6 +2829,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -2652,6 +2837,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 8, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_local_s96_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2666,6 +2852,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -2680,6 +2867,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_local_s96_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -2694,6 +2882,7 @@ body: | ; GFX11-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -2724,6 +2913,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_local_s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2736,6 +2926,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-DS128-LABEL: name: test_load_local_s96_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -2750,6 +2941,7 @@ body: | ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_local_s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2764,6 +2956,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_local_s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2778,6 +2971,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -2785,6 +2979,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_local_s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2799,6 +2994,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -2813,6 +3009,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_local_s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -2827,6 +3024,7 @@ body: | ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x 
s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -2873,6 +3071,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_local_s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2901,6 +3100,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-DS128-LABEL: name: test_load_local_s96_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -2929,6 +3129,7 @@ body: | ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_local_s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2957,6 +3158,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_local_s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2985,6 +3187,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -2992,6 +3195,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_local_s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3020,6 +3224,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -3034,6 +3239,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_local_s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3062,6 +3268,7 @@ body: | ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) 
; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -3133,6 +3340,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_local_s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3186,6 +3394,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-DS128-LABEL: name: test_load_local_s96_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -3239,6 +3448,7 @@ body: | ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_local_s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3292,6 +3502,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_local_s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3345,6 +3556,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -3352,6 +3564,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_local_s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3405,6 +3618,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -3419,6 +3633,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_local_s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3472,6 +3687,7 @@ body: | ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST 
[[BUILD_VECTOR]](<3 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -3559,6 +3775,7 @@ body: | ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_local_s128_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3628,6 +3845,7 @@ body: | ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-DS128-LABEL: name: test_load_local_s128_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -3696,6 +3914,7 @@ body: | ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_local_s128_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3764,6 +3983,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_local_s128_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3832,6 +4052,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -3839,6 +4060,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_local_s128_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3907,6 +4129,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -3924,6 +4147,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_local_s128_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3992,6 +4216,7 @@ body: | ; GFX11-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -4021,6 +4246,7 @@ body: | ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_local_s128_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4032,6 +4258,7 @@ body: | ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-DS128-LABEL: name: test_load_local_s128_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -4039,6 +4266,7 @@ body: | ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_local_s128_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4046,6 +4274,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_local_s128_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4053,6 +4282,7 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -4060,6 +4290,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_local_s128_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4077,6 +4308,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -4094,6 +4326,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_local_s128_align8 ; GFX11: liveins: $vgpr0 ; 
GFX11-NEXT: {{ $}} @@ -4101,6 +4334,7 @@ body: | ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -4130,6 +4364,7 @@ body: | ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_local_s128_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4141,6 +4376,7 @@ body: | ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-DS128-LABEL: name: test_load_local_s128_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -4158,6 +4394,7 @@ body: | ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_local_s128_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4175,6 +4412,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_local_s128_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4192,6 +4430,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -4199,6 +4438,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_local_s128_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4216,6 +4456,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -4233,6 +4474,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_local_s128_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4250,6 +4492,7 @@ body: | ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -4304,6 +4547,7 @@ body: | ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_local_s128_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4340,6 +4584,7 @@ body: | ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-DS128-LABEL: name: test_load_local_s128_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -4375,6 +4620,7 @@ body: | ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_local_s128_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4410,6 +4656,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_local_s128_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4445,6 +4692,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -4452,6 +4700,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_local_s128_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4487,6 +4736,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -4504,6 +4754,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), 
[[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_local_s128_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4539,6 +4790,7 @@ body: | ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -4626,6 +4878,7 @@ body: | ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_local_s128_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4695,6 +4948,7 @@ body: | ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-DS128-LABEL: name: test_load_local_s128_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -4763,6 +5017,7 @@ body: | ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_local_s128_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4831,6 +5086,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_local_s128_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4899,6 +5155,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -4906,6 +5163,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_local_s128_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4974,6 +5232,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; 
GFX10-UNALIGNED-NEXT: {{ $}} @@ -4991,6 +5250,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_local_s128_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5059,6 +5319,7 @@ body: | ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -5083,54 +5344,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; CI-LABEL: name: test_load_local_p1_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; CI-DS128-LABEL: name: test_load_local_p1_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; VI-LABEL: name: test_load_local_p1_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-LABEL: name: test_load_local_p1_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX10-LABEL: name: test_load_local_p1_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX11-LABEL: name: test_load_local_p1_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; 
GFX11-UNALIGNED-NEXT: {{ $}} @@ -5163,36 +5433,42 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; CI-LABEL: name: test_load_local_p1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; CI-DS128-LABEL: name: test_load_local_p1_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; VI-LABEL: name: test_load_local_p1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-LABEL: name: test_load_local_p1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX10-LABEL: name: test_load_local_p1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5208,6 +5484,7 @@ body: | ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -5223,12 +5500,14 @@ body: | ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX11-LABEL: name: test_load_local_p1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -5271,6 +5550,7 @@ body: | ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; CI-LABEL: name: test_load_local_p1_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5296,6 +5576,7 @@ body: | ; CI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; CI-DS128-LABEL: name: test_load_local_p1_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -5321,6 +5602,7 @@ body: | ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = 
G_INTTOPTR [[OR2]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; VI-LABEL: name: test_load_local_p1_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5346,6 +5628,7 @@ body: | ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX9-LABEL: name: test_load_local_p1_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5371,12 +5654,14 @@ body: | ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX10-LABEL: name: test_load_local_p1_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5402,6 +5687,7 @@ body: | ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -5417,6 +5703,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX11-LABEL: name: test_load_local_p1_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5442,6 +5729,7 @@ body: | ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -5502,6 +5790,7 @@ body: | ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; CI-LABEL: name: test_load_local_p1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5545,6 +5834,7 @@ body: | ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; CI-DS128-LABEL: name: test_load_local_p1_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -5588,6 +5878,7 @@ body: | ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; VI-LABEL: name: test_load_local_p1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5631,6 +5922,7 @@ body: | ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX9-LABEL: name: test_load_local_p1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5674,12 +5966,14 @@ body: | ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; 
GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX10-LABEL: name: test_load_local_p1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5723,6 +6017,7 @@ body: | ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -5738,6 +6033,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX11-LABEL: name: test_load_local_p1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5781,6 +6077,7 @@ body: | ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -5804,54 +6101,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; CI-LABEL: name: test_load_local_p3_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; CI-DS128-LABEL: name: test_load_local_p3_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; VI-LABEL: name: test_load_local_p3_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX9-LABEL: name: test_load_local_p3_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX10-LABEL: name: test_load_local_p3_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: 
[[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX11-LABEL: name: test_load_local_p3_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p3_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -5882,6 +6188,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; CI-LABEL: name: test_load_local_p3_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5895,6 +6202,7 @@ body: | ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; CI-DS128-LABEL: name: test_load_local_p3_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -5908,6 +6216,7 @@ body: | ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; VI-LABEL: name: test_load_local_p3_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5921,6 +6230,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX9-LABEL: name: test_load_local_p3_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5934,12 +6244,14 @@ body: | ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX10-LABEL: name: test_load_local_p3_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5953,12 +6265,14 @@ body: | ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX11-LABEL: name: test_load_local_p3_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5972,6 +6286,7 @@ body: | ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p3_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -6012,6 +6327,7 @@ body: | ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; CI-LABEL: name: 
test_load_local_p3_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6035,6 +6351,7 @@ body: | ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; CI-DS128-LABEL: name: test_load_local_p3_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -6058,6 +6375,7 @@ body: | ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; VI-LABEL: name: test_load_local_p3_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6081,6 +6399,7 @@ body: | ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX9-LABEL: name: test_load_local_p3_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6104,12 +6423,14 @@ body: | ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX10-LABEL: name: test_load_local_p3_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6133,12 +6454,14 @@ body: | ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX11-LABEL: name: test_load_local_p3_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6162,6 +6485,7 @@ body: | ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p3_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -6185,54 +6509,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; CI-LABEL: name: test_load_local_p5_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; CI-DS128-LABEL: name: test_load_local_p5_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; VI-LABEL: name: test_load_local_p5_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; VI-NEXT: 
$vgpr0 = COPY [[LOAD]](p5) + ; ; GFX9-LABEL: name: test_load_local_p5_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX10-LABEL: name: test_load_local_p5_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX11-LABEL: name: test_load_local_p5_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p5_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -6263,6 +6596,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-LABEL: name: test_load_local_p5_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6276,6 +6610,7 @@ body: | ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-DS128-LABEL: name: test_load_local_p5_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -6289,6 +6624,7 @@ body: | ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_local_p5_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6302,6 +6638,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-LABEL: name: test_load_local_p5_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6315,12 +6652,14 @@ body: | ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX10-LABEL: name: test_load_local_p5_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6334,12 +6673,14 @@ body: | ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], 
[[ZEXTLOAD]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX11-LABEL: name: test_load_local_p5_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6353,6 +6694,7 @@ body: | ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p5_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -6393,6 +6735,7 @@ body: | ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-LABEL: name: test_load_local_p5_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6416,6 +6759,7 @@ body: | ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-DS128-LABEL: name: test_load_local_p5_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -6439,6 +6783,7 @@ body: | ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_local_p5_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6462,6 +6807,7 @@ body: | ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-LABEL: name: test_load_local_p5_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6485,12 +6831,14 @@ body: | ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX10-LABEL: name: test_load_local_p5_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6514,12 +6862,14 @@ body: | ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX11-LABEL: name: test_load_local_p5_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6543,6 +6893,7 @@ body: | ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX11-NEXT: $vgpr0 = COPY 
[[INTTOPTR]](p5)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_p5_align1
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -6566,54 +6917,63 @@ body: |
     ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
     ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; CI-LABEL: name: test_load_local_v2s8_align2
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
     ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
     ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v2s8_align2
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
     ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
     ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; VI-LABEL: name: test_load_local_v2s8_align2
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
     ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; GFX9-LABEL: name: test_load_local_v2s8_align2
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
     ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s8_align2
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
     ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
     ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; GFX10-LABEL: name: test_load_local_v2s8_align2
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
     ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s8_align2
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
     ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
     ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; GFX11-LABEL: name: test_load_local_v2s8_align2
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
     ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s8_align2
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -6647,6 +7007,7 @@ body: |
     ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32)
     ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; CI-LABEL: name: test_load_local_v2s8_align1
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
@@ -6661,6 +7022,7 @@ body: |
     ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32)
     ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v2s8_align1
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
@@ -6675,6 +7037,7 @@ body: |
     ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32)
     ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; VI-LABEL: name: test_load_local_v2s8_align1
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
@@ -6689,6 +7052,7 @@ body: |
     ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32)
     ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; GFX9-LABEL: name: test_load_local_v2s8_align1
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
@@ -6703,6 +7067,7 @@ body: |
     ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s8_align1
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
@@ -6712,6 +7077,7 @@ body: |
     ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
     ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32)
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; GFX10-LABEL: name: test_load_local_v2s8_align1
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
@@ -6726,6 +7092,7 @@ body: |
     ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32)
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s8_align1
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
@@ -6735,6 +7102,7 @@ body: |
     ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
     ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; GFX11-LABEL: name: test_load_local_v2s8_align1
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
@@ -6749,6 +7117,7 @@ body: |
     ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32)
     ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s8_align1
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -6780,8 +7149,8 @@ body: |
     ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
     ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
     ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -6801,6 +7170,7 @@ body: |
     ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
     ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
     ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+    ;
     ; CI-LABEL: name: test_load_local_v3s8_align4
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
@@ -6811,8 +7181,8 @@ body: |
     ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
     ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
     ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -6832,6 +7202,7 @@ body: |
     ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
     ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
     ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v3s8_align4
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
@@ -6842,8 +7213,8 @@ body: |
     ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
     ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
     ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -6863,6 +7234,7 @@ body: |
     ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
     ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
     ; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](s32)
+    ;
     ; VI-LABEL: name: test_load_local_v3s8_align4
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
@@ -6873,8 +7245,8 @@ body: |
     ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
     ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -6892,6 +7264,7 @@ body: |
     ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
     ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
     ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+    ;
     ; GFX9-LABEL: name: test_load_local_v3s8_align4
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
@@ -6902,8 +7275,8 @@ body: |
     ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
     ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -6921,6 +7294,7 @@ body: |
     ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
     ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
     ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align4
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
@@ -6931,8 +7305,8 @@ body: |
     ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
     ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -6950,6 +7324,7 @@ body: |
     ; GFX9-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
     ; GFX9-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
     ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32)
+    ;
     ; GFX10-LABEL: name: test_load_local_v3s8_align4
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
@@ -6960,8 +7335,8 @@ body: |
     ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
     ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -6979,6 +7354,7 @@ body: |
     ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
     ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
     ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align4
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
@@ -6989,8 +7365,8 @@ body: |
     ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
     ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -7008,6 +7384,7 @@ body: |
     ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
     ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
     ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32)
+    ;
     ; GFX11-LABEL: name: test_load_local_v3s8_align4
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
@@ -7018,8 +7395,8 @@ body: |
     ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
     ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -7037,6 +7414,7 @@ body: |
     ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
     ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
     ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s8_align4
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -7047,8 +7425,8 @@ body: |
     ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
     ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -7099,8 +7477,8 @@ body: |
     ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
     ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
     ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
     ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
     ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -7120,6 +7498,7 @@ body: |
     ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
     ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
     ; SI-NEXT: $vgpr0 = COPY [[OR4]](s32)
+    ;
     ; CI-LABEL: name: test_load_local_v3s8_align1
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
@@ -7140,8 +7519,8 @@ body: |
     ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
     ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
     ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
     ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
     ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -7161,6 +7540,7 @@ body: |
     ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
     ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
     ; CI-NEXT: $vgpr0 = COPY [[OR4]](s32)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v3s8_align1
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
@@ -7181,8 +7561,8 @@ body: |
     ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
     ; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
     ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
     ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
     ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -7202,6 +7582,7 @@ body: |
     ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
     ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
     ; CI-DS128-NEXT: $vgpr0 = COPY [[OR4]](s32)
+    ;
     ; VI-LABEL: name: test_load_local_v3s8_align1
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
@@ -7222,8 +7603,8 @@ body: |
     ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
     ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
     ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
     ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
@@ -7241,6 +7622,7 @@ body: |
     ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
     ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
     ; VI-NEXT: $vgpr0 = COPY [[OR4]](s32)
+    ;
     ; GFX9-LABEL: name: test_load_local_v3s8_align1
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
@@ -7261,8 +7643,8 @@ body: |
     ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
     ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
     ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
     ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
@@ -7280,6 +7662,7 @@ body: |
     ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
     ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
     ; GFX9-NEXT: $vgpr0 = COPY [[OR4]](s32)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align1
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
@@ -7295,8 +7678,8 @@ body: |
     ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32)
     ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
     ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
@@ -7314,6 +7697,7 @@ body: |
     ; GFX9-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
     ; GFX9-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
     ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](s32)
+    ;
     ; GFX10-LABEL: name: test_load_local_v3s8_align1
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
@@ -7334,8 +7718,8 @@ body: |
     ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
     ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
     ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
     ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
@@ -7353,6 +7737,7 @@ body: |
     ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
     ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
     ; GFX10-NEXT: $vgpr0 = COPY [[OR4]](s32)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align1
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
@@ -7368,8 +7753,8 @@ body: |
     ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32)
     ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
     ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
@@ -7387,6 +7772,7 @@ body: |
     ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
     ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
     ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](s32)
+    ;
     ; GFX11-LABEL: name: test_load_local_v3s8_align1
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
@@ -7407,8 +7793,8 @@ body: |
     ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
     ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
     ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+    ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
     ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
@@ -7426,6 +7812,7 @@ body: |
     ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
     ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
     ; GFX11-NEXT: $vgpr0 = COPY [[OR4]](s32)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s8_align1
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -7441,8 +7828,8 @@ body: |
     ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32)
     ; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX11-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
+    ; GFX11-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
     ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
@@ -7479,54 +7866,63 @@ body: |
     ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
     ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; CI-LABEL: name: test_load_local_v4s8_align4
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
     ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
     ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v4s8_align4
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
     ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
     ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; VI-LABEL: name: test_load_local_v4s8_align4
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
     ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; GFX9-LABEL: name: test_load_local_v4s8_align4
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
     ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s8_align4
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
     ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
     ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; GFX10-LABEL: name: test_load_local_v4s8_align4
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
     ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s8_align4
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
     ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
     ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; GFX11-LABEL: name: test_load_local_v4s8_align4
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
     ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s8_align4
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -7551,54 +7947,63 @@ body: |
     ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
     ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ;
     ; CI-LABEL: name: test_load_local_v8s8_align8
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
     ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
     ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v8s8_align8
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
     ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
     ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ;
     ; VI-LABEL: name: test_load_local_v8s8_align8
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
     ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ;
     ; GFX9-LABEL: name: test_load_local_v8s8_align8
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v8s8_align8
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
     ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ;
     ; GFX10-LABEL: name: test_load_local_v8s8_align8
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v8s8_align8
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
     ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ;
     ; GFX11-LABEL: name: test_load_local_v8s8_align8
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
     ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v8s8_align8
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -7683,6 +8088,7 @@ body: |
     ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
     ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
     ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ;
     ; CI-LABEL: name: test_load_local_v16s8_align16
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
@@ -7749,6 +8155,7 @@ body: |
     ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
     ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
     ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v16s8_align16
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
@@ -7816,6 +8223,7 @@ body: |
     ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
     ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
     ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ;
     ; VI-LABEL: name: test_load_local_v16s8_align16
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
@@ -7883,6 +8291,7 @@ body: |
     ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
     ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
     ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ;
     ; GFX9-LABEL: name: test_load_local_v16s8_align16
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
@@ -7950,12 +8359,14 @@ body: |
     ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v16s8_align16
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
     ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3)
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ;
     ; GFX10-LABEL: name: test_load_local_v16s8_align16
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
@@ -8023,6 +8434,7 @@ body: |
     ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
     ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v16s8_align16
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
@@ -8039,6 +8451,7 @@ body: |
     ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3)
     ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ;
     ; GFX11-LABEL: name: test_load_local_v16s8_align16
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
@@ -8106,6 +8519,7 @@ body: |
     ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
     ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
     ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v16s8_align16
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -8130,54 +8544,63 @@ body: |
     ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
     ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; CI-LABEL: name: test_load_local_v2s16_align4
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
     ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
     ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v2s16_align4
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
     ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
     ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; VI-LABEL: name: test_load_local_v2s16_align4
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
     ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; GFX9-LABEL: name: test_load_local_v2s16_align4
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
     ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align4
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
     ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
     ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; GFX10-LABEL: name: test_load_local_v2s16_align4
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
     ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align4
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
     ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
     ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; GFX11-LABEL: name: test_load_local_v2s16_align4
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
     ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s16_align4
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -8211,6 +8634,7 @@ body: |
     ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ;
     ; CI-LABEL: name: test_load_local_v2s16_align2
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
@@ -8227,6 +8651,7 @@ body: |
     ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v2s16_align2
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
@@ -8243,6 +8668,7 @@ body: |
     ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CI-DS128-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ;
     ; VI-LABEL: name: test_load_local_v2s16_align2
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
@@ -8259,6 +8685,7 @@ body: |
     ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ;
     ; GFX9-LABEL: name: test_load_local_v2s16_align2
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
@@ -8271,12 +8698,14 @@ body: |
     ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
     ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align2
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
     ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
     ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; GFX10-LABEL: name: test_load_local_v2s16_align2
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
@@ -8289,12 +8718,14 @@ body: |
     ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
     ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
     ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align2
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
     ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
     ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; GFX11-LABEL: name: test_load_local_v2s16_align2
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
@@ -8307,6 +8738,7 @@ body: |
     ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
     ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
     ; GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s16_align2
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -8350,6 +8782,7 @@ body: |
     ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
     ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
     ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ;
     ; CI-LABEL: name: test_load_local_v2s16_align1
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
@@ -8376,6 +8809,7 @@ body: |
     ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
     ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
     ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v2s16_align1
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
@@ -8402,6 +8836,7 @@ body: |
     ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
     ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
     ; CI-DS128-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ;
     ; VI-LABEL: name: test_load_local_v2s16_align1
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
@@ -8428,6 +8863,7 @@ body: |
     ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
     ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
     ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ;
     ; GFX9-LABEL: name: test_load_local_v2s16_align1
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
@@ -8450,12 +8886,14 @@ body: |
     ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
     ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align1
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
     ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
     ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; GFX10-LABEL: name: test_load_local_v2s16_align1
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
@@ -8478,12 +8916,14 @@ body: |
     ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
     ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
     ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align1
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
     ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
     ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; GFX11-LABEL: name: test_load_local_v2s16_align1
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
@@ -8506,6 +8946,7 @@ body: |
     ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
     ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
     ; GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s16_align1
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -8542,13 +8983,13 @@ body: |
     ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
     ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
     ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
     ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
     ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; CI-LABEL: name: test_load_local_v3s16_align8
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
@@ -8568,13 +9009,13 @@ body: |
     ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
     ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
     ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
     ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
     ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align8
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
@@ -8594,13 +9035,13 @@ body: |
     ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
     ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
     ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
     ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
     ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; VI-LABEL: name: test_load_local_v3s16_align8
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
@@ -8620,13 +9061,13 @@ body: |
     ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
     ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
     ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
     ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
     ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX9-LABEL: name: test_load_local_v3s16_align8
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
@@ -8648,6 +9089,7 @@ body: |
     ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
@@ -8669,6 +9111,7 @@ body: |
     ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX10-LABEL: name: test_load_local_v3s16_align8
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
@@ -8690,6 +9133,7 @@ body: |
     ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
@@ -8711,6 +9155,7 @@ body: |
     ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX11-LABEL: name: test_load_local_v3s16_align8
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
@@ -8732,6 +9177,7 @@ body: |
     ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
     ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -8794,13 +9240,13 @@ body: |
     ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
     ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
     ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
     ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
     ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; CI-LABEL: name: test_load_local_v3s16_align2
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
@@ -8829,13 +9275,13 @@ body: |
     ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
     ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
     ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
     ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
     ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align2
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
@@ -8864,13 +9310,13 @@ body: |
     ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
     ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
     ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
     ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
     ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; VI-LABEL: name: test_load_local_v3s16_align2
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
@@ -8899,13 +9345,13 @@ body: |
     ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
     ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
     ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
     ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
     ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX9-LABEL: name: test_load_local_v3s16_align2
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
@@ -8934,6 +9380,7 @@ body: |
     ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
     ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
@@ -8962,6 +9409,7 @@ body: |
     ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
     ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX10-LABEL: name: test_load_local_v3s16_align2
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
@@ -8990,6 +9438,7 @@ body: |
     ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
     ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
@@ -9018,6 +9467,7 @@ body: |
     ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
     ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX11-LABEL: name: test_load_local_v3s16_align2
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
@@ -9046,6 +9496,7 @@ body: |
     ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
     ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
     ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -9129,13 +9580,13 @@ body: |
     ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
     ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
     ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
-    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
-    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]]
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
     ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
     ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
     ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; CI-LABEL: name: test_load_local_v3s16_align1
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
@@ -9178,13 +9629,13 @@ body: |
     ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
     ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
     ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
-    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
-    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]]
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
     ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
     ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
     ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align1
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
@@ -9227,13 +9678,13 @@ body: |
     ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
     ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
     ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
-    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
-    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]]
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
     ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
     ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
     ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; VI-LABEL: name: test_load_local_v3s16_align1
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
@@ -9276,13 +9727,13 @@ body: |
     ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
     ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
     ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
-    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
-    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
     ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
     ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
     ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX9-LABEL: name: test_load_local_v3s16_align1
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
@@ -9325,6 +9776,7 @@ body: |
     ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
     ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
@@ -9353,6 +9805,7 @@ body: |
     ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
     ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX10-LABEL: name: test_load_local_v3s16_align1
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
@@ -9395,6 +9848,7 @@ body: |
     ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
     ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
@@ -9423,6 +9877,7 @@ body: |
     ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
     ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX11-LABEL: name: test_load_local_v3s16_align1
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
@@ -9465,6 +9920,7 @@ body: |
     ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
     ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
     ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -9511,54 +9967,63 @@ body: |
     ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; CI-LABEL: name: test_load_local_v4s16_align8
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
     ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v4s16_align8
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
     ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; VI-LABEL: name: test_load_local_v4s16_align8
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX9-LABEL: name: test_load_local_v4s16_align8
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align8
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
     ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX10-LABEL: name: test_load_local_v4s16_align8
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align8
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
     ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX11-LABEL: name: test_load_local_v4s16_align8
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align8
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -9604,36 +10069,42 @@ body: |
     ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
     ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
     ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; CI-LABEL: name: test_load_local_v4s16_align4
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
     ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
     ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v4s16_align4
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
     ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
     ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; VI-LABEL: name: test_load_local_v4s16_align4
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
     ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX9-LABEL: name: test_load_local_v4s16_align4
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align4
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
     ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX10-LABEL: name: test_load_local_v4s16_align4
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
@@ -9656,6 +10127,7 @@ body: |
     ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align4
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
@@ -9678,12 +10150,14 @@ body: |
     ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; GFX11-LABEL: name: test_load_local_v4s16_align4
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
     ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align4
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -9728,6 +10202,7 @@ body: |
     ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
     ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
     ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; CI-LABEL: name: test_load_local_v4s16_align2
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
@@ -9756,6 +10231,7 @@ body: |
     ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
     ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
     ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v4s16_align2
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
@@ -9784,6 +10260,7 @@ body: |
     ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
     ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
     ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; VI-LABEL: name: test_load_local_v4s16_align2
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
@@ -9812,6 +10289,7 @@ body: |
     ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
     ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
     ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; GFX9-LABEL: name: test_load_local_v4s16_align2
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
@@ -9834,12 +10312,14 @@ body: |
     ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align2
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
     ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 2, addrspace 3)
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX10-LABEL: name: test_load_local_v4s16_align2
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
@@ -9862,6 +10342,7 @@ body: |
     ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align2
     ; GFX10-UNALIGNED: liveins: $vgpr0
     ; GFX10-UNALIGNED-NEXT: {{  $}}
@@ -9884,6 +10365,7 @@ body: |
     ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; GFX11-LABEL: name: test_load_local_v4s16_align2
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
@@ -9906,6 +10388,7 @@ body: |
     ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
     ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align2
     ; GFX11-UNALIGNED: liveins: $vgpr0
     ; GFX11-UNALIGNED-NEXT: {{  $}}
@@ -9969,6 +10452,7 @@ body: |
     ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
     ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
     ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; CI-LABEL: name: test_load_local_v4s16_align1
     ; CI: liveins: $vgpr0
     ; CI-NEXT: {{  $}}
@@ -10015,6 +10499,7 @@ body: |
     ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
     ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
     ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; CI-DS128-LABEL: name: test_load_local_v4s16_align1
     ; CI-DS128: liveins: $vgpr0
     ; CI-DS128-NEXT: {{  $}}
@@ -10061,6 +10546,7 @@ body: |
     ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
     ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
     ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; VI-LABEL: name: test_load_local_v4s16_align1
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{  $}}
@@ -10107,6 +10593,7 @@ body: |
     ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
     ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
     ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; GFX9-LABEL: name: test_load_local_v4s16_align1
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
@@ -10147,12 +10634,14 @@ body: |
     ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ;
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align1
     ; GFX9-UNALIGNED: liveins: $vgpr0
     ; GFX9-UNALIGNED-NEXT: {{  $}}
     ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 1, addrspace 3)
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX10-LABEL: name: test_load_local_v4s16_align1
     ; GFX10: liveins: $vgpr0
     ; GFX10-NEXT: {{  $}}
@@ -10193,6 +10682,7 @@ body: |
     ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS
[[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -10215,6 +10705,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX11-LABEL: name: test_load_local_v4s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10255,6 +10746,7 @@ body: | ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -10278,54 +10770,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-LABEL: name: test_load_local_v2s32_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v2s32_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_load_local_v2s32_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v2s32_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v2s32_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x 
s32>), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v2s32_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -10349,54 +10850,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-LABEL: name: test_load_local_v2s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v2s32_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_load_local_v2s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v2s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v2s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v2s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -10434,6 +10944,7 @@ body: | ; SI-NEXT: 
[[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-LABEL: name: test_load_local_v2s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10454,6 +10965,7 @@ body: | ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v2s32_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -10474,6 +10986,7 @@ body: | ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_local_v2s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10494,6 +11007,7 @@ body: | ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v2s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10514,12 +11028,14 @@ body: | ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v2s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10540,6 +11056,7 @@ body: | ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -10550,6 +11067,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v2s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10570,6 +11088,7 @@ body: | ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -10625,6 +11144,7 @@ body: | ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; 
CI-LABEL: name: test_load_local_v2s32_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10663,6 +11183,7 @@ body: | ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v2s32_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -10701,6 +11222,7 @@ body: | ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_local_v2s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10739,6 +11261,7 @@ body: | ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v2s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10777,12 +11300,14 @@ body: | ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v2s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10821,6 +11346,7 @@ body: | ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -10831,6 +11357,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v2s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10869,6 +11396,7 @@ body: | ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -10938,6 +11466,7 @@ body: | ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-LABEL: name: test_load_local_v3s32_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10990,6 +11519,7 @@ body: | ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x 
s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v3s32_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -11042,6 +11572,7 @@ body: | ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_load_local_v3s32_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11094,6 +11625,7 @@ body: | ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v3s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11146,12 +11678,14 @@ body: | ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v3s32_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11204,6 +11738,7 @@ body: | ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s32_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -11217,6 +11752,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v3s32_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11269,6 +11805,7 @@ body: | ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s32_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -11297,6 +11834,7 @@ body: | ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-LABEL: name: test_load_local_v3s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11308,6 +11846,7 @@ body: | ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v3s32_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -11321,6 +11860,7 @@ body: | ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_load_local_v3s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11334,6 +11874,7 @@ body: | ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v3s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11347,12 +11888,14 @@ body: | ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v3s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11366,6 +11909,7 @@ body: | ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s32_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -11379,6 +11923,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v3s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11392,6 +11937,7 @@ body: | ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s32_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -11419,6 +11965,7 @@ body: | ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = 
G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-LABEL: name: test_load_local_v4s32_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11429,48 +11976,56 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_load_local_v4s32_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v4s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v4s32_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v4s32_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -11498,6 +12053,7 @@ body: | ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-LABEL: name: test_load_local_v4s32_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11508,30 +12064,35 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = 
G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_load_local_v4s32_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v4s32_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v4s32_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11548,6 +12109,7 @@ body: | ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -11564,12 +12126,14 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v4s32_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -11597,6 +12161,7 @@ body: | ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-LABEL: name: test_load_local_v4s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11607,6 +12172,7 @@ body: | ; CI-NEXT: 
[[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -11623,6 +12189,7 @@ body: | ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_local_v4s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11639,6 +12206,7 @@ body: | ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v4s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11655,12 +12223,14 @@ body: | ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v4s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11677,6 +12247,7 @@ body: | ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -11693,6 +12264,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v4s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11709,6 +12281,7 @@ body: | ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align4 ; 
GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -11761,6 +12334,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-LABEL: name: test_load_local_v4s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11796,6 +12370,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -11830,6 +12405,7 @@ body: | ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_local_v4s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11864,6 +12440,7 @@ body: | ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v4s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11898,12 +12475,14 @@ body: | ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v4s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11938,6 +12517,7 @@ body: | ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -11954,6 +12534,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v4s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11988,6 +12569,7 @@ body: | ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = 
G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -12073,6 +12655,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-LABEL: name: test_load_local_v4s32_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12141,6 +12724,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -12208,6 +12792,7 @@ body: | ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_local_v4s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12275,6 +12860,7 @@ body: | ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v4s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12342,12 +12928,14 @@ body: | ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v4s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12415,6 +13003,7 @@ body: | ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -12431,6 +13020,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: 
test_load_local_v4s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12498,6 +13088,7 @@ body: | ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -12531,6 +13122,7 @@ body: | ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; CI-LABEL: name: test_load_local_v8s32_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12547,6 +13139,7 @@ body: | ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v8s32_align32 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -12557,6 +13150,7 @@ body: | ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; VI-LABEL: name: test_load_local_v8s32_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12567,6 +13161,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v8s32_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12577,6 +13172,7 @@ body: | ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v8s32_align32 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -12587,6 +13183,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v8s32_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12597,6 +13194,7 @@ body: | ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<4 
x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v8s32_align32 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -12607,6 +13205,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v8s32_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12617,6 +13216,7 @@ body: | ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v8s32_align32 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -12666,6 +13266,7 @@ body: | ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (<2 x s32>) from unknown-address + 56, addrspace 3) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>), [[LOAD4]](<2 x s32>), [[LOAD5]](<2 x s32>), [[LOAD6]](<2 x s32>), [[LOAD7]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; CI-LABEL: name: test_load_local_v16s32_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12694,6 +13295,7 @@ body: | ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (<2 x s32>) from unknown-address + 56, addrspace 3) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>), [[LOAD4]](<2 x s32>), [[LOAD5]](<2 x s32>), [[LOAD6]](<2 x s32>), [[LOAD7]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v16s32_align32 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -12710,6 +13312,7 @@ body: | ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; VI-LABEL: name: test_load_local_v16s32_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12726,6 +13329,7 @@ body: | ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; VI-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v16s32_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12742,6 +13346,7 @@ body: | ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v16s32_align32 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -12758,6 +13363,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v16s32_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12774,6 +13380,7 @@ body: | ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v16s32_align32 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -12790,6 +13397,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v16s32_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12806,6 +13414,7 @@ body: | ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v16s32_align32 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -12843,6 +13452,7 @@ body: | ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) 
from unknown-address + 8, align 4, addrspace 3) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-LABEL: name: test_load_local_v2s64_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12853,6 +13463,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-DS128-LABEL: name: test_load_local_v2s64_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -12863,6 +13474,7 @@ body: | ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_local_v2s64_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12873,6 +13485,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_local_v2s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12883,12 +13496,14 @@ body: | ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s64_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX10-LABEL: name: test_load_local_v2s64_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12899,6 +13514,7 @@ body: | ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s64_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -12909,6 +13525,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX11-LABEL: name: test_load_local_v2s64_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12919,6 +13536,7 @@ body: | ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR 
[[LOAD]](s64), [[LOAD1]](s64) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s64_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -13012,6 +13630,7 @@ body: | ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-LABEL: name: test_load_local_v2s64_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -13088,6 +13707,7 @@ body: | ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-DS128-LABEL: name: test_load_local_v2s64_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -13164,6 +13784,7 @@ body: | ; CI-DS128-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_local_v2s64_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -13240,6 +13861,7 @@ body: | ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_local_v2s64_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -13316,12 +13938,14 @@ body: | ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s64_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX10-LABEL: name: test_load_local_v2s64_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -13398,6 +14022,7 @@ body: | ; GFX10-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s64_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -13423,6 +14048,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[ZEXT1]] ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX11-LABEL: name: test_load_local_v2s64_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -13499,6 +14125,7 @@ body: | ; GFX11-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX11-UNALIGNED-LABEL: name: 
test_load_local_v2s64_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -13531,6 +14158,7 @@ body: | ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-LABEL: name: test_load_local_v3s64_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -13546,6 +14174,7 @@ body: | ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-DS128-LABEL: name: test_load_local_v3s64_align32 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -13559,6 +14188,7 @@ body: | ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_local_v3s64_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -13572,6 +14202,7 @@ body: | ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_local_v3s64_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -13585,6 +14216,7 @@ body: | ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -13598,6 +14230,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX10-LABEL: name: test_load_local_v3s64_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -13611,6 +14244,7 @@ body: | ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; 
GFX10-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -13624,6 +14258,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX11-LABEL: name: test_load_local_v3s64_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -13637,6 +14272,7 @@ body: | ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -13679,6 +14315,7 @@ body: | ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-LABEL: name: test_load_local_v4s64_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -13695,6 +14332,7 @@ body: | ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-DS128-LABEL: name: test_load_local_v4s64_align32 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -13705,6 +14343,7 @@ body: | ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; VI-LABEL: name: test_load_local_v4s64_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -13715,6 +14354,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_local_v4s64_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -13725,6 +14365,7 @@ body: | ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s64_align32 ; GFX9-UNALIGNED: liveins: 
$vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -13735,6 +14376,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; GFX10-LABEL: name: test_load_local_v4s64_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -13745,6 +14387,7 @@ body: | ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s64_align32 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -13755,6 +14398,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; GFX11-LABEL: name: test_load_local_v4s64_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -13765,6 +14409,7 @@ body: | ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s64_align32 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -13797,6 +14442,7 @@ body: | ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-LABEL: name: test_load_local_v2p1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -13808,6 +14454,7 @@ body: | ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-DS128-LABEL: name: test_load_local_v2p1_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -13825,6 +14472,7 @@ body: | ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_local_v2p1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -13842,6 +14490,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = 
COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_local_v2p1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -13859,6 +14508,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p1_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -13866,6 +14516,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX10-LABEL: name: test_load_local_v2p1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -13883,6 +14534,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2p1_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -13900,6 +14552,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX11-LABEL: name: test_load_local_v2p1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -13917,6 +14570,7 @@ body: | ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2p1_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -13941,54 +14595,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-LABEL: name: test_load_local_v2p3_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-DS128-LABEL: name: test_load_local_v2p3_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; VI-LABEL: name: test_load_local_v2p3_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_local_v2p3_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p3_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX10-LABEL: name: test_load_local_v2p3_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2p3_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX11-LABEL: name: test_load_local_v2p3_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2p3_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14012,54 +14675,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_extload_local_s32_from_1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-DS128-LABEL: name: test_extload_local_s32_from_1_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_extload_local_s32_from_1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14083,54 +14755,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_extload_local_s32_from_2_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-DS128-LABEL: name: test_extload_local_s32_from_2_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_extload_local_s32_from_2_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), 
align 4, addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14156,6 +14837,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_extload_local_s64_from_1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -14163,6 +14845,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -14170,6 +14853,7 @@ body: | ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_extload_local_s64_from_1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -14177,6 +14861,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -14184,6 +14869,7 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -14191,6 +14877,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -14198,6 +14885,7 @@ body: | ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -14205,6 +14893,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -14212,6 +14901,7 @@ body: | ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14237,6 +14927,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_extload_local_s64_from_2_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -14244,6 +14935,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_2_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -14251,6 +14943,7 @@ body: | ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_extload_local_s64_from_2_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -14258,6 +14951,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -14265,6 +14959,7 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -14272,6 +14967,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -14279,6 +14975,7 @@ body: | ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -14286,6 +14983,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -14293,6 +14991,7 @@ body: | ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14318,6 +15017,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_extload_local_s64_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ 
-14325,6 +15025,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_4_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -14332,6 +15033,7 @@ body: | ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_extload_local_s64_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -14339,6 +15041,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -14346,6 +15049,7 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -14353,6 +15057,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -14360,6 +15065,7 @@ body: | ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -14367,6 +15073,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -14374,6 +15081,7 @@ body: | ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14402,6 +15110,7 @@ body: | ; SI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; CI-LABEL: name: test_extload_local_s128_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -14412,6 +15121,7 @@ body: | ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; CI-DS128-LABEL: name: test_extload_local_s128_from_4_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -14422,6 
+15132,7 @@ body: | ; CI-DS128-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CI-DS128-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; VI-LABEL: name: test_extload_local_s128_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -14432,6 +15143,7 @@ body: | ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX9-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -14442,6 +15154,7 @@ body: | ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -14452,6 +15165,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX10-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -14462,6 +15176,7 @@ body: | ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -14472,6 +15187,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX10-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX11-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -14482,6 +15198,7 @@ body: | ; GFX11-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX11-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14510,6 +15227,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_extload_local_s64_from_2_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -14517,6 +15235,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_2_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -14524,6 +15243,7 @@ body: | ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_extload_local_s64_from_2_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -14531,6 +15251,7 @@ body: 
| ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -14538,6 +15259,7 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -14545,6 +15267,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -14552,6 +15275,7 @@ body: | ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -14559,6 +15283,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -14566,6 +15291,7 @@ body: | ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14591,6 +15317,7 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_extload_local_s64_from_1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -14598,6 +15325,7 @@ body: | ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -14605,6 +15333,7 @@ body: | ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_extload_local_s64_from_1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -14612,6 +15341,7 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_extload_local_s64_from_1_align1 ; 
GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -14619,6 +15349,7 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -14626,6 +15357,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -14633,6 +15365,7 @@ body: | ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -14640,6 +15373,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -14647,6 +15381,7 @@ body: | ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14671,54 +15406,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-LABEL: name: test_extload_local_v2s32_from_4_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_local_v2s32_from_4_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX9-UNALIGNED: 
liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14742,54 +15486,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-LABEL: name: test_extload_local_v2s32_from_4_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_local_v2s32_from_4_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 
x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14813,54 +15566,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-LABEL: name: test_extload_local_v2s32_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_local_v2s32_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-LABEL: name: 
test_extload_local_v2s32_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14884,54 +15646,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-LABEL: name: test_extload_local_v3s32_from_6_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-DS128-LABEL: name: test_extload_local_v3s32_from_6_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; VI-LABEL: name: test_extload_local_v3s32_from_6_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX10-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX11-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} 
@@ -14955,54 +15726,63 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-LABEL: name: test_extload_local_v4s32_from_8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-DS128-LABEL: name: test_extload_local_v4s32_from_8_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_extload_local_v4s32_from_8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX11-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -15121,6 +15901,7 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_local_v2s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -15222,6 +16003,7 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY 
[[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-DS128-LABEL: name: test_load_local_v2s96_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -15323,6 +16105,7 @@ body: | ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_local_v2s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -15424,6 +16207,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_local_v2s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -15525,6 +16309,7 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -15539,6 +16324,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_local_v2s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -15640,6 +16426,7 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -15666,6 +16453,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_local_v2s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -15767,6 +16555,7 @@ body: | ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -15847,6 +16636,7 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_local_v2s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -15899,6 +16689,7 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-DS128-LABEL: name: test_load_local_v2s96_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -15951,6 +16742,7 @@ body: | ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_local_v2s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -16003,6 +16795,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; 
VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_local_v2s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -16055,6 +16848,7 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -16069,6 +16863,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_local_v2s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -16121,6 +16916,7 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -16147,6 +16943,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_local_v2s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -16199,6 +16996,7 @@ body: | ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -16250,6 +17048,7 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_local_v2s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -16273,6 +17072,7 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-DS128-LABEL: name: test_load_local_v2s96_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -16299,6 +17099,7 @@ body: | ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_local_v2s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -16325,6 +17126,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_local_v2s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -16351,6 +17153,7 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -16365,6 +17168,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = 
COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_local_v2s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -16391,6 +17195,7 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -16417,6 +17222,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_local_v2s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -16443,6 +17249,7 @@ body: | ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -16494,6 +17301,7 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_local_v2s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -16517,6 +17325,7 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-DS128-LABEL: name: test_load_local_v2s96_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -16538,6 +17347,7 @@ body: | ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_local_v2s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -16559,6 +17369,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_local_v2s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -16580,6 +17391,7 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -16594,6 +17406,7 @@ body: | ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_local_v2s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -16615,6 +17428,7 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -16636,6 +17450,7 @@ body: | ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY 
[[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_local_v2s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -16657,6 +17472,7 @@ body: | ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir index 620f0128052943..40884567f9962a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -20,6 +20,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-LABEL: name: test_load_private_s1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -28,6 +29,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_private_s1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -36,6 +38,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_load_private_s1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -44,6 +47,7 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX10-LABEL: name: test_load_private_s1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -52,6 +56,7 @@ body: | ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX11-LABEL: name: test_load_private_s1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -80,6 +85,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-LABEL: name: test_load_private_s2_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -88,6 +94,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_private_s2_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -96,6 +103,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_load_private_s2_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -104,6 +112,7 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX10-LABEL: name: test_load_private_s2_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -112,6 +121,7 @@ body: | ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX11-LABEL: name: test_load_private_s2_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -138,30 +148,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = 
COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_s8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -186,30 +201,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -234,30 +254,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_s16_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s16_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s16_align4 ; 
GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s16_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s16_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -282,30 +307,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -336,6 +366,7 @@ body: | ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-LABEL: name: test_load_private_s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -348,6 +379,7 @@ body: | ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_private_s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -360,6 +392,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_private_s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -372,6 +405,7 @@ body: | ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX10-LABEL: name: test_load_private_s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -384,6 +418,7 @@ body: | ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX11-LABEL: name: test_load_private_s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -408,30 +443,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -461,6 +501,7 @@ body: | ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-LABEL: name: test_load_private_s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -473,6 +514,7 @@ body: | ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_private_s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -485,6 +527,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_private_s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -497,6 +540,7 @@ body: | ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX10-LABEL: name: test_load_private_s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -509,6 +553,7 @@ body: | ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX11-LABEL: name: test_load_private_s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -548,6 +593,7 @@ body: | ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; CI-LABEL: name: test_load_private_s32_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -570,6 +616,7 @@ body: | ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_private_s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -592,6 +639,7 @@ body: | ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_load_private_s32_align1 ; 
GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -614,6 +662,7 @@ body: | ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX10-LABEL: name: test_load_private_s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -636,6 +685,7 @@ body: | ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX11-LABEL: name: test_load_private_s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -659,30 +709,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_s24_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s24_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s24_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s24_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s24_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -707,30 +762,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_s24_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s24_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s24_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s24_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s24_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -761,6 +821,7 @@ body: | ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; 
SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-LABEL: name: test_load_private_s24_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -773,6 +834,7 @@ body: | ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_private_s24_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -785,6 +847,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_private_s24_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -797,6 +860,7 @@ body: | ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX10-LABEL: name: test_load_private_s24_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -809,6 +873,7 @@ body: | ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX11-LABEL: name: test_load_private_s24_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -851,6 +916,7 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; SI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; CI-LABEL: name: test_load_private_s24_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -869,6 +935,7 @@ body: | ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; VI-LABEL: name: test_load_private_s24_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -887,6 +954,7 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; GFX9-LABEL: name: test_load_private_s24_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -905,6 +973,7 @@ body: | ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; GFX9-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; GFX10-LABEL: name: test_load_private_s24_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -923,6 +992,7 @@ body: | ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; GFX10-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; GFX11-LABEL: name: test_load_private_s24_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -959,15 +1029,15 @@ body: | ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], 
[[SHL1]] + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; CI-LABEL: name: test_load_private_s48_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -980,15 +1050,15 @@ body: | ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_private_s48_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1001,15 +1071,15 @@ body: | ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_private_s48_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1022,15 +1092,15 @@ body: | ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX10-LABEL: name: test_load_private_s48_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1043,15 +1113,15 @@ body: | ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; 
GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX11-LABEL: name: test_load_private_s48_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1080,6 +1150,7 @@ body: | ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; CI-LABEL: name: test_load_private_s64_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1090,6 +1161,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_private_s64_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1100,6 +1172,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_private_s64_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1110,6 +1183,7 @@ body: | ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX10-LABEL: name: test_load_private_s64_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1120,6 +1194,7 @@ body: | ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX11-LABEL: name: test_load_private_s64_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1147,6 +1222,7 @@ body: | ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; CI-LABEL: name: test_load_private_s64_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1157,6 +1233,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_private_s64_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1167,6 
+1244,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_private_s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1177,6 +1255,7 @@ body: | ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX10-LABEL: name: test_load_private_s64_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1187,6 +1266,7 @@ body: | ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX11-LABEL: name: test_load_private_s64_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1224,6 +1304,7 @@ body: | ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; CI-LABEL: name: test_load_private_s64_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1244,6 +1325,7 @@ body: | ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_private_s64_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1264,6 +1346,7 @@ body: | ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_private_s64_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1284,6 +1367,7 @@ body: | ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX10-LABEL: name: test_load_private_s64_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1304,6 +1388,7 @@ body: | ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX11-LABEL: name: test_load_private_s64_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1359,6 +1444,7 @@ body: | ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; CI-LABEL: name: test_load_private_s64_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1397,6 +1483,7 @@ body: | ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_private_s64_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1435,6 +1522,7 @@ body: | ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_private_s64_align1 ; GFX9: liveins: $vgpr0 
; GFX9-NEXT: {{ $}} @@ -1473,6 +1561,7 @@ body: | ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX10-LABEL: name: test_load_private_s64_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1511,6 +1600,7 @@ body: | ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX11-LABEL: name: test_load_private_s64_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1581,6 +1671,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_private_s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1634,6 +1725,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_private_s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1687,6 +1779,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_private_s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1740,6 +1833,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_private_s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1793,6 +1887,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_private_s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1825,6 +1920,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_private_s96_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1839,6 +1935,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_private_s96_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1853,6 +1950,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_private_s96_align8 
; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1867,6 +1965,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_private_s96_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1881,6 +1980,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_private_s96_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1913,6 +2013,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_private_s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1927,6 +2028,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_private_s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1941,6 +2043,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_private_s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1955,6 +2058,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_private_s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1969,6 +2073,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_private_s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -2015,6 +2120,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_private_s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2043,6 +2149,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_private_s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2071,6 +2178,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; VI-NEXT: 
[[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_private_s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2099,6 +2207,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_private_s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2127,6 +2236,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_private_s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -2198,6 +2308,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_private_s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2251,6 +2362,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_private_s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2304,6 +2416,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_private_s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2357,6 +2470,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_private_s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2410,6 +2524,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_private_s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -2496,6 +2611,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_private_s128_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2564,6 +2680,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_private_s128_align16 ; VI: liveins: 
$vgpr0 ; VI-NEXT: {{ $}} @@ -2632,6 +2749,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_private_s128_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2700,6 +2818,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_private_s128_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2768,6 +2887,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_private_s128_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -2803,6 +2923,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_private_s128_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2820,6 +2941,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_private_s128_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2837,6 +2959,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_private_s128_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2854,6 +2977,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_private_s128_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2871,6 +2995,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_private_s128_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -2906,6 +3031,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: 
test_load_private_s128_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2923,6 +3049,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_private_s128_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2940,6 +3067,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_private_s128_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2957,6 +3085,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_private_s128_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2974,6 +3103,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_private_s128_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3027,6 +3157,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_private_s128_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3062,6 +3193,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_private_s128_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3097,6 +3229,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_private_s128_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3132,6 +3265,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_private_s128_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3167,6 +3301,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: 
test_load_private_s128_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3253,6 +3388,7 @@ body: | ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_private_s128_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3321,6 +3457,7 @@ body: | ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_private_s128_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3389,6 +3526,7 @@ body: | ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_private_s128_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3457,6 +3595,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_private_s128_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3525,6 +3664,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_private_s128_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3553,6 +3693,7 @@ body: | ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; CI-LABEL: name: test_load_private_p1_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3563,6 +3704,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_private_p1_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3573,6 +3715,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX9-LABEL: name: test_load_private_p1_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3583,6 +3726,7 @@ body: | ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX10-LABEL: name: test_load_private_p1_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3593,6 +3737,7 @@ body: | ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX11-LABEL: name: test_load_private_p1_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3620,6 +3765,7 @@ body: | ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; CI-LABEL: name: test_load_private_p1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3630,6 +3776,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_private_p1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3640,6 +3787,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX9-LABEL: name: test_load_private_p1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3650,6 +3798,7 @@ body: | ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX10-LABEL: name: test_load_private_p1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3660,6 +3809,7 @@ body: | ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX11-LABEL: name: test_load_private_p1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3697,6 +3847,7 @@ body: | ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; CI-LABEL: name: test_load_private_p1_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3717,6 +3868,7 @@ body: | ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_private_p1_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3737,6 +3889,7 @@ body: | ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX9-LABEL: name: test_load_private_p1_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3757,6 +3910,7 @@ body: | ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX10-LABEL: name: test_load_private_p1_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3777,6 +3931,7 @@ body: | ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 
= COPY [[MV]](p1) + ; ; GFX11-LABEL: name: test_load_private_p1_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3832,6 +3987,7 @@ body: | ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; CI-LABEL: name: test_load_private_p1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3870,6 +4026,7 @@ body: | ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_private_p1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3908,6 +4065,7 @@ body: | ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX9-LABEL: name: test_load_private_p1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3946,6 +4104,7 @@ body: | ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX10-LABEL: name: test_load_private_p1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3984,6 +4143,7 @@ body: | ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX11-LABEL: name: test_load_private_p1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4007,30 +4167,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; CI-LABEL: name: test_load_private_p3_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; VI-LABEL: name: test_load_private_p3_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX9-LABEL: name: test_load_private_p3_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX10-LABEL: name: test_load_private_p3_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX11-LABEL: name: test_load_private_p3_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4061,6 +4226,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; CI-LABEL: name: test_load_private_p3_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4074,6 +4240,7 @@ body: | ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; VI-LABEL: name: test_load_private_p3_align2 ; VI: 
liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4087,6 +4254,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX9-LABEL: name: test_load_private_p3_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4100,6 +4268,7 @@ body: | ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX10-LABEL: name: test_load_private_p3_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4113,6 +4282,7 @@ body: | ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX11-LABEL: name: test_load_private_p3_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4153,6 +4323,7 @@ body: | ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; CI-LABEL: name: test_load_private_p3_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4176,6 +4347,7 @@ body: | ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; VI-LABEL: name: test_load_private_p3_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4199,6 +4371,7 @@ body: | ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX9-LABEL: name: test_load_private_p3_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4222,6 +4395,7 @@ body: | ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX10-LABEL: name: test_load_private_p3_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4245,6 +4419,7 @@ body: | ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX11-LABEL: name: test_load_private_p3_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4268,30 +4443,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; CI-LABEL: name: test_load_private_p5_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; VI-LABEL: name: test_load_private_p5_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX9-LABEL: name: test_load_private_p5_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX10-LABEL: name: test_load_private_p5_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: 
[[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX11-LABEL: name: test_load_private_p5_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4322,6 +4502,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-LABEL: name: test_load_private_p5_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4335,6 +4516,7 @@ body: | ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_private_p5_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4348,6 +4530,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-LABEL: name: test_load_private_p5_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4361,6 +4544,7 @@ body: | ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX10-LABEL: name: test_load_private_p5_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4374,6 +4558,7 @@ body: | ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX11-LABEL: name: test_load_private_p5_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4414,6 +4599,7 @@ body: | ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-LABEL: name: test_load_private_p5_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4437,6 +4623,7 @@ body: | ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_private_p5_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4460,6 +4647,7 @@ body: | ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-LABEL: name: test_load_private_p5_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4483,6 +4671,7 @@ body: | ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX10-LABEL: name: test_load_private_p5_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4506,6 +4695,7 @@ body: | ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX11-LABEL: name: test_load_private_p5_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4529,30 +4719,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_v2s8_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: 
(load (s16), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_v2s8_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_v2s8_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_v2s8_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_v2s8_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4586,6 +4781,7 @@ body: | ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-LABEL: name: test_load_private_v2s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4600,6 +4796,7 @@ body: | ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_private_v2s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4614,6 +4811,7 @@ body: | ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v2s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4628,6 +4826,7 @@ body: | ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v2s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4642,6 +4841,7 @@ body: | ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v2s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4673,8 +4873,8 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -4694,6 +4894,7 @@ body: | ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; CI-LABEL: name: test_load_private_v3s8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ 
-4704,8 +4905,8 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -4725,6 +4926,7 @@ body: | ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_private_v3s8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4735,8 +4937,8 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -4754,6 +4956,7 @@ body: | ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_load_private_v3s8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4764,8 +4967,8 @@ body: | ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -4783,6 +4986,7 @@ body: | ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX10-LABEL: name: test_load_private_v3s8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4793,8 +4997,8 @@ body: | ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -4812,6 +5016,7 @@ body: | ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX11-LABEL: name: test_load_private_v3s8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4822,8 +5027,8 @@ body: | ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX11-NEXT: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -4874,8 +5079,8 @@ body: | ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -4895,6 +5100,7 @@ body: | ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; SI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; CI-LABEL: name: test_load_private_v3s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4915,8 +5121,8 @@ body: | ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -4936,6 +5142,7 @@ body: | ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; CI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; VI-LABEL: name: test_load_private_v3s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4956,8 +5163,8 @@ body: | ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] @@ -4975,6 +5182,7 @@ body: | ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; VI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; GFX9-LABEL: name: test_load_private_v3s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4995,8 +5203,8 @@ body: | ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: 
[[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] @@ -5014,6 +5222,7 @@ body: | ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; GFX9-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; GFX10-LABEL: name: test_load_private_v3s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5034,8 +5243,8 @@ body: | ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] @@ -5053,6 +5262,7 @@ body: | ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; GFX10-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; GFX11-LABEL: name: test_load_private_v3s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5068,8 +5278,8 @@ body: | ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] @@ -5106,30 +5316,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_v4s8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_v4s8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_v4s8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_v4s8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_v4s8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5158,6 +5373,7 @@ body: | ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-LABEL: 
name: test_load_private_v8s8_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5168,6 +5384,7 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_private_v8s8_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5178,6 +5395,7 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v8s8_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5188,6 +5406,7 @@ body: | ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v8s8_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5198,6 +5417,7 @@ body: | ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v8s8_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5283,6 +5503,7 @@ body: | ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; CI-LABEL: name: test_load_private_v16s8_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5350,6 +5571,7 @@ body: | ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_private_v16s8_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5417,6 +5639,7 @@ body: | ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v16s8_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5484,6 +5707,7 @@ body: | ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v16s8_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5551,6 +5775,7 @@ body: | ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: 
test_load_private_v16s8_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5575,30 +5800,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; CI-LABEL: name: test_load_private_v2s16_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; VI-LABEL: name: test_load_private_v2s16_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v2s16_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v2s16_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v2s16_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5632,6 +5862,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; CI-LABEL: name: test_load_private_v2s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5648,6 +5879,7 @@ body: | ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; VI-LABEL: name: test_load_private_v2s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5664,6 +5896,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v2s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5676,6 +5909,7 @@ body: | ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v2s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5688,6 +5922,7 @@ body: | ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v2s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5731,6 +5966,7 @@ body: | ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; CI-LABEL: name: test_load_private_v2s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5757,6 +5993,7 @@ body: | ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], 
[[SHL2]] ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; VI-LABEL: name: test_load_private_v2s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5783,6 +6020,7 @@ body: | ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v2s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5805,6 +6043,7 @@ body: | ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v2s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5827,6 +6066,7 @@ body: | ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v2s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5862,22 +6102,21 @@ body: | ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-LABEL: name: test_load_private_v3s16_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5896,22 +6135,21 @@ body: | ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CI-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_private_v3s16_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5930,22 +6168,21 @@ body: | ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v3s16_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} 
@@ -5973,6 +6210,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v3s16_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6000,6 +6238,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v3s16_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6062,13 +6301,13 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-LABEL: name: test_load_private_v3s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6097,13 +6336,13 @@ body: | ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_private_v3s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6132,13 +6371,13 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: 
[[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v3s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6167,6 +6406,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v3s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6195,6 +6435,7 @@ body: | ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v3s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6278,13 +6519,13 @@ body: | ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-LABEL: name: test_load_private_v3s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6327,13 +6568,13 @@ body: | ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: 
test_load_private_v3s16_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -6376,13 +6617,13 @@ body: |
 ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
 ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]]
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
 ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v3s16_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -6425,6 +6666,7 @@ body: |
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v3s16_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -6467,6 +6709,7 @@ body: |
 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
 ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v3s16_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -6517,6 +6760,7 @@ body: |
 ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
 ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; CI-LABEL: name: test_load_private_v4s16_align8
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -6527,6 +6771,7 @@ body: |
 ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_load_private_v4s16_align8
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -6537,6 +6782,7 @@ body: |
 ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v4s16_align8
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -6547,6 +6793,7 @@ body: |
 ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v4s16_align8
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -6557,6 +6804,7 @@ body: |
 ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
 ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v4s16_align8
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -6584,6 +6832,7 @@ body: |
 ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
 ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; CI-LABEL: name: test_load_private_v4s16_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -6594,6 +6843,7 @@ body: |
 ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_load_private_v4s16_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -6604,6 +6854,7 @@ body: |
 ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v4s16_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -6614,6 +6865,7 @@ body: |
 ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v4s16_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -6624,6 +6876,7 @@ body: |
 ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
 ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v4s16_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -6667,6 +6920,7 @@ body: |
 ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; CI-LABEL: name: test_load_private_v4s16_align2
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -6694,6 +6948,7 @@ body: |
 ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_load_private_v4s16_align2
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -6721,6 +6976,7 @@ body: |
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v4s16_align2
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -6742,6 +6998,7 @@ body: |
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v4s16_align2
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -6763,6 +7020,7 @@ body: |
 ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v4s16_align2
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -6825,6 +7083,7 @@ body: |
 ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; CI-LABEL: name: test_load_private_v4s16_align1
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -6870,6 +7129,7 @@ body: |
 ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_load_private_v4s16_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -6915,6 +7175,7 @@ body: |
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v4s16_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -6954,6 +7215,7 @@ body: |
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v4s16_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -6993,6 +7255,7 @@ body: |
 ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v4s16_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -7020,6 +7283,7 @@ body: |
 ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; CI-LABEL: name: test_load_private_v2s32_align8
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -7030,6 +7294,7 @@ body: |
 ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_private_v2s32_align8
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -7040,6 +7305,7 @@ body: |
 ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v2s32_align8
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -7050,6 +7316,7 @@ body: |
 ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v2s32_align8
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -7060,6 +7327,7 @@ body: |
 ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v2s32_align8
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -7087,6 +7355,7 @@ body: |
 ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; CI-LABEL: name: test_load_private_v2s32_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -7097,6 +7366,7 @@ body: |
 ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_private_v2s32_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -7107,6 +7377,7 @@ body: |
 ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v2s32_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -7117,6 +7388,7 @@ body: |
 ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v2s32_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -7127,6 +7399,7 @@ body: |
 ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v2s32_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -7164,6 +7437,7 @@ body: |
 ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; CI-LABEL: name: test_load_private_v2s32_align2
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -7184,6 +7458,7 @@ body: |
 ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_private_v2s32_align2
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -7204,6 +7479,7 @@ body: |
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v2s32_align2
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -7224,6 +7500,7 @@ body: |
 ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v2s32_align2
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -7244,6 +7521,7 @@ body: |
 ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v2s32_align2
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -7299,6 +7577,7 @@ body: |
 ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; CI-LABEL: name: test_load_private_v2s32_align1
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -7337,6 +7616,7 @@ body: |
 ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_private_v2s32_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -7375,6 +7655,7 @@ body: |
 ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v2s32_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -7413,6 +7694,7 @@ body: |
 ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v2s32_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -7451,6 +7733,7 @@ body: |
 ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v2s32_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -7520,6 +7803,7 @@ body: |
 ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]]
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; CI-LABEL: name: test_load_private_v3s32_align16
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -7572,6 +7856,7 @@ body: |
 ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; VI-LABEL: name: test_load_private_v3s32_align16
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -7624,6 +7909,7 @@ body: |
 ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v3s32_align16
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -7676,6 +7962,7 @@ body: |
 ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]]
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v3s32_align16
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -7728,6 +8015,7 @@ body: |
 ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]]
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v3s32_align16
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -7758,6 +8046,7 @@ body: |
 ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; CI-LABEL: name: test_load_private_v3s32_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -7771,6 +8060,7 @@ body: |
 ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; VI-LABEL: name: test_load_private_v3s32_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -7784,6 +8074,7 @@ body: |
 ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v3s32_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -7797,6 +8088,7 @@ body: |
 ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v3s32_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -7810,6 +8102,7 @@ body: |
 ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v3s32_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -7894,6 +8187,7 @@ body: |
 ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; CI-LABEL: name: test_load_private_v4s32_align16
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -7961,6 +8255,7 @@ body: |
 ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; VI-LABEL: name: test_load_private_v4s32_align16
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -8028,6 +8323,7 @@ body: |
 ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v4s32_align16
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -8095,6 +8391,7 @@ body: |
 ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v4s32_align16
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -8162,6 +8459,7 @@ body: |
 ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v4s32_align16
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -8195,6 +8493,7 @@ body: |
 ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; CI-LABEL: name: test_load_private_v4s32_align8
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -8211,6 +8510,7 @@ body: |
 ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; VI-LABEL: name: test_load_private_v4s32_align8
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -8227,6 +8527,7 @@ body: |
 ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v4s32_align8
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -8243,6 +8544,7 @@ body: |
 ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v4s32_align8
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -8259,6 +8561,7 @@ body: |
 ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v4s32_align8
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -8292,6 +8595,7 @@ body: |
 ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; CI-LABEL: name: test_load_private_v4s32_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -8308,6 +8612,7 @@ body: |
 ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; VI-LABEL: name: test_load_private_v4s32_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -8324,6 +8629,7 @@ body: |
 ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v4s32_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -8340,6 +8646,7 @@ body: |
 ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v4s32_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -8356,6 +8663,7 @@ body: |
 ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v4s32_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -8407,6 +8715,7 @@ body: |
 ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; CI-LABEL: name: test_load_private_v4s32_align2
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -8441,6 +8750,7 @@ body: |
 ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; VI-LABEL: name: test_load_private_v4s32_align2
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -8475,6 +8785,7 @@ body: |
 ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v4s32_align2
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -8509,6 +8820,7 @@ body: |
 ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v4s32_align2
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -8543,6 +8855,7 @@ body: |
 ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v4s32_align2
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -8627,6 +8940,7 @@ body: |
 ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; CI-LABEL: name: test_load_private_v4s32_align1
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -8694,6 +9008,7 @@ body: |
 ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; VI-LABEL: name: test_load_private_v4s32_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -8761,6 +9076,7 @@ body: |
 ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v4s32_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -8828,6 +9144,7 @@ body: |
 ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v4s32_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -8895,6 +9212,7 @@ body: |
 ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v4s32_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -8940,6 +9258,7 @@ body: |
 ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+ ;
 ; CI-LABEL: name: test_load_private_v8s32_align32
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -8968,6 +9287,7 @@ body: |
 ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+ ;
 ; VI-LABEL: name: test_load_private_v8s32_align32
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -8996,6 +9316,7 @@ body: |
 ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v8s32_align32
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -9024,6 +9345,7 @@ body: |
 ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v8s32_align32
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -9052,6 +9374,7 @@ body: |
 ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v8s32_align32
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -9125,6 +9448,7 @@ body: |
 ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
+ ;
 ; CI-LABEL: name: test_load_private_v16s32_align32
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -9177,6 +9501,7 @@ body: |
 ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
+ ;
 ; VI-LABEL: name: test_load_private_v16s32_align32
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -9229,6 +9554,7 @@ body: |
 ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v16s32_align32
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -9281,6 +9607,7 @@ body: |
 ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v16s32_align32
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -9333,6 +9660,7 @@ body: |
 ; GFX10-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v16s32_align32
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -9377,6 +9705,7 @@ body: |
 ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; CI-LABEL: name: test_load_private_v2s64_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -9394,6 +9723,7 @@ body: |
 ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_load_private_v2s64_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -9411,6 +9741,7 @@ body: |
 ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v2s64_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -9428,6 +9759,7 @@ body: |
 ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v2s64_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -9445,6 +9777,7 @@ body: |
 ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v2s64_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -9530,6 +9863,7 @@ body: |
 ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; CI-LABEL: name: test_load_private_v2s64_align16
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -9598,6 +9932,7 @@ body: |
 ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_load_private_v2s64_align16
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -9666,6 +10001,7 @@ body: |
 ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v2s64_align16
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -9734,6 +10070,7 @@ body: |
 ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v2s64_align16
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -9802,6 +10139,7 @@ body: |
 ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32)
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v2s64_align16
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -9844,6 +10182,7 @@ body: |
 ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; CI-LABEL: name: test_load_private_v3s64_align32
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -9869,6 +10208,7 @@ body: |
 ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_private_v3s64_align32
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -9894,6 +10234,7 @@ body: |
 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v3s64_align32
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -9919,6 +10260,7 @@ body: |
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v3s64_align32
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -9944,6 +10286,7 @@ body: |
 ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v3s64_align32
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -9999,6 +10342,7 @@ body: |
 ; SI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; CI-LABEL: name: test_load_private_v4s64_align32
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -10028,6 +10372,7 @@ body: |
 ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_private_v4s64_align32
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -10057,6 +10402,7 @@ body: |
 ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v4s64_align32
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -10086,6 +10432,7 @@ body: |
 ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v4s64_align32
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -10115,6 +10462,7 @@ body: |
 ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v4s64_align32
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -10153,6 +10501,7 @@ body: |
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+ ;
 ; CI-LABEL: name: test_load_private_v2p1_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -10170,6 +10519,7 @@ body: |
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+ ;
 ; VI-LABEL: name: test_load_private_v2p1_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -10187,6 +10537,7 @@ body: |
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v2p1_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -10204,6 +10555,7 @@ body: |
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v2p1_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -10221,6 +10573,7 @@ body: |
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v2p1_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -10268,6 +10621,7 @@ body: |
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+ ;
 ; CI-LABEL: name: test_load_private_v4p1_align8
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -10297,6 +10651,7 @@ body: |
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+ ;
 ; VI-LABEL: name: test_load_private_v4p1_align8
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -10326,6 +10681,7 @@ body: |
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v4p1_align8
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -10355,6 +10711,7 @@ body: |
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v4p1_align8
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -10384,6 +10741,7 @@ body: |
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
 ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v4p1_align8
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -10416,6 +10774,7 @@ body: |
 ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ;
 ; CI-LABEL: name: test_load_private_v2p3_align8
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -10426,6 +10785,7 @@ body: |
 ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ;
 ; VI-LABEL: name: test_load_private_v2p3_align8
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -10436,6 +10796,7 @@ body: |
 ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ;
 ; GFX9-LABEL: name: test_load_private_v2p3_align8
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -10446,6 +10807,7 @@ body: |
 ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ;
 ; GFX10-LABEL: name: test_load_private_v2p3_align8
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -10456,6 +10818,7 @@ body: |
 ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ;
 ; GFX11-LABEL: name: test_load_private_v2p3_align8
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -10479,30 +10842,35 @@ body: |
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-LABEL: name: test_ext_load_private_s32_from_1_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_ext_load_private_s32_from_1_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_ext_load_private_s32_from_1_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-LABEL: name: test_ext_load_private_s32_from_1_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-LABEL: name: test_ext_load_private_s32_from_1_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -10526,30 +10894,35 @@ body: |
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-LABEL: name: test_ext_load_private_s32_from_2_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_ext_load_private_s32_from_2_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_ext_load_private_s32_from_2_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-LABEL: name: test_ext_load_private_s32_from_2_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-LABEL: name: test_ext_load_private_s32_from_2_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -10575,6 +10948,7 @@ body: |
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; CI-LABEL: name: test_ext_load_private_s64_from_1_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -10582,6 +10956,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; VI-LABEL: name: test_ext_load_private_s64_from_1_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -10589,6 +10964,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -10596,6 +10972,7 @@ body: |
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX10-LABEL: name: test_ext_load_private_s64_from_1_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -10603,6 +10980,7 @@ body: |
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX11-LABEL: name: test_ext_load_private_s64_from_1_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -10628,6 +11006,7 @@ body: |
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; CI-LABEL: name: test_ext_load_private_s64_from_2_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -10635,6 +11014,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; VI-LABEL: name: test_ext_load_private_s64_from_2_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -10642,6 +11022,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -10649,6 +11030,7 @@ body: |
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX10-LABEL: name: test_ext_load_private_s64_from_2_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -10656,6 +11038,7 @@ body: |
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX11-LABEL: name: test_ext_load_private_s64_from_2_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -10681,6 +11064,7 @@ body: |
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
 ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; CI-LABEL: name: test_ext_load_private_s64_from_4_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -10688,6 +11072,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
 ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; VI-LABEL: name: test_ext_load_private_s64_from_4_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -10695,6 +11080,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX9-LABEL: name: test_ext_load_private_s64_from_4_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -10702,6 +11088,7 @@ body: |
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX10-LABEL: name: test_ext_load_private_s64_from_4_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -10709,6 +11096,7 @@ body: |
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
 ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX11-LABEL: name: test_ext_load_private_s64_from_4_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -10737,6 +11125,7 @@ body: |
 ; SI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
 ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+ ;
 ; CI-LABEL: name: test_ext_load_private_s128_from_4_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -10747,6 +11136,7 @@ body: |
 ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
 ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+ ;
 ; VI-LABEL: name: test_ext_load_private_s128_from_4_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -10757,6 +11147,7 @@ body: |
 ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
 ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+ ;
 ; GFX9-LABEL: name: test_ext_load_private_s128_from_4_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -10767,6 +11158,7 @@ body: |
 ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+ ;
 ; GFX10-LABEL: name: test_ext_load_private_s128_from_4_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -10777,6 +11169,7 @@ body: |
 ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
 ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+ ;
 ; GFX11-LABEL: name: test_ext_load_private_s128_from_4_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -10805,6 +11198,7 @@ body: |
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; CI-LABEL: name: test_ext_load_private_s64_from_2_align2
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -10812,6 +11206,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; VI-LABEL: name: test_ext_load_private_s64_from_2_align2
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -10819,6 +11214,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align2
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -10826,6 +11222,7 @@ body: |
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX10-LABEL: name: test_ext_load_private_s64_from_2_align2
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -10833,6 +11230,7 @@ body: |
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
 ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX11-LABEL: name: test_ext_load_private_s64_from_2_align2
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -10858,6 +11256,7 @@ body: |
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; CI-LABEL: name: test_ext_load_private_s64_from_1_align1
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -10865,6 +11264,7 @@ body: |
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; VI-LABEL: name: test_ext_load_private_s64_from_1_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -10872,6 +11272,7 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -10879,6 +11280,7 @@ body: |
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX10-LABEL: name: test_ext_load_private_s64_from_1_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -10886,6 +11288,7 @@ body: |
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
 ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+ ;
 ; GFX11-LABEL: name: test_ext_load_private_s64_from_1_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -10910,30 +11313,35 @@ body: |
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; CI-LABEL: name: test_extload_private_v2s32_from_4_align1
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_extload_private_v2s32_from_4_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX10-LABEL: name: test_extload_private_v2s32_from_4_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX11-LABEL: name: test_extload_private_v2s32_from_4_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -10957,30 +11365,35 @@ body: |
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; CI-LABEL: name: test_extload_private_v2s32_from_4_align2
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_extload_private_v2s32_from_4_align2
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align2
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX10-LABEL: name: test_extload_private_v2s32_from_4_align2
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX11-LABEL: name: test_extload_private_v2s32_from_4_align2
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -11004,30 +11417,35 @@ body: |
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; CI-LABEL: name: test_extload_private_v2s32_from_4_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_extload_private_v2s32_from_4_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX10-LABEL: name: test_extload_private_v2s32_from_4_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x
s16>), addrspace 5) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-LABEL: name: test_extload_private_v2s32_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11051,30 +11469,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-LABEL: name: test_extload_private_v3s32_from_6_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; VI-LABEL: name: test_extload_private_v3s32_from_6_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-LABEL: name: test_extload_private_v3s32_from_6_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX10-LABEL: name: test_extload_private_v3s32_from_6_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX11-LABEL: name: test_extload_private_v3s32_from_6_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11098,30 +11521,35 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-LABEL: name: test_extload_private_v4s32_from_8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_extload_private_v4s32_from_8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-LABEL: name: test_extload_private_v4s32_from_8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_extload_private_v4s32_from_8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX11-LABEL: name: test_extload_private_v4s32_from_8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11240,6 +11668,7 @@ body: | ; SI-NEXT: 
[[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_private_v2s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11341,6 +11770,7 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_private_v2s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11442,6 +11872,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_private_v2s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11543,6 +11974,7 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_private_v2s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11644,6 +12076,7 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_private_v2s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11724,6 +12157,7 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_private_v2s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11776,6 +12210,7 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_private_v2s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11828,6 +12263,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_private_v2s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11880,6 +12316,7 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_private_v2s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11932,6 +12369,7 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_private_v2s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11986,6 +12424,7 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_private_v2s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12012,6 +12451,7 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_private_v2s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12038,6 +12478,7 @@ body: | ; 
VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_private_v2s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12064,6 +12505,7 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_private_v2s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12090,6 +12532,7 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_private_v2s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12144,6 +12587,7 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_private_v2s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12170,6 +12614,7 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_private_v2s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12196,6 +12641,7 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_private_v2s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12222,6 +12668,7 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_private_v2s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12248,6 +12695,7 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_private_v2s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir index 8b2d986682c710..3ec9a48b02e69c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -18,6 +18,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; VI-LABEL: name: test_lshr_s32_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX9-LABEL: name: test_lshr_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -51,6 +53,7 @@ body: | ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; ; VI-LABEL: name: test_lshr_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -59,6 +62,7 @@ body: | ; VI-NEXT: 
[[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; ; GFX9-LABEL: name: test_lshr_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -85,6 +89,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; ; VI-LABEL: name: test_lshr_s64_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -92,6 +97,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; ; GFX9-LABEL: name: test_lshr_s64_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -119,6 +125,7 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; ; VI-LABEL: name: test_lshr_s64_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -128,6 +135,7 @@ body: | ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[AND]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; ; GFX9-LABEL: name: test_lshr_s64_s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -159,6 +167,7 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; VI-LABEL: name: test_lshr_s16_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -169,6 +178,7 @@ body: | ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_lshr_s16_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -203,6 +213,7 @@ body: | ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; VI-LABEL: name: test_lshr_s16_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -213,6 +224,7 @@ body: | ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_lshr_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -249,26 +261,28 @@ body: | ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; VI-LABEL: name: test_lshr_s16_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_lshr_s16_i8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) @@ -298,26 +312,28 @@ body: | ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; VI-LABEL: name: test_lshr_i8_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND1]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_lshr_i8_i8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -350,6 +366,7 @@ body: | ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[UV3]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_lshr_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -361,6 +378,7 @@ body: | ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[UV3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_lshr_v2s32_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -396,6 +414,7 @@ body: | ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[UV5]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_lshr_v3s32_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -408,6 +427,7 @@ body: | ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[UV5]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_lshr_v3s32_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -443,6 +463,7 @@ body: | ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = 
G_LSHR [[UV1]], [[UV3]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_lshr_v2s64_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -454,6 +475,7 @@ body: | ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_lshr_v2s64_v2s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -491,6 +513,7 @@ body: | ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_lshr_v3s64_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -505,6 +528,7 @@ body: | ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_lshr_v3s64_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} @@ -548,15 +572,14 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[LSHR1]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_lshr_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -579,6 +602,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_lshr_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -610,14 +634,14 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[UV]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND 
[[LSHR]], [[C1]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[UV1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[UV1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; VI-LABEL: name: test_lshr_v2s16_v2s32 ; VI: liveins: $vgpr0, $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -639,6 +663,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: test_lshr_v2s16_v2s32 ; GFX9: liveins: $vgpr0, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -686,28 +711,26 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[LSHR1]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]] ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS 
[[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_lshr_v3s16_v3s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -741,13 +764,12 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_lshr_v3s16_v3s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -810,34 +832,32 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[LSHR1]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: 
[[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL2]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_ashr_v3s16_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -877,13 +897,13 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -953,27 +973,24 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[AND6]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[LSHR2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[LSHR3]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_lshr_v4s16_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1014,6 +1031,7 @@ body: | ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_lshr_v4s16_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1042,68 +1060,70 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[COPY1]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C1]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s128 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; 
VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[COPY1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C1]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s128 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[COPY1]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; GFX9-NEXT: 
[[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C1]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 @@ -1142,6 +1162,7 @@ body: | ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s132 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1165,6 +1186,7 @@ body: | ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s132 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1205,11 +1227,13 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s32_0 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s32_0 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1241,6 +1265,7 @@ body: | ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s32_23 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1254,6 +1279,7 @@ body: | ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s32_23 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1292,6 +1318,7 @@ body: | ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s32_31 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1305,6 +1332,7 @@ body: | ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s32_31 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1342,6 +1370,7 @@ body: | ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s32_32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1354,6 +1383,7 @@ body: | ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: 
name: test_lshr_s128_s32_32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1391,6 +1421,7 @@ body: | ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s32_33 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1404,6 +1435,7 @@ body: | ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s32_33 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1439,6 +1471,7 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LSHR]](s64), [[C1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s32_127 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1449,6 +1482,7 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LSHR]](s64), [[C1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s32_127 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1476,33 +1510,33 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; SI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[COPY1]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR 
[[LSHR1]], [[SHL]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[SUB2]](s32) ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[LSHR2]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C1]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C3]] ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) @@ -1510,27 +1544,27 @@ body: | ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR5]] ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C1]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C3]] ; SI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR6]], [[SHL3]] ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] + ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C3]] ; SI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; SI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; SI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; SI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; SI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP 
intpred(ult), [[SUB]](s32), [[C3]] - ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB]](s32) ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB]](s32) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB9]](s32) @@ -1538,50 +1572,51 @@ body: | ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB8]](s32) ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[LSHR9]] ; SI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV8]], [[SELECT9]] - ; SI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C1]] + ; SI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C3]] ; SI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]] ; SI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] ; SI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) ; SI-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT12]] ; SI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT13]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) - ; SI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C1]] - ; SI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; SI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C3]] + ; SI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) ; SI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; ; VI-LABEL: name: test_lshr_s256_s256 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; VI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) 
= G_SUB [[COPY1]], [[C3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[COPY1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[SUB2]](s32) ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[LSHR2]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C1]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C3]] ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) @@ -1589,27 +1624,27 @@ body: | ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR5]] ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C1]] + ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C3]] ; VI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR6]], [[SHL3]] ; 
VI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] + ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C3]] ; VI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; VI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; VI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; VI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; VI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB]](s32) ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB]](s32) ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB9]](s32) @@ -1617,50 +1652,51 @@ body: | ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB8]](s32) ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[LSHR9]] ; VI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV8]], [[SELECT9]] - ; VI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C1]] + ; VI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C3]] ; VI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]] ; VI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] ; VI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) ; VI-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT12]] ; VI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT13]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) - ; VI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C1]] - ; VI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; VI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C3]] + ; VI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) ; VI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; ; GFX9-LABEL: name: test_lshr_s256_s256 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB 
[[C2]], [[COPY1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[COPY1]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[SUB2]](s32) ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[LSHR2]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C1]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C3]] ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) @@ -1668,27 +1704,27 @@ body: | ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR5]] ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C1]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C3]] ; GFX9-NEXT: 
[[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR6]], [[SHL3]] ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] + ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C3]] ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; GFX9-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB]](s32) ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB]](s32) ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB9]](s32) @@ -1696,15 +1732,15 @@ body: | ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB8]](s32) ; GFX9-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[LSHR9]] ; GFX9-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV8]], [[SELECT9]] - ; GFX9-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C1]] + ; GFX9-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C3]] ; GFX9-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]] ; GFX9-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) ; GFX9-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT12]] ; GFX9-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT13]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) - ; GFX9-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C1]] - ; GFX9-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; GFX9-NEXT: 
[[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C3]] + ; GFX9-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) @@ -1761,6 +1797,7 @@ body: | ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; ; VI-LABEL: name: test_lshr_v2s128_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1801,6 +1838,7 @@ body: | ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; ; GFX9-LABEL: name: test_lshr_v2s128_v2s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -1859,12 +1897,12 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 @@ -1885,18 +1923,19 @@ body: | ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; VI-LABEL: name: test_lshr_s65_s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 @@ -1917,18 +1956,19 @@ body: | ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; GFX9-LABEL: name: test_lshr_s65_s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 @@ -1968,12 +2008,12 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 @@ -1994,17 +2034,18 @@ body: | ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; VI-LABEL: name: test_lshr_s65_s32_constant8 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 @@ -2025,17 +2066,18 @@ body: | ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; GFX9-LABEL: name: test_lshr_s65_s32_constant8 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: 
[[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 @@ -2077,12 +2119,12 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C2]] ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 @@ -2103,6 +2145,7 @@ body: | ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; ; VI-LABEL: name: test_lshr_s65_s32_known_pow2 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} @@ -2110,12 +2153,12 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C2]] ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 @@ -2136,6 +2179,7 @@ body: | ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; ; GFX9-LABEL: name: test_lshr_s65_s32_known_pow2 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -2143,12 +2187,12 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9-NEXT: 
[[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]] ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C2]] ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir index c1b3b758c22cf9..70f7e7ae623c39 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir @@ -23,8 +23,8 @@ body: | ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) @@ -91,14 +91,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_merge_s16_s8_s8 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C3]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C1]], [[TRUNC]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s8) = G_CONSTANT i8 0 @@ -115,26 +112,22 @@ body: | ; CHECK-LABEL: name: test_merge_s24_s8_s8_s8 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] - ; CHECK-NEXT: 
[[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C1]], [[TRUNC]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C3]], [[TRUNC1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(s8) = G_CONSTANT i8 0 @@ -154,18 +147,14 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C6]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C6]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(s8) = G_CONSTANT i8 0 @@ -242,47 +231,40 @@ body: | ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C6]], [[C5]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], 
[[TRUNC]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C5]], [[TRUNC]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C7]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C7]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C5]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[COPY7]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C6]](s16) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[COPY4]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC3]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C8]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C9]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[COPY3]], [[TRUNC3]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C9]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C7]](s32) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC4]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C8]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C10]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C9]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC5]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C11]](s32) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s24) = G_TRUNC [[OR6]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC6]](s24) @@ -308,47 +290,39 @@ body: | ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: 
[[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C6]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C6]], [[TRUNC]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C9]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C8]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C6]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[COPY7]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C7]](s16) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[COPY4]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC3]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C10]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[COPY3]], [[TRUNC3]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C8]](s32) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC4]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C9]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C11]](s32) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C10]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C9]](s32) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC5]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) ; CHECK-NEXT: 
[[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C12]](s32) + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C11]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s28) = G_TRUNC [[OR6]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC6]](s28) @@ -376,33 +350,25 @@ body: | ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C8]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C8]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C9]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C9]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C10]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C10]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C11]](s32) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C11]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C6]](s32) ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C12]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C12]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C7]](s32) ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C13]](s32) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C13]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] ; CHECK-NEXT: S_NOP 0, implicit [[OR6]](s32) %0:_(s4) = G_CONSTANT i4 0 @@ -426,43 +392,34 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C4]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C4]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[TRUNC]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C4]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C6]], [[TRUNC1]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C9]], [[C4]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C6]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C7]], [[TRUNC2]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C5]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C8]], [[TRUNC3]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) @@ -490,62 +447,49 @@ body: | ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; 
CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C7]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C10]], [[C7]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C7]], [[TRUNC]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C11]], [[C7]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C9]], [[TRUNC1]] + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C12]], [[C7]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[COPY6]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C10]], [[TRUNC2]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[COPY3]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[C9]](s16) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[COPY8]], [[C7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[COPY9]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C11]], [[TRUNC3]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[C8]](s16) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[COPY5]](s32) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CHECK-NEXT: 
[[OR4:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC4]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 10 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[C13]], [[C7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[COPY11]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[COPY4]], [[TRUNC4]] + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 10 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[COPY6]](s32) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND5]], [[TRUNC5]] + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[C12]], [[TRUNC5]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32) + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32) + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) @@ -605,44 +549,36 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C4]], [[C3]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C3]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[TRUNC]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 
2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C3]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C5]], [[TRUNC1]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C9]], [[C3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C6]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C5]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C6]], [[TRUNC2]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C8]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C4]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C7]], [[TRUNC3]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s56) = G_TRUNC [[MV]](s64) @@ -667,315 +603,249 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C3]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C4]](s32)
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C4]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C5]](s32)
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C5]](s32)
; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C6]](s32)
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C6]](s32)
; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C7]](s32)
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C7]](s32)
; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C8]](s32)
+ ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C8]](s32)
; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
- ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C9]](s32)
+ ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C9]](s32)
; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C10]](s32)
+ ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C10]](s32)
; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
- ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C11]](s32)
+ ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C11]](s32)
; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C12]](s32)
+ ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C12]](s32)
; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
- ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C13]](s32)
+ ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C13]](s32)
; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
- ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C14]](s32)
+ ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C14]](s32)
; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
- ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C15]](s32)
+ ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C15]](s32)
; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C16]](s32)
+ ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C16]](s32)
; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]]
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
- ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C17]](s32)
+ ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C17]](s32)
; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
- ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C18]](s32)
+ ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C18]](s32)
; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
- ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C19]](s32)
+ ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C19]](s32)
; CHECK-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]]
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C20]](s32)
+ ; CHECK-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C20]](s32)
; CHECK-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
- ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
- ; CHECK-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[COPY21]], [[C21]](s32)
+ ; CHECK-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C21]](s32)
; CHECK-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
- ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
- ; CHECK-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[COPY22]], [[C22]](s32)
+ ; CHECK-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C22]](s32)
; CHECK-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]]
- ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
- ; CHECK-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[COPY23]], [[C23]](s32)
+ ; CHECK-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C23]](s32)
; CHECK-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
- ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; CHECK-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[COPY24]], [[C24]](s32)
+ ; CHECK-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C24]](s32)
; CHECK-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
- ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
- ; CHECK-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[COPY25]], [[C25]](s32)
+ ; CHECK-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C25]](s32)
; CHECK-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]]
- ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
- ; CHECK-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[COPY26]], [[C26]](s32)
+ ; CHECK-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C26]](s32)
; CHECK-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]]
- ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
- ; CHECK-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[COPY27]], [[C27]](s32)
+ ; CHECK-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C27]](s32)
; CHECK-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]]
- ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
- ; CHECK-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[COPY28]], [[C28]](s32)
+ ; CHECK-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C28]](s32)
; CHECK-NEXT: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]]
- ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
- ; CHECK-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[COPY29]], [[C29]](s32)
+ ; CHECK-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C29]](s32)
; CHECK-NEXT: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]]
- ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
- ; CHECK-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[COPY30]], [[C30]](s32)
+ ; CHECK-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C30]](s32)
; CHECK-NEXT: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]]
- ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
- ; CHECK-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[COPY31]], [[C31]](s32)
+ ; CHECK-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C31]](s32)
; CHECK-NEXT: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]]
- ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[COPY33]], [[C1]](s32)
- ; CHECK-NEXT: [[OR31:%[0-9]+]]:_(s32) = G_OR [[COPY32]], [[SHL31]]
- ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[COPY34]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR31:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL31]]
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32)
; CHECK-NEXT: [[OR32:%[0-9]+]]:_(s32) = G_OR [[OR31]], [[SHL32]]
- ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[COPY35]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C3]](s32)
; CHECK-NEXT: [[OR33:%[0-9]+]]:_(s32) = G_OR [[OR32]], [[SHL33]]
- ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[COPY36]], [[C4]](s32)
+ ; CHECK-NEXT: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C4]](s32)
; CHECK-NEXT: [[OR34:%[0-9]+]]:_(s32) = G_OR [[OR33]], [[SHL34]]
- ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[COPY37]], [[C5]](s32)
+ ; CHECK-NEXT: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C5]](s32)
; CHECK-NEXT: [[OR35:%[0-9]+]]:_(s32) = G_OR [[OR34]], [[SHL35]]
- ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[COPY38]], [[C6]](s32)
+ ; CHECK-NEXT: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C6]](s32)
; CHECK-NEXT: [[OR36:%[0-9]+]]:_(s32) = G_OR [[OR35]], [[SHL36]]
- ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[COPY39]], [[C7]](s32)
+ ; CHECK-NEXT: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C7]](s32)
; CHECK-NEXT: [[OR37:%[0-9]+]]:_(s32) = G_OR [[OR36]], [[SHL37]]
- ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[COPY40]], [[C8]](s32)
+ ; CHECK-NEXT: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C8]](s32)
; CHECK-NEXT: [[OR38:%[0-9]+]]:_(s32) = G_OR [[OR37]], [[SHL38]]
- ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[COPY41]], [[C9]](s32)
+ ; CHECK-NEXT: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C9]](s32)
; CHECK-NEXT: [[OR39:%[0-9]+]]:_(s32) = G_OR [[OR38]], [[SHL39]]
- ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[COPY42]], [[C10]](s32)
+ ; CHECK-NEXT: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C10]](s32)
; CHECK-NEXT: [[OR40:%[0-9]+]]:_(s32) = G_OR [[OR39]], [[SHL40]]
- ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[COPY43]], [[C11]](s32)
+ ; CHECK-NEXT: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C11]](s32)
; CHECK-NEXT: [[OR41:%[0-9]+]]:_(s32) = G_OR [[OR40]], [[SHL41]]
- ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[COPY44]], [[C12]](s32)
+ ; CHECK-NEXT: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C12]](s32)
; CHECK-NEXT: [[OR42:%[0-9]+]]:_(s32) = G_OR [[OR41]], [[SHL42]]
- ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[COPY45]], [[C13]](s32)
+ ; CHECK-NEXT: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C13]](s32)
; CHECK-NEXT: [[OR43:%[0-9]+]]:_(s32) = G_OR [[OR42]], [[SHL43]]
- ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[COPY46]], [[C14]](s32)
+ ; CHECK-NEXT: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C14]](s32)
; CHECK-NEXT: [[OR44:%[0-9]+]]:_(s32) = G_OR [[OR43]], [[SHL44]]
- ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[COPY47]], [[C15]](s32)
+ ; CHECK-NEXT: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C15]](s32)
; CHECK-NEXT: [[OR45:%[0-9]+]]:_(s32) = G_OR [[OR44]], [[SHL45]]
- ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[COPY48]], [[C16]](s32)
+ ; CHECK-NEXT: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C16]](s32)
; CHECK-NEXT: [[OR46:%[0-9]+]]:_(s32) = G_OR [[OR45]], [[SHL46]]
- ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[COPY49]], [[C17]](s32)
+ ; CHECK-NEXT: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C17]](s32)
; CHECK-NEXT: [[OR47:%[0-9]+]]:_(s32) = G_OR [[OR46]], [[SHL47]]
- ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[COPY50]], [[C18]](s32)
+ ; CHECK-NEXT: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C18]](s32)
; CHECK-NEXT: [[OR48:%[0-9]+]]:_(s32) = G_OR [[OR47]], [[SHL48]]
- ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[COPY51]], [[C19]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C19]](s32)
; CHECK-NEXT: [[OR49:%[0-9]+]]:_(s32) = G_OR [[OR48]], [[SHL49]]
- ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[COPY52]], [[C20]](s32)
+ ; CHECK-NEXT: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C20]](s32)
; CHECK-NEXT: [[OR50:%[0-9]+]]:_(s32) = G_OR [[OR49]], [[SHL50]]
- ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[COPY53]], [[C21]](s32)
+ ; CHECK-NEXT: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C21]](s32)
; CHECK-NEXT: [[OR51:%[0-9]+]]:_(s32) = G_OR [[OR50]], [[SHL51]]
- ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[COPY54]], [[C22]](s32)
+ ; CHECK-NEXT: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C22]](s32)
; CHECK-NEXT: [[OR52:%[0-9]+]]:_(s32) = G_OR [[OR51]], [[SHL52]]
- ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[COPY55]], [[C23]](s32)
+ ; CHECK-NEXT: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C23]](s32)
; CHECK-NEXT: [[OR53:%[0-9]+]]:_(s32) = G_OR [[OR52]], [[SHL53]]
- ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[COPY56]], [[C24]](s32)
+ ; CHECK-NEXT: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C24]](s32)
; CHECK-NEXT: [[OR54:%[0-9]+]]:_(s32) = G_OR [[OR53]], [[SHL54]]
- ; CHECK-NEXT: [[COPY57:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[COPY57]], [[C25]](s32)
+ ; CHECK-NEXT: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C25]](s32)
; CHECK-NEXT: [[OR55:%[0-9]+]]:_(s32) = G_OR [[OR54]], [[SHL55]]
- ; CHECK-NEXT: [[COPY58:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[COPY58]], [[C26]](s32)
+ ; CHECK-NEXT: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C26]](s32)
; CHECK-NEXT: [[OR56:%[0-9]+]]:_(s32) = G_OR [[OR55]], [[SHL56]]
- ; CHECK-NEXT: [[COPY59:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[COPY59]], [[C27]](s32)
+ ; CHECK-NEXT: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C27]](s32)
; CHECK-NEXT: [[OR57:%[0-9]+]]:_(s32) = G_OR [[OR56]], [[SHL57]]
- ; CHECK-NEXT: [[COPY60:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[COPY60]], [[C28]](s32)
+ ; CHECK-NEXT: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C28]](s32)
; CHECK-NEXT: [[OR58:%[0-9]+]]:_(s32) = G_OR [[OR57]], [[SHL58]]
- ; CHECK-NEXT: [[COPY61:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[COPY61]], [[C29]](s32)
+ ; CHECK-NEXT: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C29]](s32)
; CHECK-NEXT: [[OR59:%[0-9]+]]:_(s32) = G_OR [[OR58]], [[SHL59]]
- ; CHECK-NEXT: [[COPY62:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[COPY62]], [[C30]](s32)
+ ; CHECK-NEXT: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C30]](s32)
; CHECK-NEXT: [[OR60:%[0-9]+]]:_(s32) = G_OR [[OR59]], [[SHL60]]
- ; CHECK-NEXT: [[COPY63:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[COPY63]], [[C31]](s32)
+ ; CHECK-NEXT: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C31]](s32)
; CHECK-NEXT: [[OR61:%[0-9]+]]:_(s32) = G_OR [[OR60]], [[SHL61]]
- ; CHECK-NEXT: [[COPY64:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY65:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[COPY65]], [[C1]](s32)
- ; CHECK-NEXT: [[OR62:%[0-9]+]]:_(s32) = G_OR [[COPY64]], [[SHL62]]
- ; CHECK-NEXT: [[COPY66:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[COPY66]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR62:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL62]]
+ ; CHECK-NEXT: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32)
; CHECK-NEXT: [[OR63:%[0-9]+]]:_(s32) = G_OR [[OR62]], [[SHL63]]
- ; CHECK-NEXT: [[COPY67:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[COPY67]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C3]](s32)
; CHECK-NEXT: [[OR64:%[0-9]+]]:_(s32) = G_OR [[OR63]], [[SHL64]]
- ; CHECK-NEXT: [[COPY68:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[COPY68]], [[C4]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C4]](s32)
; CHECK-NEXT: [[OR65:%[0-9]+]]:_(s32) = G_OR [[OR64]], [[SHL65]]
- ; CHECK-NEXT: [[COPY69:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[COPY69]], [[C5]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C5]](s32)
; CHECK-NEXT: [[OR66:%[0-9]+]]:_(s32) = G_OR [[OR65]], [[SHL66]]
- ; CHECK-NEXT: [[COPY70:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[COPY70]], [[C6]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C6]](s32)
; CHECK-NEXT: [[OR67:%[0-9]+]]:_(s32) = G_OR [[OR66]], [[SHL67]]
- ; CHECK-NEXT: [[COPY71:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[COPY71]], [[C7]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C7]](s32)
; CHECK-NEXT: [[OR68:%[0-9]+]]:_(s32) = G_OR [[OR67]], [[SHL68]]
- ; CHECK-NEXT: [[COPY72:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[COPY72]], [[C8]](s32)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C8]](s32)
; CHECK-NEXT: [[OR69:%[0-9]+]]:_(s32) = G_OR [[OR68]], [[SHL69]]
- ; CHECK-NEXT: [[COPY73:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[COPY73]], [[C9]](s32)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C9]](s32)
; CHECK-NEXT: [[OR70:%[0-9]+]]:_(s32) = G_OR [[OR69]], [[SHL70]]
- ; CHECK-NEXT: [[COPY74:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[COPY74]], [[C10]](s32)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C10]](s32)
; CHECK-NEXT: [[OR71:%[0-9]+]]:_(s32) = G_OR [[OR70]], [[SHL71]]
- ; CHECK-NEXT: [[COPY75:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[COPY75]], [[C11]](s32)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C11]](s32)
; CHECK-NEXT: [[OR72:%[0-9]+]]:_(s32) = G_OR [[OR71]], [[SHL72]]
- ; CHECK-NEXT: [[COPY76:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[COPY76]], [[C12]](s32)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C12]](s32)
; CHECK-NEXT: [[OR73:%[0-9]+]]:_(s32) = G_OR [[OR72]], [[SHL73]]
- ; CHECK-NEXT: [[COPY77:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[COPY77]], [[C13]](s32)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C13]](s32)
; CHECK-NEXT: [[OR74:%[0-9]+]]:_(s32) = G_OR [[OR73]], [[SHL74]]
- ; CHECK-NEXT: [[COPY78:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[COPY78]], [[C14]](s32)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C14]](s32)
; CHECK-NEXT: [[OR75:%[0-9]+]]:_(s32) = G_OR [[OR74]], [[SHL75]]
- ; CHECK-NEXT: [[COPY79:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[COPY79]], [[C15]](s32)
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C15]](s32)
; CHECK-NEXT: [[OR76:%[0-9]+]]:_(s32) = G_OR [[OR75]], [[SHL76]]
- ; CHECK-NEXT: [[COPY80:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[COPY80]], [[C16]](s32)
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C16]](s32)
; CHECK-NEXT: [[OR77:%[0-9]+]]:_(s32) = G_OR [[OR76]], [[SHL77]]
- ; CHECK-NEXT: [[COPY81:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[COPY81]], [[C17]](s32)
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C17]](s32)
; CHECK-NEXT: [[OR78:%[0-9]+]]:_(s32) = G_OR [[OR77]], [[SHL78]]
- ; CHECK-NEXT: [[COPY82:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[COPY82]], [[C18]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C18]](s32)
; CHECK-NEXT: [[OR79:%[0-9]+]]:_(s32) = G_OR [[OR78]], [[SHL79]]
- ; CHECK-NEXT: [[COPY83:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[COPY83]], [[C19]](s32)
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C19]](s32)
; CHECK-NEXT: [[OR80:%[0-9]+]]:_(s32) = G_OR [[OR79]], [[SHL80]]
- ; CHECK-NEXT: [[COPY84:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[COPY84]], [[C20]](s32)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C20]](s32)
; CHECK-NEXT: [[OR81:%[0-9]+]]:_(s32) = G_OR [[OR80]], [[SHL81]]
- ; CHECK-NEXT: [[COPY85:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[COPY85]], [[C21]](s32)
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C21]](s32)
; CHECK-NEXT: [[OR82:%[0-9]+]]:_(s32) = G_OR [[OR81]], [[SHL82]]
- ; CHECK-NEXT: [[COPY86:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[COPY86]], [[C22]](s32)
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C22]](s32)
; CHECK-NEXT: [[OR83:%[0-9]+]]:_(s32) = G_OR [[OR82]], [[SHL83]]
- ; CHECK-NEXT: [[COPY87:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[COPY87]], [[C23]](s32)
+ ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[COPY21]], [[C23]](s32)
; CHECK-NEXT: [[OR84:%[0-9]+]]:_(s32) = G_OR [[OR83]], [[SHL84]]
- ; CHECK-NEXT: [[COPY88:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[COPY88]], [[C24]](s32)
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[COPY22]], [[C24]](s32)
; CHECK-NEXT: [[OR85:%[0-9]+]]:_(s32) = G_OR [[OR84]], [[SHL85]]
- ; CHECK-NEXT: [[COPY89:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[COPY89]], [[C25]](s32)
+ ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[COPY23]], [[C25]](s32)
; CHECK-NEXT: [[OR86:%[0-9]+]]:_(s32) = G_OR [[OR85]], [[SHL86]]
- ; CHECK-NEXT: [[COPY90:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[COPY90]], [[C26]](s32)
+ ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[COPY24]], [[C26]](s32)
; CHECK-NEXT: [[OR87:%[0-9]+]]:_(s32) = G_OR [[OR86]], [[SHL87]]
- ; CHECK-NEXT: [[COPY91:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[COPY91]], [[C27]](s32)
+ ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[COPY25]], [[C27]](s32)
; CHECK-NEXT: [[OR88:%[0-9]+]]:_(s32) = G_OR [[OR87]], [[SHL88]]
- ; CHECK-NEXT: [[COPY92:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[COPY92]], [[C28]](s32)
+ ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[COPY26]], [[C28]](s32)
; CHECK-NEXT: [[OR89:%[0-9]+]]:_(s32) = G_OR [[OR88]], [[SHL89]]
- ; CHECK-NEXT: [[COPY93:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[COPY93]], [[C29]](s32)
+ ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[COPY27]], [[C29]](s32)
; CHECK-NEXT: [[OR90:%[0-9]+]]:_(s32) = G_OR [[OR89]], [[SHL90]]
- ; CHECK-NEXT: [[COPY94:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[COPY94]], [[C30]](s32)
+ ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[COPY28]], [[C30]](s32)
; CHECK-NEXT: [[OR91:%[0-9]+]]:_(s32) = G_OR [[OR90]], [[SHL91]]
- ; CHECK-NEXT: [[COPY95:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[COPY95]], [[C31]](s32)
+ ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[COPY29]], [[C31]](s32)
; CHECK-NEXT: [[OR92:%[0-9]+]]:_(s32) = G_OR [[OR91]], [[SHL92]]
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR30]](s32), [[OR61]](s32), [[OR92]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s68) = G_TRUNC [[MV]](s96)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
index 3174495e6bf0af..483698e01417b4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
@@ -472,10 +472,9 @@ body: |
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
@@ -534,10 +533,9 @@ body: |
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
@@ -546,10 +544,9 @@ body: |
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL1]]
; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
@@ -564,20 +561,17 @@ body: |
; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL5]]
; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
- ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]]
; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
index 7b439e501854cf..00612d552a1048 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
@@ -179,10 +179,9 @@ body: |
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL4]]
; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
@@ -1373,8 +1372,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31
@@ -1430,8 +1429,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31
@@ -1654,9 +1653,7 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[AND1]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND2]](s32), [[AND3]]
+ ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LSHR]](s32), [[LSHR1]]
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]]
; CHECK-NEXT: G_BRCOND [[ICMP2]](s1), %bb.1
; CHECK-NEXT: G_BR %bb.2
@@ -1670,12 +1667,10 @@ body: |
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
- ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND4]](s32), [[AND5]]
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
- ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND6]](s32), [[AND7]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
+ ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND2]](s32), [[AND3]]
+ ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LSHR2]](s32), [[LSHR3]]
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
@@ -1684,9 +1679,9 @@ body: |
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s1)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s1)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]]
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]]
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir
index 227d4645ea6478..80d1aa4d8889d8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir
@@ -12,8 +12,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[AND]](s64)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1)
@@ -94,8 +94,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[AND]](s64)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir
index e5017d0b615d78..07aca9f80be58d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir
@@ -21,36 +21,33 @@ body: |
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32)
+ ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32)
; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]]
; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]]
+ ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]]
; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
- ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]]
+ ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]]
; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]]
- ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]]
+ ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]]
+ ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]]
; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
- ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]]
- ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]]
+ ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]]
+ ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]]
; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]]
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32)
; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]]
- ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]]
- ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
+ ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32)
; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]]
- ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32)
+ ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[AND3]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
; GFX6-NEXT: $sgpr0 = COPY [[OR]](s32)
;
@@ -62,41 +59,38 @@ body: |
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32)
+ ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C]](s32)
; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY2]]
+ ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[C]]
; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
- ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]]
+ ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C]]
; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]]
- ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]]
+ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C]]
+ ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C]]
; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
- ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]]
- ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]]
+ ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C]]
+ ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C]]
; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
- ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32)
+ ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND1]](s16)
; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
- ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C5]], [[COPY3]]
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
+ ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C5]], [[COPY2]]
; GFX8-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C6]], [[C4]]
- ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
- ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[AND2]](s16)
- ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[SUB4]], [[C4]]
- ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]]
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16)
+ ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND2]], [[C6]](s16)
+ ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[SUB4]], [[C4]]
+ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[LSHR]], [[AND3]](s16)
; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16)
; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]]
@@ -214,36 +208,33 @@ body: |
; GFX-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
; GFX-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
- ; GFX-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; GFX-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32)
+ ; GFX-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32)
; GFX-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
; GFX-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]]
; GFX-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
; GFX-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]]
+ ; GFX-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]]
; GFX-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
; GFX-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
; GFX-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
; GFX-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
- ; GFX-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]]
+ ; GFX-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]]
; GFX-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]]
- ; GFX-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]]
+ ; GFX-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]]
+ ; GFX-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]]
; GFX-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
- ; GFX-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]]
- ; GFX-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]]
+ ; GFX-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]]
+ ; GFX-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]]
; GFX-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
; GFX-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]]
; GFX-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32)
; GFX-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]]
- ; GFX-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; GFX-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]]
- ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
+ ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32)
; GFX-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]]
- ; GFX-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; GFX-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32)
+ ; GFX-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[AND3]](s32)
; GFX-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
; GFX-NEXT: $sgpr0 = COPY [[OR]](s32)
%0:_(s32) = COPY $sgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
index 09aaf8d548758a..dba20e128237cd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
@@ -256,17 +256,16 @@ body: |
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL2]]
; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND5]](s32), [[AND6]](s32), [[AND7]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
index a175a87f239e35..11018723d979a6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
@@ -31,6 +31,7 @@ body: |
; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]]
; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32)
; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
; GFX8-LABEL: name: saddsat_s7
; GFX8: liveins: $vgpr0, $vgpr1
; GFX8-NEXT: {{ $}}
@@ -54,6 +55,7 @@ body: |
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16)
; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16)
; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX9-LABEL: name: saddsat_s7
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
@@ -103,6 +105,7 @@ body: |
; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]]
; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32)
; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
; GFX8-LABEL: name: saddsat_s8
; GFX8: liveins: $vgpr0, $vgpr1
; GFX8-NEXT: {{ $}}
@@ -126,6 +129,7 @@ body: |
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16)
; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16)
; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX9-LABEL: name: saddsat_s8
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
@@ -187,8 +191,8 @@ body: |
; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]]
; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]]
; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C1]](s32)
- ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32)
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]]
; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -198,6 +202,7 @@ body: |
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX8-LABEL: name: saddsat_v2s8
; GFX8: liveins: $vgpr0, $vgpr1
; GFX8-NEXT: {{ $}}
@@ -241,6 +246,7 @@ body: |
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]]
; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX9-LABEL: name: saddsat_v2s8
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
@@ -311,6 +317,7 @@ body: |
; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]]
; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32)
; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
; GFX8-LABEL: name: saddsat_s16
; GFX8: liveins: $vgpr0, $vgpr1
; GFX8-NEXT: {{ $}}
@@ -330,6 +337,7 @@ body: |
; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]]
; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16)
; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX9-LABEL: name: saddsat_s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
@@ -395,6 +403,7 @@ body: |
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]]
; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
; GFX8-LABEL: name: saddsat_v2s16
; GFX8: liveins: $vgpr0, $vgpr1
; GFX8-NEXT: {{ $}}
@@ -432,6 +441,7 @@ body: |
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
; GFX9-LABEL: name: saddsat_v2s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
@@ -512,13 +522,13 @@ body: |
; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]]
; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
- ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
- ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]]
+ ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
+ ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]]
; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX8-LABEL: name: saddsat_v3s16
; GFX8: liveins: $vgpr0_vgpr1_vgpr2
; GFX8-NEXT: {{ $}}
@@ -577,13 +587,13 @@ body: |
; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
- ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
+ ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
+ ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX9-LABEL: name: saddsat_v3s16
; GFX9: liveins: $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
@@ -709,6 +719,7 @@ body: |
; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX8-LABEL: name: saddsat_v4s16
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8-NEXT: {{ $}}
@@ -776,6 +787,7 @@ body: |
; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX9-LABEL: name: saddsat_v4s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -815,6 +827,7 @@ body: |
; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]]
; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]]
; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32)
+ ;
; GFX8-LABEL: name: saddsat_s32
; GFX8: liveins: $vgpr0, $vgpr1
; GFX8-NEXT: {{ $}}
@@ -831,6 +844,7 @@ body: |
; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]]
; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]]
; GFX8-NEXT: $vgpr0 = COPY [[ADD]](s32)
+ ;
; GFX9-LABEL: name: saddsat_s32
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
@@ -876,6 +890,7 @@ body: |
; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]]
; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX8-LABEL: name: saddsat_v2s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8-NEXT: {{ $}}
@@ -902,6 +917,7 @@ body: |
; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]]
; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32)
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX9-LABEL: name: saddsat_v2s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -949,6 +965,7 @@ body: |
; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+ ;
; GFX8-LABEL: name: saddsat_s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8-NEXT: {{ $}}
@@ -973,6 +990,7 @@ body: |
; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+ ;
; GFX9-LABEL: name: saddsat_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -1051,6 +1069,7 @@ body: |
; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]]
; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; GFX8-LABEL: name: saddsat_v2s64
; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
; GFX8-NEXT: {{ $}}
@@ -1093,6 +1112,7 @@ body: |
; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]]
; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; GFX9-LABEL: name: saddsat_v2s64
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
index 674f23e2f411ce..93a7e837cdd9b8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -417,10 +417,9 @@ body: |
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
@@ -1362,10 +1361,9 @@ body: |
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
@@ -1401,8 +1399,8 @@ body: |
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP1]](s1)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP2]](s1)
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]]
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
%0:_(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
index d5f992a42b07ad..40c48e10f933ff 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
@@ -17,12 +17,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1
; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s32_1
; GFX8: liveins: $vgpr0
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1
; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s32_1
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
@@ -46,12 +48,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 2
; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s32_2
; GFX8: liveins: $vgpr0
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 2
; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s32_2
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
@@ -75,12 +79,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s32_8
; GFX8: liveins: $vgpr0
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s32_8
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
@@ -104,12 +110,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s32_16
; GFX8: liveins: $vgpr0
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s32_16
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
@@ -133,12 +141,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 31
; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s32_31
; GFX8: liveins: $vgpr0
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 31
; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s32_31
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
@@ -162,12 +172,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 1
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s64_1
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 1
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s64_1
; GFX6: liveins: $vgpr0_vgpr1
; GFX6-NEXT: {{ $}}
@@ -191,12 +203,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 2
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s64_2
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 2
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s64_2
; GFX6: liveins: $vgpr0_vgpr1
; GFX6-NEXT: {{ $}}
@@ -220,12 +234,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s64_8
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s64_8
; GFX6: liveins: $vgpr0_vgpr1
; GFX6-NEXT: {{ $}}
@@ -249,12 +265,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s64_16
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s64_16
; GFX6: liveins: $vgpr0_vgpr1
; GFX6-NEXT: {{ $}}
@@ -278,12 +296,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 31
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s64_31
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 31
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s64_31
; GFX6: liveins: $vgpr0_vgpr1
; GFX6-NEXT: {{ $}}
@@ -307,12 +327,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s64_32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s64_32
; GFX6: liveins: $vgpr0_vgpr1
; GFX6-NEXT: {{ $}}
@@ -336,12 +358,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s64_33
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s64_33
; GFX6: liveins: $vgpr0_vgpr1
; GFX6-NEXT: {{ $}}
@@ -365,12 +389,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 63
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s64_63
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 63
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s64_63
; GFX6: liveins: $vgpr0_vgpr1
; GFX6-NEXT: {{ $}}
@@ -395,6 +421,7 @@ body: |
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32)
; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s16_1
; GFX8: liveins: $vgpr0
; GFX8-NEXT: {{ $}}
@@ -404,6 +431,7 @@ body: |
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16)
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16)
; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s16_1
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
@@ -431,6 +459,7 @@ body: |
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 15
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32)
; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s16_15
; GFX8: liveins: $vgpr0
; GFX8-NEXT: {{ $}}
@@ -440,6 +469,7 @@ body: |
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16)
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16)
; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s16_15
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
@@ -473,6 +503,7 @@ body: |
; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s96_8
; GFX8: liveins: $vgpr0_vgpr1_vgpr2
; GFX8-NEXT: {{ $}}
@@ -486,6 +517,7 @@ body: |
; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64)
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s96_8
; GFX6: liveins: $vgpr0_vgpr1_vgpr2
; GFX6-NEXT: {{ $}}
@@ -520,6 +552,7 @@ body: |
; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
; GFX8-LABEL: name: test_sext_inreg_s128_8
; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX8-NEXT: {{ $}}
@@ -530,6 +563,7 @@ body: |
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
; GFX8-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
; GFX6-LABEL: name: test_sext_inreg_s128_8
; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX6-NEXT: {{ $}}
@@ -564,6 +598,7 @@ body: |
; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES
[[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[TRUNC1]](s160) + ; ; GFX8-LABEL: name: test_sext_inreg_s160_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} @@ -577,6 +612,7 @@ body: | ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[TRUNC1]](s160) + ; ; GFX6-LABEL: name: test_sext_inreg_s160_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} @@ -611,6 +647,7 @@ body: | ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](s256) + ; ; GFX8-LABEL: name: test_sext_inreg_256_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -621,6 +658,7 @@ body: | ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](s256) + ; ; GFX6-LABEL: name: test_sext_inreg_256_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} @@ -652,6 +690,7 @@ body: | ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512) + ; ; GFX8-LABEL: name: test_sext_inreg_512_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX8-NEXT: {{ $}} @@ -662,6 +701,7 @@ body: | ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512) + ; ; GFX6-LABEL: name: test_sext_inreg_512_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX6-NEXT: {{ $}} @@ -693,6 +733,7 @@ body: | ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](s1024) + ; ; GFX8-LABEL: name: test_sext_inreg_1024_8 ; GFX8: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; GFX8-NEXT: {{ $}} @@ -703,6 +744,7 @@ body: | ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](s1024) + ; ; GFX6-LABEL: name: test_sext_inreg_1024_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; GFX6-NEXT: {{ $}} @@ -733,6 +775,7 @@ body: | ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_sext_inreg_v2s32_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -742,6 +785,7 @@ body: | ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX6-LABEL: name: test_sext_inreg_v2s32_1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} @@ -771,6 +815,7 @@ body: | ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<2 x s16>) ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x s16>) ; GFX9-NEXT: $vgpr0 = COPY [[ASHR]](<2 x s16>) + ; ; GFX8-LABEL: name: test_sext_inreg_v2s16_1 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -791,6 +836,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX6-LABEL: name: test_sext_inreg_v2s16_1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} @@ -854,6 +900,7 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: test_sext_inreg_v3s16_1 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -889,13 +936,13 @@ body: | ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] + ; GFX8-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL5]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX6-LABEL: name: test_sext_inreg_v3s16_1 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} @@ -924,10 +971,9 @@ body: | ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) @@ -955,6 +1001,7 @@ body: | ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 1 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX8-LABEL: name: test_sext_inreg_v3s32_1 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -965,6 +1012,7 @@ body: | ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 1 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX6-LABEL: name: test_sext_inreg_v3s32_1 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} @@ -997,6 +1045,7 @@ body: | ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 1 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32), [[SEXT_INREG3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX8-LABEL: name: test_sext_inreg_v4s32_1 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -1008,6 +1057,7 @@ body: | ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 1 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32), [[SEXT_INREG3]](s32) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX6-LABEL: name: test_sext_inreg_v4s32_1 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -1044,6 +1094,7 @@ body: | ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: 
name: test_sext_inreg_v4s16_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -1079,6 +1130,7 @@ body: | ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX6-LABEL: name: test_sext_inreg_v4s16_1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} @@ -1131,6 +1183,7 @@ body: | ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL2]], [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>), [[ASHR2]](<2 x s16>) ; GFX9-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: test_sext_inreg_v6s16_1 ; GFX8: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) @@ -1177,6 +1230,7 @@ body: | ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX6-LABEL: name: test_sext_inreg_v6s16_1 ; GFX6: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) @@ -1241,6 +1295,7 @@ body: | ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG1]](s64), [[ASHR1]](s64) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; ; GFX8-LABEL: name: test_sext_inreg_v2s128_1 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -1258,6 +1313,7 @@ body: | ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG1]](s64), [[ASHR1]](s64) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; ; GFX6-LABEL: name: test_sext_inreg_v2s128_1 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir index 12d1ce39c0a982..847ffc8aadc073 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir @@ -659,8 +659,8 @@ body: | ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C3]](s16) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 7 ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[C4]](s16) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C5]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C5]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir index 1b620f65d164a1..b1de5dbdff9f1a 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -18,6 +18,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_s32_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; GFX9-LABEL: name: test_shl_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -51,6 +53,7 @@ body: | ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; VI-LABEL: name: test_shl_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -59,6 +62,7 @@ body: | ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; GFX9-LABEL: name: test_shl_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -85,6 +89,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; VI-LABEL: name: test_shl_s64_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -92,6 +97,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; GFX9-LABEL: name: test_shl_s64_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -119,6 +125,7 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; VI-LABEL: name: test_shl_s64_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -128,6 +135,7 @@ body: | ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[AND]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; GFX9-LABEL: name: test_shl_s64_s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -157,6 +165,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_s16_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -167,6 +176,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_shl_s16_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -200,6 +210,7 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_s16_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -210,6 +221,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_shl_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -244,26 
+256,28 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_s16_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_shl_s16_i8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) @@ -292,25 +306,27 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_i8_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_shl_i8_i8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND]](s16) @@ -343,6 +359,7 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_shl_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -354,6 +371,7 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_shl_v2s32_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ 
$}} @@ -389,6 +407,7 @@ body: | ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[UV5]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32), [[SHL2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_shl_v3s32_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -401,6 +420,7 @@ body: | ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[UV5]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32), [[SHL2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_shl_v3s32_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -436,6 +456,7 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV3]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_shl_v2s64_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -447,6 +468,7 @@ body: | ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_shl_v2s64_v2s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -484,6 +506,7 @@ body: | ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_shl_v3s64_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -498,6 +521,7 @@ body: | ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_shl_v3s64_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} @@ -540,14 +564,14 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[AND]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[LSHR1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] ; SI-NEXT: 
[[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_shl_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -570,6 +594,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_shl_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -607,6 +632,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; VI-LABEL: name: test_shl_v2s16_v2s32 ; VI: liveins: $vgpr0, $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -628,6 +654,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: test_shl_v2s16_v2s32 ; GFX9: liveins: $vgpr0, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -673,32 +700,31 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[AND]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[AND2]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[LSHR1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[AND1]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL3]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C1]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL4]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; SI-NEXT: 
[[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL5]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_shl_v3s16_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -738,13 +764,13 @@ body: | ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL5]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_shl_v3s16_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -814,24 +840,23 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[AND]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[AND2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[AND3]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[LSHR2]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[AND1]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[LSHR3]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C1]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND 
[[SHL3]], [[C1]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_shl_v4s16_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -872,6 +897,7 @@ body: | ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_shl_v4s16_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -904,25 +930,27 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_s7_s7 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_shl_s7_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND]](s16) @@ -950,6 +978,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_i24_i32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -957,6 +986,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; GFX9-LABEL: name: test_shl_i24_i32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -983,66 +1013,68 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: 
[[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[COPY1]](s32) ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[COPY1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s128 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[COPY1]](s32) ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[COPY1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s128 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 64 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[COPY1]](s32) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[COPY1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) @@ -1083,6 +1115,7 @@ body: | ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s132 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1106,6 +1139,7 @@ body: | ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s132 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1146,11 +1180,13 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; ; VI-LABEL: name: test_shl_s128_s32_0 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s32_0 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1182,6 +1218,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s32_23 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1195,6 +1232,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s32_23 ; GFX9: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1233,6 +1271,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s32_31 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1246,6 +1285,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s32_31 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1283,6 +1323,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s32_32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1295,6 +1336,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s32_32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1332,6 +1374,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s32_33 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1345,6 +1388,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s32_33 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1380,6 +1424,7 @@ body: | ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C1]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[SHL]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s32_127 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1390,6 +1435,7 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C1]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[SHL]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s32_127 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1417,33 +1463,33 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; SI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI-NEXT: 
[[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[COPY1]](s32) ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB3]](s32) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[COPY1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[SUB2]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C3]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[SHL2]] ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV3]], [[SELECT1]] ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB1]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) @@ -1451,37 +1497,37 @@ body: | ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C3]] ; SI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], 
[[C2]] + ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[COPY1]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[COPY1]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL5]] ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C1]] + ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C3]] ; SI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL6]] ; SI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; SI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; SI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; SI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB]](s32) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB9]](s32) ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB]](s32) ; SI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL8]] ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB8]](s32) - ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C1]] + ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C3]] ; SI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[SHL9]] ; SI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV9]], [[SELECT10]] - ; SI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C1]] - ; SI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; SI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C3]] + ; SI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) ; SI-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT9]] ; SI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] @@ -1491,38 +1537,39 @@ body: | ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) ; SI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; ; VI-LABEL: name: test_shl_s256_s256 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 
128 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; VI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[COPY1]](s32) ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB3]](s32) ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[COPY1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[SUB2]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C3]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[SHL2]] ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV3]], [[SELECT1]] ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB1]](s32) ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) @@ -1530,37 +1577,37 @@ body: | ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = 
G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] + ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C3]] ; VI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[COPY1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL5]] ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C1]] + ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C3]] ; VI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL6]] ; VI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; VI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; VI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; VI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB]](s32) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB9]](s32) ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB]](s32) ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL8]] ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB8]](s32) - ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C1]] + ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C3]] ; VI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[SHL9]] ; VI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV9]], [[SELECT10]] - ; VI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C1]] - ; VI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; VI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C3]] + ; VI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) ; VI-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT9]] ; VI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] @@ -1570,38 +1617,39 @@ body: | ; VI-NEXT: 
[[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) ; VI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; ; GFX9-LABEL: name: test_shl_s256_s256 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[COPY1]](s32) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB3]](s32) ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[COPY1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[SUB2]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C3]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[SHL2]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV3]], [[SELECT1]] ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; GFX9-NEXT: 
[[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB1]](s32) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) @@ -1609,37 +1657,37 @@ body: | ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C3]] ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[COPY1]](s32) ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[COPY1]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL5]] ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C1]] + ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C3]] ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL6]] ; GFX9-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB]](s32) ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB9]](s32) ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB]](s32) ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL8]] ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB8]](s32) - ; GFX9-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C1]] + ; GFX9-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C3]] ; GFX9-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT 
[[ICMP8]](s1), [[OR5]], [[SHL9]] ; GFX9-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV9]], [[SELECT10]] - ; GFX9-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C1]] - ; GFX9-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; GFX9-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C3]] + ; GFX9-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) ; GFX9-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT9]] ; GFX9-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] @@ -1702,6 +1750,7 @@ body: | ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT3]](s64), [[SELECT5]](s64) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; ; VI-LABEL: name: test_shl_v2s128_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1742,6 +1791,7 @@ body: | ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT3]](s64), [[SELECT5]](s64) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; ; GFX9-LABEL: name: test_shl_v2s128_v2s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -1822,6 +1872,7 @@ body: | ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; VI-LABEL: name: test_shl_s65_s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} @@ -1850,6 +1901,7 @@ body: | ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; GFX9-LABEL: name: test_shl_s65_s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -1919,6 +1971,7 @@ body: | ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; VI-LABEL: name: test_shl_s65_s32_constant8 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -1946,6 +1999,7 @@ body: | ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; GFX9-LABEL: name: test_shl_s65_s32_constant8 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -2016,6 +2070,7 @@ body: | ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; ; VI-LABEL: name: test_shl_s65_s32_known_pow2 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} @@ -2045,6 +2100,7 @@ body: | ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; ; GFX9-LABEL: 
name: test_shl_s65_s32_known_pow2 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir index 402454d989d605..c8bd8ab33f18cb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir @@ -291,8 +291,7 @@ body: | ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) @@ -323,10 +322,9 @@ body: | ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -358,14 +356,13 @@ body: | ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir index 9388d31b7d3c20..a99a34f090466c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir @@ -23,6 +23,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_undef ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -62,6 +63,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_undef ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -105,6 +107,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -145,11 +148,11 @@ body: | ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -192,12 +195,12 @@ body: | ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -246,6 +249,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -285,13 +289,11 @@ body: | ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -339,6 +341,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -384,6 +387,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_undef ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -427,6 +431,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -467,11 +472,11 @@ body: | ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -514,12 +519,12 @@ body: | ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -559,12 +564,11 @@ body: | ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY 
[[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -600,13 +604,12 @@ body: | ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_undef ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -644,13 +647,11 @@ body: | ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -698,6 +699,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -745,6 +747,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -786,12 +789,12 @@ body: | ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -835,11 
+838,11 @@ body: | ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -882,12 +885,12 @@ body: | ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -931,11 +934,11 @@ body: | ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -980,17 +983,17 @@ body: | ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS 
[[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v4s16_v3s16_2_0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -998,7 +1001,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -1006,9 +1009,9 @@ body: | ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -1044,12 +1047,12 @@ body: | ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v4s16_1_0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1098,13 +1101,11 @@ body: | ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = 
COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: shufflevector_v2s16_v4s16_1_3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir index 4b1ac64b32b5a0..51fffb7551a125 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir @@ -15,6 +15,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[COPY]], [[COPY1]] ; GFX8-NEXT: $vgpr0 = COPY [[SMULH]](s32) + ; ; GFX9-LABEL: name: test_smulh_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -45,6 +46,7 @@ body: | ; GFX8-NEXT: [[SMULH1:%[0-9]+]]:_(s32) = G_SMULH [[UV1]], [[UV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMULH]](s32), [[SMULH1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_smulh_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -80,6 +82,7 @@ body: | ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR]], 16 ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG2]](s32) + ; ; GFX9-LABEL: name: test_smulh_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -124,6 +127,7 @@ body: | ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR2]](s16) ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 8 ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; ; GFX9-LABEL: name: test_smulh_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -181,6 +185,7 @@ body: | ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG4]](s32), [[SEXT_INREG5]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_smulh_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -246,6 +251,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_smulh_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -349,8 +355,8 @@ body: | ; GFX8-NEXT: [[ASHR10:%[0-9]+]]:_(s16) = G_ASHR [[SHL7]], [[C3]](s16) ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s16) = G_MUL [[ASHR9]], [[ASHR10]] ; GFX8-NEXT: [[ASHR11:%[0-9]+]]:_(s16) = G_ASHR [[MUL3]], [[C3]](s16) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR2]](s16) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]] ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR5]](s16) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C4]] @@ -365,6 +371,7 @@ body: | ; GFX8-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL10]] ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_smulh_v4s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir index 9ba2ebe8c4b4d9..cd75462271a36b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir @@ 
-21,6 +21,7 @@ body: | ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[MUL]](s32) ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_smulo_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -72,6 +73,7 @@ body: | ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; ; GFX9-LABEL: name: test_smulo_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -124,6 +126,7 @@ body: | ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](s32) ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_smulo_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -169,6 +172,7 @@ body: | ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](s32) ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_smulo_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -235,6 +239,7 @@ body: | ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG8]](s32), [[SEXT_INREG9]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_smulo_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -296,8 +301,8 @@ body: | ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL1]], 8 ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[SEXT_INREG5]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -310,6 +315,7 @@ body: | ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) ; GFX8-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; ; GFX9-LABEL: name: test_smulo_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -327,8 +333,8 @@ body: | ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL1]], 8 ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[SEXT_INREG5]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -411,6 +417,7 @@ body: | ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_smulo_v4s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -492,6 +499,7 @@ body: | ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[OR]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](s32) ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_smulo_s24 ; GFX9: liveins: $vgpr0, 
$vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir index 43496a8aec8c4a..5bc314d29c97fe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir @@ -31,14 +31,15 @@ body: | ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32) + ; ; GFX8-LABEL: name: sshlsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) @@ -54,14 +55,15 @@ body: | ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: sshlsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) @@ -112,14 +114,15 @@ body: | ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32) + ; ; GFX8-LABEL: name: sshlsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) @@ -135,14 +138,15 @@ body: | ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: sshlsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9-NEXT: 
[[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) @@ -204,8 +208,8 @@ body: | ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C1]](s32) - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]] ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C2]] @@ -214,6 +218,7 @@ body: | ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: sshlsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -223,8 +228,8 @@ body: | ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) @@ -255,6 +260,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: sshlsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -264,8 +270,8 @@ body: | ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) @@ -334,6 +340,7 @@ body: | ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32) + ; ; GFX8-LABEL: name: sshlsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -352,6 +359,7 @@ body: | ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: sshlsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -408,21 +416,21 @@ body: | ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[LSHR1]](s32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[LSHR1]](s32) ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL4]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: sshlsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -458,6 +466,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: sshlsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -513,34 +522,32 @@ body: | ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[LSHR1]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[LSHR1]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C3]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[BITCAST2]], [[C4]] ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C3]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[LSHR2]](s32) + ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[LSHR2]](s32) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C3]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) @@ -549,23 +556,23 @@ body: | ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C4]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL6]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C4]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL7]] ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX6-NEXT: 
[[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]] ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: sshlsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -621,13 +628,13 @@ body: | ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL5]] ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: sshlsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -723,45 +730,44 @@ body: | ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[LSHR2]](s32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[LSHR2]](s32) ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND1]](s32) + ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND1]](s32) ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]] ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]] ; GFX6-NEXT: 
[[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32) - ; GFX6-NEXT: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[AND3]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[LSHR3]](s32) + ; GFX6-NEXT: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[LSHR3]](s32) ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL6]](s32), [[C4]] ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C2]], [[C3]] ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[ASHR6]] ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL7]] ; GFX6-NEXT: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SELECT7]], [[C]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] - ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ASHR7]], [[C1]] - ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR7]], [[C1]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: sshlsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -825,6 +831,7 @@ body: | ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: sshlsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -907,6 +914,7 @@ body: | ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX8-LABEL: name: sshlsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -922,6 +930,7 @@ body: | ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX9-LABEL: name: sshlsat_s32 ; GFX9: liveins: $vgpr0, 
$vgpr1 ; GFX9-NEXT: {{ $}} @@ -973,6 +982,7 @@ body: | ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: sshlsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -997,6 +1007,7 @@ body: | ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: sshlsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1049,6 +1060,7 @@ body: | ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) + ; ; GFX8-LABEL: name: sshlsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -1065,6 +1077,7 @@ body: | ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) + ; ; GFX9-LABEL: name: sshlsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1119,6 +1132,7 @@ body: | ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: sshlsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -1145,6 +1159,7 @@ body: | ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: sshlsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir index 896d057fd74f67..57b1ab9b194ec5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir @@ -256,17 +256,16 @@ body: | ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL2]] ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) ; 
CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir index 7d6b8c5f621902..ba5d2908094875 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -31,6 +31,7 @@ body: | ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; GFX8-LABEL: name: ssubsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -54,6 +55,7 @@ body: | ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: ssubsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -103,6 +105,7 @@ body: | ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; GFX8-LABEL: name: ssubsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -126,6 +129,7 @@ body: | ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: ssubsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -187,8 +191,8 @@ body: | ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C1]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -198,6 +202,7 @@ body: | ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: ssubsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -241,6 +246,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: ssubsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -311,6 +317,7 @@ body: | ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] 
; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; GFX8-LABEL: name: ssubsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -330,6 +337,7 @@ body: | ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB2]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: ssubsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -395,6 +403,7 @@ body: | ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: ssubsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -432,6 +441,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: ssubsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -512,13 +522,13 @@ body: | ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]] ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: ssubsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -577,13 +587,13 @@ body: | ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: ssubsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -709,6 +719,7 @@ body: | ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: 
ssubsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -776,6 +787,7 @@ body: | ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: ssubsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -815,6 +827,7 @@ body: | ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB2]](s32) + ; ; GFX8-LABEL: name: ssubsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -831,6 +844,7 @@ body: | ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB2]](s32) + ; ; GFX9-LABEL: name: ssubsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -876,6 +890,7 @@ body: | ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: ssubsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -902,6 +917,7 @@ body: | ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: ssubsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -949,6 +965,7 @@ body: | ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX8-LABEL: name: ssubsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -973,6 +990,7 @@ body: | ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: ssubsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1051,6 +1069,7 @@ body: | ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: ssubsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -1093,6 +1112,7 @@ body: | ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: ssubsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir index fb1e0537556fa3..c2fdbff77868f0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -21,6 +21,7 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] ; SI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s1_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} @@ -30,6 +31,7 @@ body: | ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] ; CI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s1_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -39,6 +41,7 @@ body: | ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] ; VI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s1_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -68,6 +71,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s7_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} @@ -76,6 +80,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s7_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -84,6 +89,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s7_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -110,18 +116,21 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s8_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s8_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s8_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -153,12 +162,14 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s16_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s16_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; 
VI-NEXT: {{ $}} @@ -172,6 +183,7 @@ body: | ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s16_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -196,18 +208,21 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s16_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s16_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s16_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -232,18 +247,21 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s16_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -274,6 +292,7 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s24_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} @@ -286,6 +305,7 @@ body: | ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s24_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -298,6 +318,7 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s24_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -334,6 +355,7 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: 
G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s24_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} @@ -346,6 +368,7 @@ body: | ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s24_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -358,6 +381,7 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s24_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -401,6 +425,7 @@ body: | ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s24_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} @@ -413,6 +438,7 @@ body: | ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s24_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -432,6 +458,7 @@ body: | ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s24_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -464,6 +491,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s25_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} @@ -472,6 +500,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s25_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -480,6 +509,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s25_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -543,17 +573,18 @@ body: | ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY 
[[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -578,6 +609,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -607,12 +639,14 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -625,6 +659,7 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -648,18 +683,21 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -697,17 +735,18 @@ body: | ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY 
[[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p3_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p3_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -733,6 +772,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p3_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -763,12 +803,14 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p3_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p3_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -782,6 +824,7 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p3_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -805,18 +848,21 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) + ; ; CI-LABEL: name: test_store_global_p3_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) + ; ; VI-LABEL: name: test_store_global_p3_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; VI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p3_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -858,18 +904,18 @@ body: | ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C5]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s48_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} @@ -883,6 +929,7 @@ body: | ; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s48_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -919,6 +966,7 @@ body: | ; VI-NEXT: G_STORE [[TRUNC3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s48_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -963,6 +1011,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s48_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} @@ -976,6 +1025,7 @@ body: | ; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s48_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -995,6 +1045,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s48_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1070,8 +1121,7 @@ body: | ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -1080,23 +1130,24 @@ body: | ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[COPY7]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s64_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s64_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1143,6 +1194,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s64_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1184,12 +1236,14 @@ body: | ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s64_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s64_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1214,6 +1268,7 @@ body: | ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], 
[[C3]](s64) ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s64_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1237,18 +1292,21 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s64_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s64_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s64_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1272,18 +1330,21 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s64_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s64_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s64_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1307,18 +1368,21 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s64_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s64_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s64_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1362,8 +1426,7 @@ body: | ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: 
[[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -1372,23 +1435,24 @@ body: | ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[COPY7]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p0_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p0_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1436,6 +1500,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p0_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1478,12 +1543,14 @@ body: | ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p0_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p0_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1509,6 +1576,7 @@ body: | ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: 
G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p0_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1532,18 +1600,21 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p0_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p0_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p0_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1567,18 +1638,21 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1) + ; ; CI-LABEL: name: test_store_global_p0_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1) + ; ; VI-LABEL: name: test_store_global_p0_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p0_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1602,18 +1676,21 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p0_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p0_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p0_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1657,8 +1734,7 @@ body: | ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) 
= G_PTR_ADD [[PTR_ADD1]], [[C6]](s64)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -1667,23 +1743,24 @@ body: |
; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64)
; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]]
- ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32)
+ ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[COPY7]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_p999_align1
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 1, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_p999_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
@@ -1731,6 +1808,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16)
; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_p999_align1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -1773,12 +1851,14 @@ body: |
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_p999_align2
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 2, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_p999_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
@@ -1804,6 +1884,7 @@ body: |
; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_p999_align2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -1827,18 +1908,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_p999_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_p999_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_p999_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -1862,18 +1946,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_p999_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_p999_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_p999_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -1897,18 +1984,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_p999_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_p999_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_p999_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -1946,8 +2036,7 @@ body: |
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -1957,23 +2046,24 @@ body: |
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2s32_align1
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2s32_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
@@ -2016,6 +2106,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2s32_align1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2053,12 +2144,14 @@ body: |
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2s32_align2
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2s32_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
@@ -2079,6 +2172,7 @@ body: |
; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2s32_align2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2102,18 +2196,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2s32_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2s32_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2s32_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2137,18 +2234,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2s32_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2s32_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2s32_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2172,18 +2272,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2s32_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2s32_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2s32_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2222,8 +2325,7 @@ body: |
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -2234,23 +2336,24 @@ body: |
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2p3_align1
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2p3_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
@@ -2295,6 +2398,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2p3_align1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2334,12 +2438,14 @@ body: |
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2p3_align2
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2p3_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
@@ -2362,6 +2468,7 @@ body: |
; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2p3_align2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2385,18 +2492,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2p3_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2p3_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2p3_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2420,18 +2530,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2p3_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2p3_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2p3_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2455,18 +2568,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2p3_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2p3_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2p3_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2527,12 +2643,14 @@ body: |
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v4s16_align1
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 1, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v4s16_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
@@ -2576,6 +2694,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v4s16_align1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2614,12 +2733,14 @@ body: |
; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v4s16_align2
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 2, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v4s16_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
@@ -2641,6 +2762,7 @@ body: |
; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v4s16_align2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2664,18 +2786,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v4s16_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v4s16_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v4s16_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2699,18 +2824,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v4s16_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v4s16_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v4s16_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2734,18 +2862,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v4s16_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v4s16_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v4s16_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -2783,8 +2914,7 @@ body: |
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -2794,14 +2924,13 @@ body: |
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
@@ -2811,23 +2940,24 @@ body: |
; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
- ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32)
; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
- ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32)
+ ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32)
; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v3s32_align1
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v3s32_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; VI-NEXT: {{ $}}
@@ -2887,6 +3017,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16)
; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v3s32_align1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: {{ $}}
@@ -2931,12 +3062,14 @@ body: |
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v3s32_align2
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v3s32_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; VI-NEXT: {{ $}}
@@ -2964,6 +3097,7 @@ body: |
; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v3s32_align2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: {{ $}}
@@ -2992,18 +3126,21 @@ body: |
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v3s32_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v3s32_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v3s32_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: {{ $}}
@@ -3032,18 +3169,21 @@ body: |
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v3s32_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v3s32_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v3s32_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: {{ $}}
@@ -3072,18 +3212,21 @@ body: |
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v3s32_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v3s32_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v3s32_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: {{ $}}
@@ -3121,8 +3264,7 @@ body: |
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -3132,14 +3274,13 @@ body: |
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
@@ -3149,14 +3290,13 @@ body: |
; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
- ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32)
; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
- ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32)
+ ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32)
; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
@@ -3166,23 +3306,24 @@ body: |
; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32)
; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
- ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32)
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32)
; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
- ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32)
+ ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32)
; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v4s32_align1
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v4s32_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -3259,6 +3400,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16)
; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v4s32_align1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -3310,12 +3452,14 @@ body: |
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v4s32_align2
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v4s32_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -3350,6 +3494,7 @@ body: |
; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v4s32_align2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -3373,18 +3518,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v4s32_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v4s32_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v4s32_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -3408,18 +3556,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v4s32_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v4s32_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v4s32_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -3443,18 +3594,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v4s32_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v4s32_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v4s32_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -3498,8 +3652,7 @@ body: |
; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]]
- ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32)
+ ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32)
; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -3508,14 +3661,13 @@ body: |
; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64)
; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]]
- ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32)
+ ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[COPY7]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
@@ -3530,14 +3682,13 @@ body: |
; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C2]](s32)
; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]]
- ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY11]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]]
+ ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY11]](s32)
; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]]
- ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY12]](s32)
+ ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[COPY12]](s32)
; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C6]](s64)
; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
@@ -3546,23 +3697,24 @@ body: |
; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C2]](s32)
; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64)
; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]]
- ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY14]](s32)
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]]
+ ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY14]](s32)
; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C6]](s64)
; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C5]]
- ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY15]](s32)
+ ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LSHR11]], [[COPY15]](s32)
; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C6]](s64)
; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2s64_align1
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 1, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2s64_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -3648,6 +3800,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16)
; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2s64_align1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -3708,12 +3861,14 @@ body: |
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64)
; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2s64_align2
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 2, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2s64_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -3757,6 +3912,7 @@ body: |
; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64)
; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2s64_align2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -3780,18 +3936,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2s64_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2s64_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2s64_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -3815,18 +3974,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2s64_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2s64_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2s64_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -3850,18 +4012,21 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2s64_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2s64_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2s64_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -3900,8 +4065,7 @@ body: |
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -3911,14 +4075,13 @@ body: |
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
@@ -3928,14 +4091,13 @@ body: |
; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
- ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32)
; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
- ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32)
+ ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32)
; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
@@ -3945,17 +4107,17 @@ body: |
; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32)
; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
- ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32)
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32)
; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
- ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32)
+ ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32)
; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v8s16_align1
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
@@ -3963,6 +4125,7 @@ body: |
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v8s16_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -4040,6 +4203,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16)
; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v8s16_align1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -4093,6 +4257,7 @@ body: |
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v8s16_align2
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
@@ -4100,6 +4265,7 @@ body: |
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v8s16_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -4135,6 +4301,7 @@ body: |
; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v8s16_align2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -4160,6 +4327,7 @@ body: |
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v8s16_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
@@ -4167,6 +4335,7 @@ body: |
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v8s16_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -4174,6 +4343,7 @@ body: |
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v8s16_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -4199,6 +4369,7 @@ body: |
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v8s16_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
@@ -4206,6 +4377,7 @@ body: |
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v8s16_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -4213,6 +4385,7 @@ body: |
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v8s16_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -4238,6 +4411,7 @@ body: |
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v8s16_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
@@ -4245,6 +4419,7 @@ body: |
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v8s16_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -4252,6 +4427,7 @@ body: |
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v8s16_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -4291,8 +4467,7 @@ body: |
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -4302,14 +4477,13 @@ body: |
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
@@ -4319,14 +4493,13 @@ body: |
; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
- ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32)
; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
- ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32)
+ ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32)
; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
@@ -4336,17 +4509,17 @@ body: |
; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32)
; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
- ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32)
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32)
; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
- ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32)
+ ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32)
; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1)
:: (store (s8) into unknown-address + 15, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2p0_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -4354,6 +4527,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2p0_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -4431,6 +4605,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2p0_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -4484,6 +4659,7 @@ body: | ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2p0_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -4491,6 +4667,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2p0_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -4526,6 +4703,7 @@ body: | ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2p0_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -4551,6 +4729,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2p0_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -4558,6 +4737,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2p0_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -4565,6 +4745,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2p0_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ 
-4590,6 +4771,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2p0_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -4597,6 +4779,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2p0_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -4604,6 +4787,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2p0_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -4629,6 +4813,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2p0_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -4636,6 +4821,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2p0_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -4643,6 +4829,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2p0_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -4682,8 +4869,7 @@ body: | ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -4693,14 +4879,13 @@ body: | ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -4710,17 +4895,17 @@ body: | ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s96_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} @@ -4728,6 +4913,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s96_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -4788,6 +4974,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s96_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} @@ -4834,6 +5021,7 @@ body: | ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 
1) + ; ; CI-LABEL: name: test_store_global_s96_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} @@ -4841,6 +5029,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s96_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -4869,6 +5058,7 @@ body: | ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s96_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} @@ -4899,6 +5089,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s96_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} @@ -4906,6 +5097,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s96_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -4913,6 +5105,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s96_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} @@ -4943,6 +5136,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s96_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} @@ -4950,6 +5144,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s96_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -4957,6 +5152,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s96_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} @@ -4987,6 +5183,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s96_align16 ; 
CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} @@ -4994,6 +5191,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s96_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -5001,6 +5199,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s96_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} @@ -5040,8 +5239,7 @@ body: | ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -5051,14 +5249,13 @@ body: | ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -5068,14 +5265,13 @@ body: | ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), 
[[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -5085,17 +5281,17 @@ body: | ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s128_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -5103,6 +5299,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s128_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -5180,6 +5377,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s128_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -5233,6 +5431,7 @@ body: | ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s128_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -5240,6 +5439,7 @@ body: | ; CI-NEXT: 
[[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s128_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -5275,6 +5475,7 @@ body: | ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s128_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -5300,6 +5501,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s128_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -5307,6 +5509,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s128_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -5314,6 +5517,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s128_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -5339,6 +5543,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s128_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -5346,6 +5551,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s128_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -5353,6 +5559,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s128_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -5378,6 +5585,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s128_align16 ; CI: liveins: $vgpr0_vgpr1, 
$vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -5385,6 +5593,7 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s128_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -5392,6 +5601,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s128_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -5430,8 +5640,7 @@ body: | ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -5441,14 +5650,13 @@ body: | ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -5458,14 +5666,13 @@ body: | ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: 
(store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -5475,14 +5682,13 @@ body: | ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -5492,17 +5698,17 @@ body: | ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v5s32_align1 ; CI: liveins: $vgpr0_vgpr1, 
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -5514,6 +5720,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v5s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -5607,6 +5814,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v5s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -5670,6 +5878,7 @@ body: | ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v5s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -5681,6 +5890,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v5s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -5722,6 +5932,7 @@ body: | ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v5s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -5755,6 +5966,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v5s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -5766,6 +5978,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v5s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -5777,6 +5990,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v5s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -5810,6 +6024,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE 
[[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v5s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -5821,6 +6036,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v5s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -5832,6 +6048,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v5s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -5865,6 +6082,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v5s32_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -5876,6 +6094,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v5s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -5887,6 +6106,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v5s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -5929,8 +6149,7 @@ body: | ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -5940,14 +6159,13 @@ body: | ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], 
[[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -5957,14 +6175,13 @@ body: | ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -5974,14 +6191,13 @@ body: | ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -5991,17 +6207,17 @@ body: | ; SI-NEXT: 
[[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v5p3_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -6014,6 +6230,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v5p3_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -6108,6 +6325,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v5p3_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -6173,6 +6391,7 @@ body: | ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v5p3_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -6185,6 +6404,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v5p3_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -6227,6 +6447,7 @@ body: | ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v5p3_align2 ; GFX9: liveins: 
$vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -6262,6 +6483,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v5p3_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -6274,6 +6496,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v5p3_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -6286,6 +6509,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v5p3_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -6321,6 +6545,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v5p3_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -6333,6 +6558,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v5p3_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -6345,6 +6571,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v5p3_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -6380,6 +6607,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v5p3_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -6392,6 +6620,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v5p3_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -6404,6 +6633,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v5p3_align16 ; GFX9: 
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -6439,6 +6669,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v10s16_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -6451,6 +6682,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v10s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -6463,6 +6695,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v10s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -6508,6 +6741,7 @@ body: | ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v11s16_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -6530,6 +6764,7 @@ body: | ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; CI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v11s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -6552,6 +6787,7 @@ body: | ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; VI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v11s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -6598,6 +6834,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v12s16_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -6611,6 +6848,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v12s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -6624,6 +6862,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x 
s32>) into unknown-address + 16, align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v12s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -6669,8 +6908,7 @@ body: | ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -6680,14 +6918,13 @@ body: | ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -6697,14 +6934,13 @@ body: | ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE 
[[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -6714,14 +6950,13 @@ body: | ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -6731,17 +6966,17 @@ body: | ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s160_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -6754,6 +6989,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s160_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -6848,6 +7084,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT 
[[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s160_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -6913,6 +7150,7 @@ body: | ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s160_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -6925,6 +7163,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s160_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -6967,6 +7206,7 @@ body: | ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s160_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -7002,6 +7242,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s160_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -7014,6 +7255,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s160_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -7026,6 +7268,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s160_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -7061,6 +7304,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s160_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -7073,6 +7317,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s160_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -7085,6 +7330,7 @@ body: 
| ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s160_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -7120,6 +7366,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s160_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -7132,6 +7379,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s160_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -7144,6 +7392,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s160_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -7188,8 +7437,7 @@ body: | ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -7199,14 +7447,13 @@ body: | ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, 
addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -7216,14 +7463,13 @@ body: | ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -7233,14 +7479,13 @@ body: | ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -7251,14 +7496,13 @@ body: | ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE 
[[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) @@ -7267,14 +7511,13 @@ body: | ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY17]](s32) + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY17]](s32) ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY16]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY18]](s32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR15]], [[COPY18]](s32) ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) @@ -7283,14 +7526,13 @@ body: | ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY20]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY20]](s32) ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY19]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY21]](s32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LSHR18]], [[COPY21]](s32) ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) @@ -7299,17 +7541,17 @@ body: | ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY23]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY23]](s32) ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY22]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; SI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY24]](s32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR21]], [[COPY24]](s32) ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -7320,6 +7562,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -7463,6 +7706,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; VI-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR23]](s16) ; VI-NEXT: G_STORE [[ANYEXT15]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -7545,6 +7789,7 @@ body: | ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -7555,6 +7800,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -7616,6 +7862,7 @@ body: | ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) + ; ; GFX9-LABEL: name: 
test_store_global_v8s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -7647,6 +7894,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -7657,6 +7905,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -7667,6 +7916,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -7698,6 +7948,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -7708,6 +7959,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -7718,6 +7970,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -7749,6 +8002,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s32_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -7759,6 +8013,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -7769,6 
+8024,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -7801,6 +8057,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2s128_align32 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -7812,6 +8069,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s128_align32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -7823,6 +8081,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2s128_align32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -7867,8 +8126,7 @@ body: | ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -7878,14 +8136,13 @@ body: | ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], 
[[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -7895,14 +8152,13 @@ body: | ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -7912,14 +8168,13 @@ body: | ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -7930,14 +8185,13 @@ body: | ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI-NEXT: G_STORE 
[[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) @@ -7946,14 +8200,13 @@ body: | ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY17]](s32) + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY17]](s32) ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY16]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY18]](s32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR15]], [[COPY18]](s32) ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) @@ -7962,14 +8215,13 @@ body: | ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY20]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY20]](s32) ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY19]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY21]](s32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LSHR18]], [[COPY21]](s32) ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) @@ -7978,17 +8230,17 @@ body: | ; 
SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY23]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY23]](s32) ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY22]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; SI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY24]](s32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR21]], [[COPY24]](s32) ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s256_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8000,6 +8252,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s256_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8144,6 +8397,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; VI-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR23]](s16) ; VI-NEXT: G_STORE [[ANYEXT15]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s256_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8228,6 +8482,7 @@ body: | ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s256_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8239,6 +8494,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s256_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8301,6 +8557,7 @@ body: | ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; VI-NEXT: G_STORE [[LSHR7]](s32), 
[[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s256_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8334,6 +8591,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s256_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8345,6 +8603,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s256_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8356,6 +8615,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s256_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8389,6 +8649,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s256_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8400,6 +8661,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s256_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8411,6 +8673,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s256_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8444,6 +8707,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s256_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8455,6 +8719,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s256_align16 ; VI: liveins: 
$vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8466,6 +8731,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s256_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8499,6 +8765,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s256_align32 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8510,6 +8777,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s256_align32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8521,6 +8789,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s256_align32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8553,6 +8822,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s32_align32 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8563,6 +8833,7 @@ body: | ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s32_align32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8573,6 +8844,7 @@ body: | ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s32_align32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8618,8 +8890,7 @@ body: | ; SI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -8629,14 +8900,13 @@ body: | ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY7]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY7]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY8]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -8646,14 +8916,13 @@ body: | ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY10]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY10]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY11]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY11]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -8663,14 +8932,13 @@ body: | ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY13]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY13]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY12]](s32), 
[[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY14]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY14]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -8680,14 +8948,13 @@ body: | ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY15]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY16]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY16]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY15]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY17]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY17]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) @@ -8696,14 +8963,13 @@ body: | ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY18]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY19]](s32) + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY19]](s32) ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY18]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY20]](s32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR15]], [[COPY20]](s32) ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) @@ -8712,14 +8978,13 @@ body: | ; SI-NEXT: 
[[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY21]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY22]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY22]](s32) ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY21]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY23]](s32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LSHR18]], [[COPY23]](s32) ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) @@ -8728,14 +8993,13 @@ body: | ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY24]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; SI-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY25]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY25]](s32) ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY24]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; SI-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY26]](s32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR21]], [[COPY26]](s32) ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) @@ -8745,17 +9009,17 @@ body: | ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY27]], [[C]](s32) ; SI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64) ; SI-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]] - ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[COPY28]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]] + ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY28]](s32) ; SI-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY27]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) ; SI-NEXT: G_STORE [[LSHR25]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 33, addrspace 1) ; SI-NEXT: 
[[COPY29:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR24]], [[C3]] - ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY29]](s32) + ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LSHR24]], [[COPY29]](s32) ; SI-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD32]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR24]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 34, addrspace 1) ; SI-NEXT: G_STORE [[LSHR26]](s32), [[PTR_ADD34]](p1) :: (store (s8) into unknown-address + 35, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v9s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} @@ -8775,6 +9039,7 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v9s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -8937,6 +9202,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR24]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 34, addrspace 1) ; VI-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR26]](s16) ; VI-NEXT: G_STORE [[ANYEXT17]](s32), [[PTR_ADD34]](p1) :: (store (s8) into unknown-address + 35, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v9s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} @@ -9040,6 +9306,7 @@ body: | ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v9s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} @@ -9059,6 +9326,7 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v9s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -9129,6 +9397,7 @@ body: | ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v9s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} @@ -9181,6 +9450,7 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v9s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} @@ -9200,6 +9470,7 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v9s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -9219,6 +9490,7 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v9s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} @@ -9271,6 +9543,7 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v9s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} @@ -9290,6 +9563,7 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v9s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -9309,6 +9583,7 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v9s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} @@ -9361,6 +9636,7 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v9s32_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} @@ -9380,6 +9656,7 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v9s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -9399,6 +9676,7 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v9s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir index 30c7f4fb3e7d2f..22d792abe36245 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -22,6 +22,7 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; VI-LABEL: name: test_store_global_i32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -45,6 +46,7 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) + ; ; VI-LABEL: name: test_store_global_i64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -68,6 +70,7 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store (p1), addrspace 1) + ; ; VI-LABEL: name: test_store_global_p1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -91,6 +94,7 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store (p4), addrspace 1) + ; ; VI-LABEL: name: test_store_global_p4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -114,6 +118,7 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) + ; ; VI-LABEL: name: test_store_global_p3 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -137,6 +142,7 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -160,6 +166,7 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store (<2 x s16>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -188,6 +195,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v3s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -212,6 +220,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s64_to_s8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -237,6 +246,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s64_to_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -269,6 +279,7 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), 
addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s64_to_s16_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -301,6 +312,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s64_to_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -332,6 +344,7 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s64_to_s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -376,11 +389,11 @@ body: | ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s64_to_s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -424,6 +437,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128) ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s128_to_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -449,6 +463,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s128_to_s8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -476,6 +491,7 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] ; SI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; VI-LABEL: name: test_store_global_i1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -503,6 +519,7 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; VI-LABEL: name: test_store_global_i8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -527,6 +544,7 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; VI-LABEL: name: test_store_global_i16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} 
@@ -557,6 +575,7 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_96 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -583,6 +602,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_i128 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -607,6 +627,7 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -631,8 +652,8 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) @@ -649,14 +670,15 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s8_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -688,8 +710,8 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -699,14 +721,15 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s8_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; 
VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -733,8 +756,8 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -744,14 +767,15 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s8_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -779,8 +803,8 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) @@ -813,6 +837,7 @@ body: | ; SI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v3s8_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -820,8 +845,8 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: 
[[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -870,8 +895,8 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) @@ -897,6 +922,7 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v3s8_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -904,8 +930,8 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -949,8 +975,8 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) @@ -976,6 +1002,7 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v3s8_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -983,8 +1010,8 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -1053,11 +1080,11 @@ body: | ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: 
G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s8_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1082,8 +1109,8 @@ body: | ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C5]] @@ -1141,6 +1168,7 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s8_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1201,6 +1229,7 @@ body: | ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI-NEXT: G_STORE [[OR2]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s8_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1241,6 +1270,7 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s8>) = G_IMPLICIT_DEF ; SI-NEXT: G_STORE [[DEF]](<2 x s8>), [[COPY]](p1) :: (store (<2 x s4>), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_v2s8_to_1_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1266,6 +1296,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) ; SI-NEXT: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_v3s8_to_1_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -1293,6 +1324,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) ; SI-NEXT: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_v3s8_to_2_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir index b89e5d684b0ff5..5205386c8ea713 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir @@ -126,8 +126,8 @@ body: | ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] @@ -153,8 +153,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] @@ -355,8 +355,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s128) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s128) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] @@ -403,8 +403,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s96) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s96) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir index 54ce45c0dc0887..b2fe9b8ddc903b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir @@ -139,22 +139,20 @@ body: | ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND3]], [[AND4]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND5]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND3]] ; 
CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -190,16 +188,14 @@ body: | ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND3]], [[AND4]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[AND7]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD2]](s32), [[AND8]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND4]], [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD2]](s32), [[AND6]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) @@ -209,26 +205,25 @@ body: | ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32) - ; CHECK-NEXT: 
[[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL2]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND9]](s32), [[AND10]](s32), [[AND11]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -271,28 +266,24 @@ body: | ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND3]], [[AND4]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[AND7]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD2]](s32), [[AND8]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[AND9]], [[AND10]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD3]](s32), [[AND11]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND3]] + 
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND4]], [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD2]](s32), [[AND6]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD3]](s32), [[AND7]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND11]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND7]](s32) ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) @@ -302,11 +293,11 @@ body: | ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32), [[AND10]](s32), [[AND11]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir index ca2dacce5457ec..0d2366489c2a05 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -25,6 +25,7 @@ body: | ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX8-LABEL: name: uaddsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -39,6 +40,7 @@ body: | ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: uaddsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -82,6 +84,7 @@ body: | ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = 
G_ADD [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX8-LABEL: name: uaddsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -96,6 +99,7 @@ body: | ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: uaddsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -147,17 +151,14 @@ body: | ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C1]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[COPY2]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: uaddsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -179,13 +180,11 @@ body: | ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL2]], [[SHL3]] ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT1]], [[C1]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C2]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C2]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[LSHR3]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR2]], [[SHL4]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: uaddsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -250,6 +249,7 @@ body: | ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX8-LABEL: name: uaddsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -260,6 +260,7 @@ body: | ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: uaddsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -308,13 +309,11 @@ body: | ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C2]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] - ; GFX6-NEXT: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL4]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: uaddsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -337,6 +336,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: uaddsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -392,24 +392,21 @@ body: | ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL7]] ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]] ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: uaddsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -447,13 +444,13 @@ body: | ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX8-NEXT: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: uaddsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -548,19 +545,15 @@ body: | ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[XOR3]], [[SHL7]] ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SHL6]], [[UMIN3]] ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[ADD3]], [[C]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL8]] ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]] - ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL9]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: uaddsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -601,6 +594,7 @@ body: | ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: uaddsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -634,6 +628,7 @@ body: | ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[COPY1]] ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[UMIN]] ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX8-LABEL: name: uaddsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -641,6 +636,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] ; GFX8-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) + ; ; GFX9-LABEL: name: uaddsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -676,6 +672,7 @@ body: | ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UMIN1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: uaddsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -687,6 +684,7 @@ body: | ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY 
[[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: uaddsat_v2s32
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -723,6 +721,7 @@ body: |
 ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
 ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[UADDE1]](s1), [[C]], [[MV]]
 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+ ;
 ; GFX8-LABEL: name: uaddsat_s64
 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX8-NEXT: {{ $}}
@@ -736,6 +735,7 @@ body: |
 ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
 ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[UADDE1]](s1), [[C]], [[MV]]
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+ ;
 ; GFX9-LABEL: name: uaddsat_s64
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -783,6 +783,7 @@ body: |
 ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[UADDE3]](s1), [[C]], [[MV1]]
 ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX8-LABEL: name: uaddsat_v2s64
 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX8-NEXT: {{ $}}
@@ -805,6 +806,7 @@ body: |
 ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[UADDE3]](s1), [[C]], [[MV1]]
 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
 ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: uaddsat_v2s64
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir
index 2143d94da2ede6..bfba201e264b14 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir
@@ -39,6 +39,7 @@ body: |
 ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]]
 ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX6-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
+ ;
 ; GFX8-LABEL: name: test_udiv_s32
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -67,6 +68,7 @@ body: |
 ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]]
 ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
+ ;
 ; GFX9-LABEL: name: test_udiv_s32
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -95,6 +97,7 @@ body: |
 ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]]
 ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
+ ;
 ; GFX10-LABEL: name: test_udiv_s32
 ; GFX10: liveins: $vgpr0, $vgpr1
 ; GFX10-NEXT: {{ $}}
@@ -185,6 +188,7 @@ body: |
 ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
 ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32)
 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX8-LABEL: name: test_udiv_v2s32
 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX8-NEXT: {{ $}}
@@ -235,6 +239,7 @@ body: |
 ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_udiv_v2s32
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -285,6 +290,7 @@ body: |
 ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX10-LABEL: name: test_udiv_v2s32
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX10-NEXT: {{ $}}
@@ -497,6 +503,7 @@ body: |
 ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
 ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+ ;
 ; GFX8-LABEL: name: test_udiv_s64
 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX8-NEXT: {{ $}}
@@ -647,6 +654,7 @@ body: |
 ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
 ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+ ;
 ; GFX9-LABEL: name: test_udiv_s64
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -797,6 +805,7 @@ body: |
 ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
 ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+ ;
 ; GFX10-LABEL: name: test_udiv_s64
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX10-NEXT: {{ $}}
@@ -1249,6 +1258,7 @@ body: |
 ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
 ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX8-LABEL: name: test_udiv_v2s64
 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX8-NEXT: {{ $}}
@@ -1539,6 +1549,7 @@ body: |
 ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
 ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_udiv_v2s64
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX9-NEXT: {{ $}}
@@ -1829,6 +1840,7 @@ body: |
 ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX10-LABEL: name: test_udiv_v2s64
 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX10-NEXT: {{ $}}
@@ -2163,6 +2175,7 @@ body: |
 ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]]
 ; GFX6-NEXT: $vgpr0 = COPY [[AND2]](s32)
+ ;
 ; GFX8-LABEL: name: test_udiv_s16
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -2195,6 +2208,7 @@ body: |
 ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]]
 ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32)
+ ;
 ; GFX9-LABEL: name: test_udiv_s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2227,6 +2241,7 @@ body: |
 ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]]
 ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32)
+ ;
 ; GFX10-LABEL: name: test_udiv_s16
 ; GFX10: liveins: $vgpr0, $vgpr1
 ; GFX10-NEXT: {{ $}}
@@ -2309,33 +2324,32 @@ body: |
 ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
 ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]]
 ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
- ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32)
+ ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32)
 ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
 ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]]
 ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]]
+ ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]]
 ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]]
 ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
 ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
- ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]]
- ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]]
- ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]]
- ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]]
+ ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD3]]
+ ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]]
+ ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]]
+ ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[LSHR1]]
 ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]]
 ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]]
- ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]]
+ ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[LSHR1]]
 ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]]
- ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]]
+ ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[LSHR1]]
 ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]]
 ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
- ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C1]]
- ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C1]]
- ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]]
+ ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C1]]
+ ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C1]]
+ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
 ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX8-LABEL: name: test_udiv_v2s16
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -2371,33 +2385,32 @@ body: |
 ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
 ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]]
 ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32)
+ ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32)
 ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
 ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]]
 ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]]
+ ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]]
 ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]]
 ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
 ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
- ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]]
- ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]]
- ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]]
- ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]]
+ ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD3]]
+ ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]]
+ ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]]
+ ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[LSHR1]]
 ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]]
 ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]]
- ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]]
+ ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[LSHR1]]
 ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]]
- ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]]
+ ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[LSHR1]]
 ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]]
 ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
- ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C1]]
- ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C1]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]]
+ ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C1]]
+ ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C1]]
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: test_udiv_v2s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2434,30 +2447,29 @@ body: |
 ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]]
 ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT2]](s32)
- ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32)
+ ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32)
 ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
 ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]]
 ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]]
+ ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]]
 ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]]
 ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
 ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
- ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]]
- ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]]
- ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]]
- ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]]
+ ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD3]]
+ ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]]
+ ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]]
+ ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[LSHR1]]
 ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]]
 ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]]
- ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]]
+ ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[LSHR1]]
 ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]]
- ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]]
+ ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[LSHR1]]
 ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]]
 ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT5]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ ;
 ; GFX10-LABEL: name: test_udiv_v2s16
 ; GFX10: liveins: $vgpr0, $vgpr1
 ; GFX10-NEXT: {{ $}}
@@ -2494,25 +2506,23 @@ body: |
 ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]]
 ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT2]](s32)
- ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32)
+ ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32)
 ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
 ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]]
 ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]]
+ ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]]
 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]]
 ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
 ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
- ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]]
- ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]]
- ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]]
+ ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD3]]
+ ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]]
+ ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]]
+ ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[LSHR1]]
 ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]]
 ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]]
- ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]]
+ ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[LSHR1]]
 ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]]
- ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]]
+ ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[LSHR1]]
 ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]]
 ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
 ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT5]](s32)
@@ -2561,6 +2571,7 @@ body: |
 ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
 ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX6-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
+ ;
 ; GFX8-LABEL: name: test_udiv_s7
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -2592,6 +2603,7 @@ body: |
 ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
 ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
+ ;
 ; GFX9-LABEL: name: test_udiv_s7
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2623,6 +2635,7 @@ body: |
 ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
 ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
+ ;
 ; GFX10-LABEL: name: test_udiv_s7
 ; GFX10: liveins: $vgpr0, $vgpr1
 ; GFX10-NEXT: {{ $}}
@@ -2700,6 +2713,7 @@ body: |
 ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
 ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX6-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
+ ;
 ; GFX8-LABEL: name: test_udiv_s17
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -2731,6 +2745,7 @@ body: |
 ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
 ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
+ ;
 ; GFX9-LABEL: name: test_udiv_s17
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2762,6 +2777,7 @@ body: |
 ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
 ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
 ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
+ ;
 ; GFX10-LABEL: name: test_udiv_s17
 ; GFX10: liveins: $vgpr0, $vgpr1
 ; GFX10-NEXT: {{ $}}
@@ -2961,6 +2977,7 @@ body: |
 ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
 ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+ ;
 ; GFX8-LABEL: name: test_udiv_s33
 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX8-NEXT: {{ $}}
@@ -3114,6 +3131,7 @@ body: |
 ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
 ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+ ;
 ; GFX9-LABEL: name: test_udiv_s33
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -3267,6 +3285,7 @@ body: |
 ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
 ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+ ;
 ; GFX10-LABEL: name: test_udiv_s33
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX10-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
index 18e24e8105d12f..3f8bbbce2d31d0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
@@ -18,6 +18,7 @@ body: |
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]]
 ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32)
+ ;
 ; VI-LABEL: name: test_umax_s32
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -25,6 +26,7 @@ body: |
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]]
 ; VI-NEXT: $vgpr0 = COPY [[UMAX]](s32)
+ ;
 ; GFX9-LABEL: name: test_umax_s32
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -52,6 +54,7 @@ body: |
 ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
 ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]]
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+ ;
 ; VI-LABEL: name: test_umax_s64
 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; VI-NEXT: {{ $}}
@@ -60,6 +63,7 @@ body: |
 ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
 ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]]
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+ ;
 ; GFX9-LABEL: name: test_umax_s64
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -90,6 +94,7 @@ body: |
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
 ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
 ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32)
+ ;
 ; VI-LABEL: name: test_umax_s16
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -100,6 +105,7 @@ body: |
 ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]]
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16)
 ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_umax_s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -135,26 +141,28 @@ body: |
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
 ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
 ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32)
+ ;
 ; VI-LABEL: name: test_umax_s8
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
 ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]]
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16)
 ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_umax_s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
 ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
@@ -186,6 +194,7 @@ body: |
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
 ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
 ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32)
+ ;
 ; VI-LABEL: name: test_umax_s17
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -196,6 +205,7 @@ body: |
 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
 ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
 ; VI-NEXT: $vgpr0 = COPY [[UMAX]](s32)
+ ;
 ; GFX9-LABEL: name: test_umax_s17
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -232,6 +242,7 @@ body: |
 ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]]
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_umax_v2s32
 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; VI-NEXT: {{ $}}
@@ -243,6 +254,7 @@ body: |
 ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_umax_v2s32
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -278,6 +290,7 @@ body: |
 ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]]
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; VI-LABEL: name: test_umax_v3s32
 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
 ; VI-NEXT: {{ $}}
@@ -290,6 +303,7 @@ body: |
 ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX9-LABEL: name: test_umax_v3s32
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
 ; GFX9-NEXT: {{ $}}
@@ -328,15 +342,12 @@ body: |
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
 ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]]
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UMAX]], [[C1]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UMAX1]], [[C1]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]]
+ ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[LSHR]], [[LSHR1]]
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UMAX1]], [[C]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UMAX]], [[SHL]]
 ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; VI-LABEL: name: test_umax_v2s16
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -359,6 +370,7 @@ body: |
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
 ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: test_umax_v2s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -396,14 +408,13 @@ body: |
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
 ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]]
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]]
+ ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[LSHR]], [[LSHR1]]
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]]
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32)
 ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; VI-LABEL: name: test_umax_v3s16
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -432,6 +443,7 @@ body: |
 ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX2]](s16)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
 ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX9-LABEL: name: test_umax_v3s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -484,27 +496,20 @@ body: |
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
 ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
- ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]]
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]]
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
- ; SI-NEXT: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[AND6]], [[AND7]]
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UMAX]], [[C1]]
- ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UMAX1]], [[C1]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]]
+ ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[LSHR]], [[LSHR2]]
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]]
+ ; SI-NEXT: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[LSHR1]], [[LSHR3]]
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UMAX1]], [[C]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UMAX]], [[SHL]]
 ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UMAX2]], [[C1]]
- ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UMAX3]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
- ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UMAX3]], [[C]](s32)
+ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UMAX2]], [[SHL1]]
 ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_umax_v4s16
 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; VI-NEXT: {{ $}}
@@ -545,6 +550,7 @@ body: |
 ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_umax_v4s16
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
index 50776dafdb7eb3..f01143f0e0a9bc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
@@ -18,6 +18,7 @@ body: |
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]]
 ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32)
+ ;
 ; VI-LABEL: name: test_umin_s32
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -25,6 +26,7 @@ body: |
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]]
 ; VI-NEXT: $vgpr0 = COPY [[UMIN]](s32)
+ ;
 ; GFX9-LABEL: name: test_umin_s32
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -52,6 +54,7 @@ body: |
 ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
 ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]]
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+ ;
 ; VI-LABEL: name: test_umin_s64
 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; VI-NEXT: {{ $}}
@@ -60,6 +63,7 @@ body: |
 ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
 ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]]
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+ ;
 ; GFX9-LABEL: name: test_umin_s64
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -90,6 +94,7 @@ body: |
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
 ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
 ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32)
+ ;
 ; VI-LABEL: name: test_umin_s16
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -100,6 +105,7 @@ body: |
 ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]]
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16)
 ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_umin_s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -135,26 +141,28 @@ body: |
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
 ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
 ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32)
+ ;
 ; VI-LABEL: name: test_umin_s8
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
 ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]]
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16)
 ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_umin_s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
 ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
@@ -186,6 +194,7 @@ body: |
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
 ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
 ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32)
+ ;
 ; VI-LABEL: name: test_umin_s17
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -196,6 +205,7 @@ body: |
 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
 ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
 ; VI-NEXT: $vgpr0 = COPY [[UMIN]](s32)
+ ;
 ; GFX9-LABEL: name: test_umin_s17
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -232,6 +242,7 @@ body: |
 ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]]
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_umin_v2s32
 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; VI-NEXT: {{ $}}
@@ -243,6 +254,7 @@ body: |
 ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_umin_v2s32
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -278,6 +290,7 @@ body: |
 ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]]
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; VI-LABEL: name: test_umin_v3s32
 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
 ; VI-NEXT: {{ $}}
@@ -290,6 +303,7 @@ body: |
 ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX9-LABEL: name: test_umin_v3s32
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
 ; GFX9-NEXT: {{ $}}
@@ -328,15 +342,12 @@ body: |
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
 ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]]
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UMIN1]], [[C1]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]]
+ ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[LSHR]], [[LSHR1]]
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UMIN1]], [[C]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UMIN]], [[SHL]]
 ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; VI-LABEL: name: test_umin_v2s16
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -359,6 +370,7 @@ body: |
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
 ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: test_umin_v2s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -396,14 +408,13 @@ body: |
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
 ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]]
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]]
+ ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[LSHR]], [[LSHR1]]
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]]
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32)
 ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; VI-LABEL: name: test_umin_v3s16
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -432,6 +443,7 @@ body: |
 ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN2]](s16)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
 ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX9-LABEL: name: test_umin_v3s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -484,27 +496,20 @@ body: |
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
 ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
- ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]]
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]]
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
- ; SI-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[AND6]], [[AND7]]
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]]
- ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UMIN1]], [[C1]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]]
+ ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[LSHR]], [[LSHR2]]
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]]
+ ; SI-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[LSHR1]], [[LSHR3]]
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UMIN1]], [[C]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UMIN]], [[SHL]]
 ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UMIN2]], [[C1]]
- ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UMIN3]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
- ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UMIN3]], [[C]](s32)
+ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UMIN2]], [[SHL1]]
 ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_umin_v4s16
 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; VI-NEXT: {{ $}}
@@ -545,6 +550,7 @@ body: |
 ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_umin_v4s16
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir
index 50deea30890950..3a919f004964ba 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir
@@ -15,6 +15,7 @@ body: |
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]]
 ; GFX8-NEXT: $vgpr0 = COPY [[UMULH]](s32)
+ ;
 ; GFX9-LABEL: name: test_umulh_s32
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -45,6 +46,7 @@ body: |
 ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMULH]](s32), [[UMULH1]](s32)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_umulh_v2s32
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -98,6 +100,7 @@ body: |
 ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
 ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ ;
 ; GFX9-LABEL: name: test_umulh_s64
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -197,6 +200,7 @@ body: |
 ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32)
 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
 ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_umulh_v2s64
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX9-NEXT: {{ $}}
@@ -277,8 +281,8 @@ body: |
 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32)
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]]
- ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32)
+ ; GFX8-NEXT: $vgpr0 = COPY [[LSHR]](s32)
+ ;
 ; GFX9-LABEL: name: test_umulh_s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -290,8 +294,7 @@ body: |
 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32)
- ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]]
- ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32)
+ ; GFX9-NEXT: $vgpr0 = COPY [[LSHR]](s32)
 %0:_(s32) = COPY $vgpr0
 %1:_(s32) = COPY $vgpr1
 %2:_(s16) = G_TRUNC %0
@@ -312,33 +315,34 @@ body: |
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
 ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]]
 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16)
- ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
+ ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
 ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32)
+ ;
 ; GFX9-LABEL: name: test_umulh_s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
 ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]]
 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16)
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
 ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32)
 %0:_(s32) = COPY $vgpr0
@@ -372,17 +376,15 @@ body: |
 ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
 ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]]
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32)
- ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]]
- ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]]
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
 ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C]]
- ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32)
+ ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
+ ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[LSHR2]](s32)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_umulh_v2s16
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -400,9 +402,7 @@ body: |
 ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
 ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]]
 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32)
- ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]]
- ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C]]
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
@@ -427,8 +427,8 @@ body: |
 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
 ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
@@ -448,21 +448,19 @@ body: |
 ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]]
 ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[MUL2]], [[C1]](s16)
 ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C]]
- ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]]
- ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C]]
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[LSHR1]], [[C1]](s16)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR]], [[SHL]]
 ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
- ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]]
- ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16)
- ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL1]]
+ ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]]
+ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND6]], [[C1]](s16)
+ ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[LSHR2]], [[SHL1]]
 ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
 ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
 ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
 ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; GFX9-LABEL: name: test_umulh_v3s8
 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
 ; GFX9-NEXT: {{ $}}
@@ -503,11 +501,10 @@ body: |
 ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]]
 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16)
 ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]]
- ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]]
 ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
- ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]]
- ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16)
- ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL1]]
+ ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]]
+ ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND8]], [[C1]](s16)
+ ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[LSHR1]], [[SHL1]]
 ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
 ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
 ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
@@ -546,8 +543,8 @@ body: |
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
 ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
@@ -560,12 +557,11 @@ body: |
 ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
 ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]]
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16)
- ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C]]
- ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL]]
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[LSHR1]], [[C1]](s16)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR]], [[SHL]]
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
 ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_umulh_v2s8
 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -635,8 +631,8 @@ body: |
 ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
 ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32)
 ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
- ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
 ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
@@ -661,8 +657,8 @@ body: |
 ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]]
 ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s16) = G_MUL [[AND6]], [[AND7]]
 ; GFX8-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[MUL3]], [[C4]](s16)
- ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16)
+ ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]]
 ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16)
 ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]]
@@ -677,6 +673,7 @@ body: |
 ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
 ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
 ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; GFX9-LABEL: name: test_umulh_v4s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir
index 7f9993d9fce25f..13c52d08b4941f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir
@@ -20,6 +20,7 @@ body: |
 ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
 ; GFX8-NEXT: $vgpr0 = COPY [[MUL]](s32)
 ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_umulo_s32
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -69,6 +70,7 @@ body: |
 ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
 ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_umulo_v2s32
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -84,8 +86,8 @@ body: |
 ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
 ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]]
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
 ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
 ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
@@ -149,6 +151,7 @@ body: |
 ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64)
 ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64)
+ ;
 ; GFX9-LABEL: name: test_umulo_s64
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -283,14 +286,15 @@ body: |
 ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32)
 ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]]
 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64)
- ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
 ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]]
 ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
 ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]]
 ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64)
 ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
 ; GFX8-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_umulo_v2s64
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX9-NEXT: {{ $}}
@@ -368,8 +372,8 @@ body: |
 ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32)
 ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]]
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
 ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]]
 ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
 ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]]
@@ -409,6 +413,7 @@ body: |
 ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1)
 ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32)
 ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_umulo_s24
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -461,6 +466,7 @@ body: |
 ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
 ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32)
 ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_umulo_s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -508,6 +514,7 @@ body: |
 ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
 ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32)
 ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_umulo_s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -572,10 +579,10 @@ body: |
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]]
- ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND10]](s32), [[AND11]](s32)
+ ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND10]](s32), [[LSHR]](s32)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>)
 ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_umulo_v2s16
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -594,8 +601,8 @@ body: |
 ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
 ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]]
 ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]]
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
 ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
 ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
@@ -640,8 +647,8 @@ body: |
 ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
 ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]]
 ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]]
- ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32)
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32)
 ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
@@ -654,6 +661,7 @@ body: |
 ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
 ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32)
 ; GFX8-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32)
+ ;
 ; GFX9-LABEL: name: test_umulo_v2s8
 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -672,8 +680,8 @@ body: |
 ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
 ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]]
 ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]]
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32)
 ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
@@ -740,22 +748,21 @@ body: |
 ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
 ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
 ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]]
- ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
- ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
- ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]]
- ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]]
- ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]]
- ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]]
- ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
+ ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LSHR2]], [[LSHR5]]
+ ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]]
+ ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]]
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL]]
+ ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]]
+ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
 ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GFX8-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]]
- ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32)
+ ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]]
+ ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
 ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
 ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32)
 ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_umulo_v4s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -782,18 +789,16 @@ body: |
 ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
 ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
 ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]]
- ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
- ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
- ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]]
- ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]]
- ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]]
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32)
- ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]]
- ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]]
- ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
+ ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LSHR2]], [[LSHR5]]
+ ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]]
+ ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]]
+ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+ ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL]]
+ ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]]
+ ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
 ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]]
- ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32)
+ ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]]
+ ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
 ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
 ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
index cb1112043c2139..c231aa8334d451 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
@@ -263,10 +263,9 @@ body: |
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32)
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C1]](s32)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY2]](s32)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C1]](s32)
 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[LSHR1]](s32)
 ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](s32)
@@ -335,13 +334,11 @@ body: |
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32)
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY2]](s32)
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
- ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+ ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C1]](s32)
 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](s32)
 ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](s32)
@@ -389,13 +386,11 @@ body: |
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32)
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY2]](s32)
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
- ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+ ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C1]](s32)
 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](s32)
 ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](s32)
@@ -451,29 +446,23 @@ body: |
 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
 ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY4]](s32)
+ ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY4]](s32)
 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY5]](s32)
+ ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY5]](s32)
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY6]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY6]](s32) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY7]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY9]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[C1]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C3]](s32) - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[C4]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C3]](s32) + ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C4]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[LSHR3]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir index 28d5f842140f19..44f44123bb7368 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir @@ -36,6 +36,7 @@ body: | ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX8-LABEL: name: test_urem_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -61,6 +62,7 @@ body: | ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX9-LABEL: name: test_urem_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -86,6 +88,7 @@ body: | ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX10-LABEL: name: test_urem_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -168,6 +171,7 @@ body: | ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY 
[[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_urem_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -213,6 +217,7 @@ body: | ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_urem_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -258,6 +263,7 @@ body: | ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_urem_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -461,6 +467,7 @@ body: | ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX8-LABEL: name: test_urem_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -607,6 +614,7 @@ body: | ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX9-LABEL: name: test_urem_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -753,6 +761,7 @@ body: | ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX10-LABEL: name: test_urem_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -1194,6 +1203,7 @@ body: | ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: test_urem_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -1477,6 +1487,7 @@ body: | ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_urem_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -1760,6 +1771,7 @@ body: | ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX10-LABEL: name: test_urem_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} @@ -2084,6 +2096,7 @@ body: | ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] ; GFX6-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; ; GFX8-LABEL: name: test_urem_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2113,6 +2126,7 
@@ body: | ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; ; GFX9-LABEL: name: test_urem_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2142,6 +2156,7 @@ body: | ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; ; GFX10-LABEL: name: test_urem_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2218,31 +2233,30 @@ body: | ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]] - ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD1]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[LSHR1]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[LSHR1]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]] - ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[LSHR1]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[LSHR1]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SELECT3]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT3]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY 
[[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: test_urem_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2275,31 +2289,30 @@ body: | ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]] - ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD1]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[LSHR1]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[LSHR1]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]] - ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[LSHR1]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[LSHR1]] ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]] - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SELECT3]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT3]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_urem_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2333,28 +2346,27 @@ body: | ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; 
GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]] - ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD1]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[LSHR1]] + ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[LSHR1]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]] - ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[LSHR1]] + ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[LSHR1]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX10-LABEL: name: test_urem_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2388,24 +2400,22 @@ body: | ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32) - ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] - ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] - ; GFX10-NEXT: 
[[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]] - ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD1]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[LSHR1]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[LSHR1]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]] - ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[LSHR1]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[LSHR1]] ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT3]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -2450,6 +2460,7 @@ body: | ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX8-LABEL: name: test_urem_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2478,6 +2489,7 @@ body: | ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX9-LABEL: name: test_urem_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2506,6 +2518,7 @@ body: | ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX10-LABEL: name: test_urem_s7 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2577,6 +2590,7 @@ body: | ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX8-LABEL: name: test_urem_s17 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2605,6 +2619,7 @@ body: | ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX9-LABEL: name: test_urem_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2633,6 +2648,7 @@ body: | ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX10-LABEL: name: test_urem_s17 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2825,6 +2841,7 @@ body: | ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX8-LABEL: name: test_urem_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -2974,6 +2991,7 @@ body: | ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX8-NEXT: 
[[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX9-LABEL: name: test_urem_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -3123,6 +3141,7 @@ body: | ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX10-LABEL: name: test_urem_s33 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir index a8a3ab3e79efbd..c5bf23a15e1db1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir @@ -27,14 +27,15 @@ body: | ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) + ; ; GFX8-LABEL: name: ushlsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) @@ -46,14 +47,15 @@ body: | ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: ushlsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) @@ -96,14 +98,15 @@ body: | ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) + ; ; GFX8-LABEL: name: ushlsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) @@ -115,14 +118,15 @@ body: | ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; GFX8-NEXT: $vgpr0 
= COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: ushlsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) @@ -174,16 +178,14 @@ body: | ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C1]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[COPY2]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: ushlsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -193,8 +195,8 @@ body: | ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) @@ -213,12 +215,11 @@ body: | ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[LSHR5]], [[C1]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[LSHR5]], [[C2]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR3]], [[SHL4]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: ushlsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -228,8 +229,8 @@ body: | ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; 
GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) @@ -248,10 +249,8 @@ body: | ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C1]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[LSHR5]], [[C1]] - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[LSHR5]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR3]], [[SHL4]] ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 @@ -288,6 +287,7 @@ body: | ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) + ; ; GFX8-LABEL: name: ushlsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -302,6 +302,7 @@ body: | ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: ushlsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -350,19 +351,17 @@ body: | ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR2]] ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[LSHR1]](s32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[LSHR1]](s32) ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL4]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: ushlsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -392,6 +391,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: ushlsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -441,51 +441,46 @@ body: | ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST1]], [[C]](s32) ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[LSHR1]](s32) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[LSHR1]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR3]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL1]] ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND]](s32) ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR5]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL3]] ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[LSHR2]](s32) + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[LSHR2]](s32) ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR7]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL5]] ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL6]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; 
GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR8]], [[SHL7]] ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] - ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL8]] ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: ushlsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -533,13 +528,13 @@ body: | ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL5]] ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: ushlsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -623,39 +618,34 @@ body: | ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR4]] ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[LSHR2]](s32) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[LSHR2]](s32) ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR6]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = 
G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND1]](s32) + ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND1]](s32) ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR8]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32) - ; GFX6-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[SHL7]], [[AND3]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[LSHR3]](s32) + ; GFX6-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[SHL7]], [[LSHR3]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[LSHR10]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[C2]], [[SHL7]] ; GFX6-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[SELECT3]], [[C]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL8]] ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] - ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C1]] - ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR11]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL9]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: ushlsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -709,6 +699,7 @@ body: | ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: ushlsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -777,6 +768,7 @@ body: | ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] ; GFX6-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; ; GFX8-LABEL: name: ushlsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -788,6 +780,7 @@ body: | ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; ; GFX9-LABEL: name: ushlsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -829,6 +822,7 @@ body: | ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT 
[[ICMP1]](s1), [[C]], [[SHL1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: ushlsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -847,6 +841,7 @@ body: | ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: ushlsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -889,6 +884,7 @@ body: | ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX8-LABEL: name: ushlsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -901,6 +897,7 @@ body: | ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: ushlsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -945,6 +942,7 @@ body: | ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: ushlsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -965,6 +963,7 @@ body: | ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: ushlsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir index 249695a7616369..6aff25387fc3e8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir @@ -139,22 +139,20 @@ body: | ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[AND4]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND5]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND3]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = 
G_OR [[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -190,16 +188,14 @@ body: | ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[AND4]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND6]], [[AND7]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB2]](s32), [[AND8]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND4]], [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB2]](s32), [[AND6]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) @@ -209,26 +205,25 @@ body: | ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[AND7]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL2]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND9]](s32), [[AND10]](s32), [[AND11]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -271,28 +266,24 @@ body: | ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[AND4]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND6]], [[AND7]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB2]](s32), [[AND8]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[AND9]], [[AND10]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C1]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB3]](s32), [[AND11]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND4]], [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] + ; CHECK-NEXT: 
[[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB2]](s32), [[AND6]] + ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C1]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB3]](s32), [[AND7]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND11]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND7]](s32) ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) @@ -302,11 +293,11 @@ body: | ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32), [[AND10]](s32), [[AND11]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir index 4b8a067ab74a95..663ffdcd5364cc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -23,6 +23,7 @@ body: | ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX8-LABEL: name: usubsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -37,6 +38,7 @@ body: | ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: usubsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -78,6 +80,7 @@ body: | ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX8-LABEL: name: usubsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -92,6 +95,7 @@ body: | ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = 
G_LSHR [[USUBSAT]], [[C]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: usubsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -140,17 +144,14 @@ body: | ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C1]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[COPY2]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: usubsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -172,13 +173,11 @@ body: | ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL2]], [[SHL3]] ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT1]], [[C1]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C2]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C2]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[LSHR3]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR2]], [[SHL4]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: usubsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -241,6 +240,7 @@ body: | ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX8-LABEL: name: usubsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -251,6 +251,7 @@ body: | ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: usubsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -296,13 +297,11 @@ body: | ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL4]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) 
= G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: usubsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -325,6 +324,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: usubsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -376,24 +376,21 @@ body: | ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL7]] ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]] ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: usubsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -431,13 +428,13 @@ body: | ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), 
[[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: usubsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -527,19 +524,15 @@ body: | ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[SHL6]], [[SHL7]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SHL6]], [[UMIN3]] ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SUB3]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL8]] ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL9]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: usubsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -580,6 +573,7 @@ body: | ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: usubsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -611,6 +605,7 @@ body: | ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[UMIN]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB]](s32) + ; ; GFX8-LABEL: name: usubsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -618,6 +613,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] ; GFX8-NEXT: $vgpr0 = COPY [[USUBSAT]](s32) + ; ; GFX9-LABEL: name: usubsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -650,6 +646,7 @@ body: | ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UMIN1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: usubsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -661,6 +658,7 @@ body: | ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: usubsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -697,6 +695,7 @@ body: | ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[USUBE1]](s1), [[C]], [[MV]] ; 
GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX8-LABEL: name: usubsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -710,6 +709,7 @@ body: | ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[USUBE1]](s1), [[C]], [[MV]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: usubsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -757,6 +757,7 @@ body: | ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[USUBE3]](s1), [[C]], [[MV1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: usubsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -779,6 +780,7 @@ body: | ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[USUBE3]](s1), [[C]], [[MV1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: usubsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir index bfbbf03ebeb201..9353813a8dbfc8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -472,10 +472,9 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) @@ -533,10 +532,9 @@ body: | ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -545,10 +543,9 @@ body: | ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL1]] ; CHECK-NEXT: 
[[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) @@ -563,20 +560,17 @@ body: | ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL5]] ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir index 98b210cbcdaadd..8db6a7c78aaa45 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir @@ -28,8 +28,8 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 65535 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(s32) = COPY $vgpr0 @@ -138,8 +138,7 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s32>) = G_ZEXT %0 @@ -163,9 +162,8 @@ body: | ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -191,10 +189,8 @@ body: | ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32), [[LSHR1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = G_ZEXT %0 @@ -275,8 +271,8 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND]](s16) %0:_(s32) = COPY $vgpr0 @@ -654,8 +650,8 @@ body: | ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; 
CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] @@ -665,13 +661,11 @@ body: | ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C4]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C3]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[C5]], [[C4]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C5]], [[SHL2]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) @@ -715,67 +709,61 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL3]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV1]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK-NEXT: 
[[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV]], [[SHL2]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[SHL3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[DEF]](s64), 0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV]](s64), 0 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C3]](s64) + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV1]](s64), 0 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s48) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT1]](s48) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND6]](s64) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND2]](s64) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C2]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]] - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL1]] + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL4]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C1]] + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C1]] + ; CHECK-NEXT: 
[[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL5]] + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[OR8]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR7]](s32) ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[UV4]](s32) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL1]] - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL1]] + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR9]](s32), [[OR10]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[AND5]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[AND1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV7]](s384) ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112) %0:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll index d7b9eebff77c03..b014a17241c14d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll @@ -31,7 +31,7 @@ define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %res ; Make sure we match constant bases with register offsets, in which case ; the base may be the RHS operand of the load in SDAG. 
; GCN-LABEL: name: test_complex_reg_offset -; GCN-DAG: %[[BASE:.*]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @0 + 4, +; GCN-DAG: %[[BASE:.*]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @0, ; SDAG-DAG: %[[OFFSET:.*]]:sreg_32 = nuw nsw S_LSHL_B32 ; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = S_LSHL_B32 ; SDAG: S_LOAD_DWORD_SGPR_IMM killed %[[BASE]], killed %[[OFFSET]], 0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index 50d857bd238e51..03146afaa7d877 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -204,7 +204,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr8 = S_ADD_U32 renamable $sgpr6, 48, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr9 = S_ADDC_U32 killed renamable $sgpr7, 0, implicit-def dead $scc, implicit killed $scc - ; GFX90A-NEXT: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @f2 + 4, target-flags(amdgpu-gotprel32-hi) @f2 + 12, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @f2, target-flags(amdgpu-gotprel32-hi) @f2, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM killed renamable $sgpr6_sgpr7, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; GFX90A-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @f2, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit undef $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1 ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll b/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll new file mode 100644 index 00000000000000..ac196635b363a4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -march=amdgcn -mcpu=gfx1030 < %s | FileCheck %s + +define amdgpu_cs <2 x i32> @f() { +; CHECK-LABEL: f: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: s_mov_b32 s1, s0 +; CHECK-NEXT: s_mov_b32 s2, s0 +; CHECK-NEXT: s_mov_b32 s3, s0 +; CHECK-NEXT: s_mov_b32 s4, s0 +; CHECK-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 +; CHECK-NEXT: s_mov_b32 s5, s0 +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1] +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; CHECK-NEXT: buffer_store_dwordx2 v[1:2], off, s[0:3], 0 +; CHECK-NEXT: ; return to shader part epilog +bb: + %i = call <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32> zeroinitializer, i32 0, i32 0, i32 0) + %i1 = bitcast <2 x i32> %i to i64 + %i2 = insertelement <3 x i64> zeroinitializer, i64 %i1, i64 2 + %i3 = icmp ne <3 x i64> %i2, zeroinitializer + %i4 = zext <3 x i1> %i3 to <3 x i64> + %i5 = bitcast <3 x i64> %i4 to <6 x i32> + %i6 = shufflevector <6 x i32> %i5, <6 x i32> zeroinitializer, <2 x i32> + call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> 
%i6, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0) + ret <2 x i32> %i6 +} + +declare <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32>, i32, i32, i32 immarg) +declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir index 8d0a9899b5dbcc..60673bf780a05e 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir @@ -233,3 +233,22 @@ body: | %0:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec %1:sreg_32 = COPY %0.sub0 ... + +--- +# GCN-LABEL: name: s_cselect_b64 +# GCN: %0:vgpr_32 = IMPLICIT_DEF +# GCN: %1:vreg_64 = IMPLICIT_DEF +# GCN: %2:sreg_32 = IMPLICIT_DEF +# GCN: %3:sreg_64 = IMPLICIT_DEF +# GCN: %7:sreg_64_xexec = V_CMP_EQ_U32_e64 %0, 0, implicit $exec +# GCN: %6:vreg_64 = V_CNDMASK_B64_PSEUDO 0, %1, %7, implicit $exec +name: s_cselect_b64 +body: | + bb.0: + %0:vgpr_32 = IMPLICIT_DEF + %1:vreg_64 = IMPLICIT_DEF + %2:sreg_32 = COPY %0 + %3:sreg_64 = COPY %1 + S_CMP_EQ_U32 %2, 0, implicit-def $scc + %4:sreg_64 = S_CSELECT_B64 %3, 0, implicit $scc +... diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-cc.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-cc.ll index a6ca056211fff9..f75d83ab6cbb54 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-cc.ll @@ -34,7 +34,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc(<4 x i32> inreg %a, <4 x i32> %b ; GISEL-GFX11-NEXT: $vgpr5 = COPY [[COPY5]] ; GISEL-GFX11-NEXT: $vgpr6 = COPY [[COPY6]] ; GISEL-GFX11-NEXT: $vgpr7 = COPY [[COPY7]] - ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX11-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -63,7 +63,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc(<4 x i32> inreg %a, <4 x i32> %b ; GISEL-GFX10-NEXT: $vgpr7 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; GISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY8]] - ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX10-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit 
$vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -82,7 +82,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc(<4 x i32> inreg %a, <4 x i32> %b ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr1 ; DAGISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY7]] ; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY6]] @@ -109,7 +109,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc(<4 x i32> inreg %a, <4 x i32> %b ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr1 ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY8]] @@ -158,7 +158,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_ptr(ptr inreg %a, ptr %b, ptr ad ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY9]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY10]] ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY11]] - ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX11-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; GISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -195,7 +195,7 @@ define 
amdgpu_cs_chain void @amdgpu_cs_chain_cc_ptr(ptr inreg %a, ptr %b, ptr ad ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY11]] ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; GISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]] - ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX10-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -218,7 +218,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_ptr(ptr inreg %a, ptr %b, ptr ad ; DAGISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr1 ; DAGISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY11]] ; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY10]] @@ -253,7 +253,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_ptr(ptr inreg %a, ptr %b, ptr ad ; DAGISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr1 ; DAGISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]] @@ -310,7 +310,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_struct( {ptr, i32, <4 x i32>} in ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY11]] ; GISEL-GFX11-NEXT: $vgpr12 = COPY [[COPY12]] ; 
GISEL-GFX11-NEXT: $vgpr13 = COPY [[COPY13]] - ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX11-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13 ; GISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -351,7 +351,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_struct( {ptr, i32, <4 x i32>} in ; GISEL-GFX10-NEXT: $vgpr13 = COPY [[COPY13]] ; GISEL-GFX10-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; GISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]] - ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX10-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -384,7 +384,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_struct( {ptr, i32, <4 x i32>} in ; DAGISEL-GFX11-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 ; DAGISEL-GFX11-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY16]] ; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY14]] @@ -431,7 +431,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_struct( {ptr, i32, <4 x i32>} in ; DAGISEL-GFX10-NEXT: 
[[COPY16:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 ; DAGISEL-GFX10-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX10-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]] @@ -466,7 +466,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_float(float inreg %a, float %b) ; GISEL-GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 ; GISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY]] ; GISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX11-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1 ; GISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -483,7 +483,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_float(float inreg %a, float %b) ; GISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY1]] ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; GISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX10-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -496,7 +496,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_float(float inreg %a, float %b) ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX11-NEXT: 
[[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]] ; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]] @@ -511,7 +511,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_float(float inreg %a, float %b) ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] @@ -534,7 +534,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_half(half inreg %a, half %b) { ; GISEL-GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 ; GISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY]] ; GISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX11-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1 ; GISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -551,7 +551,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_half(half inreg %a, half %b) { ; GISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY1]] ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; GISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM 
[[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX10-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -564,7 +564,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_half(half inreg %a, half %b) { ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]] ; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]] @@ -579,7 +579,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_half(half inreg %a, half %b) { ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] @@ -602,7 +602,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_bfloat(bfloat inreg %a, bfloat % ; GISEL-GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 ; GISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY]] ; GISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX11-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1 ; GISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -619,7 +619,7 @@ define 
amdgpu_cs_chain void @amdgpu_cs_chain_cc_bfloat(bfloat inreg %a, bfloat % ; GISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY1]] ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; GISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX10-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -632,7 +632,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_bfloat(bfloat inreg %a, bfloat % ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]] ; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]] @@ -647,7 +647,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_bfloat(bfloat inreg %a, bfloat % ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] @@ -670,7 +670,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_i16(i16 inreg %a, i16 %b) { ; GISEL-GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 ; GISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY]] ; GISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET 
target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX11-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1 ; GISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -687,7 +687,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_i16(i16 inreg %a, i16 %b) { ; GISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY1]] ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; GISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX10-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -700,7 +700,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_i16(i16 inreg %a, i16 %b) { ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]] ; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]] @@ -715,7 +715,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_i16(i16 inreg %a, i16 %b) { ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; 
DAGISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] @@ -766,7 +766,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_v16i16(<16 x i16> inreg %a, <16 ; GISEL-GFX11-NEXT: $vgpr13 = COPY [[COPY13]] ; GISEL-GFX11-NEXT: $vgpr14 = COPY [[COPY14]] ; GISEL-GFX11-NEXT: $vgpr15 = COPY [[COPY15]] - ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX11-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -811,7 +811,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_v16i16(<16 x i16> inreg %a, <16 ; GISEL-GFX10-NEXT: $vgpr15 = COPY [[COPY15]] ; GISEL-GFX10-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; GISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]] - ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX10-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -838,7 +838,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_v16i16(<16 x i16> inreg %a, <16 ; DAGISEL-GFX11-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr1 ; DAGISEL-GFX11-NEXT: [[COPY15:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX11-NEXT: 
[[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY15]] ; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY14]] @@ -881,7 +881,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_v16i16(<16 x i16> inreg %a, <16 ; DAGISEL-GFX10-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr1 ; DAGISEL-GFX10-NEXT: [[COPY15:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX10-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]] @@ -1511,7 +1511,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_many_regs(<36 x i32> inreg %a, <128 ; GISEL-GFX11-NEXT: $vgpr29 = COPY [[COPY29]] ; GISEL-GFX11-NEXT: $vgpr30 = COPY [[COPY30]] ; GISEL-GFX11-NEXT: $vgpr31 = COPY [[COPY31]] - ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def $scc + ; GISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX11-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 ; GISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 528, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -1856,7 +1856,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_many_regs(<36 x i32> inreg %a, <128 ; GISEL-GFX10-NEXT: $vgpr31 = COPY [[COPY31]] ; GISEL-GFX10-NEXT: [[COPY168:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 ; GISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY168]] - ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use 
+ 12, implicit-def $scc + ; GISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def $scc ; GISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GISEL-GFX10-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 528, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -2430,7 +2430,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_many_regs(<36 x i32> inreg %a, <128 ; DAGISEL-GFX11-NEXT: [[S_ADD_I32_130:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 [[COPY164]], killed [[S_MOV_B32_130]], implicit-def dead $scc ; DAGISEL-GFX11-NEXT: [[COPY168:%[0-9]+]]:vgpr_32 = COPY [[COPY130]] ; DAGISEL-GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR [[COPY168]], killed [[S_ADD_I32_130]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into stack + 4, addrspace 5) - ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX11-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY163]] ; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY162]] @@ -2775,7 +2775,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_many_regs(<36 x i32> inreg %a, <128 ; DAGISEL-GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY2]], $sgpr48_sgpr49_sgpr50_sgpr51, [[COPY165]], 516, 0, 0, implicit $exec :: (store (s32) into stack + 516, addrspace 5) ; DAGISEL-GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY1]], $sgpr48_sgpr49_sgpr50_sgpr51, [[COPY165]], 520, 0, 0, implicit $exec :: (store (s32) into stack + 520, align 8, addrspace 5) ; DAGISEL-GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr48_sgpr49_sgpr50_sgpr51, [[COPY165]], 524, 0, 0, implicit $exec :: (store (s32) into stack + 524, addrspace 5) - ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use + 4, target-flags(amdgpu-gotprel32-hi) @use + 12, implicit-def dead $scc + ; DAGISEL-GFX10-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @use, target-flags(amdgpu-gotprel32-hi) @use, implicit-def dead $scc ; DAGISEL-GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) 
from got, addrspace 4) ; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY164]] ; DAGISEL-GFX10-NEXT: $vgpr0 = COPY [[COPY163]] diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index e1c1384b022f71..3d1e290475fe98 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -106,7 +106,6 @@ ; GCN-O0-NEXT: MachineDominator Tree Construction ; GCN-O0-NEXT: Slot index numbering ; GCN-O0-NEXT: Live Interval Analysis -; GCN-O0-NEXT: MachinePostDominator Tree Construction ; GCN-O0-NEXT: SI Whole Quad Mode ; GCN-O0-NEXT: Virtual Register Map ; GCN-O0-NEXT: Live Register Matrix @@ -335,7 +334,6 @@ ; GCN-O1-NEXT: Register Coalescer ; GCN-O1-NEXT: Rename Disconnected Subregister Components ; GCN-O1-NEXT: Machine Instruction Scheduler -; GCN-O1-NEXT: MachinePostDominator Tree Construction ; GCN-O1-NEXT: SI Whole Quad Mode ; GCN-O1-NEXT: Virtual Register Map ; GCN-O1-NEXT: Live Register Matrix @@ -627,7 +625,6 @@ ; GCN-O1-OPTS-NEXT: Rename Disconnected Subregister Components ; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA optimizations ; GCN-O1-OPTS-NEXT: Machine Instruction Scheduler -; GCN-O1-OPTS-NEXT: MachinePostDominator Tree Construction ; GCN-O1-OPTS-NEXT: SI Whole Quad Mode ; GCN-O1-OPTS-NEXT: Virtual Register Map ; GCN-O1-OPTS-NEXT: Live Register Matrix @@ -930,7 +927,6 @@ ; GCN-O2-NEXT: Rename Disconnected Subregister Components ; GCN-O2-NEXT: AMDGPU Pre-RA optimizations ; GCN-O2-NEXT: Machine Instruction Scheduler -; GCN-O2-NEXT: MachinePostDominator Tree Construction ; GCN-O2-NEXT: SI Whole Quad Mode ; GCN-O2-NEXT: Virtual Register Map ; GCN-O2-NEXT: Live Register Matrix @@ -1246,7 +1242,6 @@ ; GCN-O3-NEXT: Rename Disconnected Subregister Components ; GCN-O3-NEXT: AMDGPU Pre-RA optimizations ; GCN-O3-NEXT: Machine Instruction Scheduler -; GCN-O3-NEXT: MachinePostDominator Tree Construction ; GCN-O3-NEXT: SI Whole Quad Mode ; GCN-O3-NEXT: Virtual Register Map ; GCN-O3-NEXT: Live Register Matrix diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll new file mode 100644 index 00000000000000..65443a6efa789d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s + +declare i32 @llvm.amdgcn.s.quadmask.i32(i32) +declare i64 @llvm.amdgcn.s.quadmask.i64(i64) + +define i32 @test_quadmask_constant_i32() { +; GFX11-LABEL: test_quadmask_constant_i32: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_quadmask_b32 s0, 0x85fe3a92 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +entry: + %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 u0x85FE3A92) + ret i32 %qm +} + +define amdgpu_cs void @test_quadmask_sgpr_i32(i32 inreg %mask, ptr addrspace(1) %out) { +; GFX11-LABEL: test_quadmask_sgpr_i32: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_quadmask_b32 s0, s0 +; GFX11-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +entry: + %qm = call i32 
@llvm.amdgcn.s.quadmask.i32(i32 %mask) + store i32 %qm, ptr addrspace(1) %out + ret void +} + + +define i32 @test_quadmask_vgpr_i32(i32 %mask) { +; GFX11-LABEL: test_quadmask_vgpr_i32: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: s_quadmask_b32 s0, s0 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +entry: + %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 %mask) + ret i32 %qm +} + +define i64 @test_quadmask_constant_i64() { +; GFX11-LABEL: test_quadmask_constant_i64: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s0, 0x85fe3a92 +; GFX11-NEXT: s_mov_b32 s1, 0x67de48fc +; GFX11-NEXT: s_quadmask_b64 s[0:1], s[0:1] +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +entry: + %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 u0x67DE48FC85FE3A92) + ret i64 %qm +} + +define amdgpu_cs void @test_quadmask_sgpr_i64(i64 inreg %mask, ptr addrspace(1) %out) { +; GFX11-LABEL: test_quadmask_sgpr_i64: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_quadmask_b64 s[0:1], s[0:1] +; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +entry: + %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 %mask) + store i64 %qm, ptr addrspace(1) %out + ret void +} + +define i64 @test_quadmask_vgpr_i64(i64 %mask) { +; GFX11-LABEL: test_quadmask_vgpr_i64: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: s_quadmask_b64 s[0:1], s[0:1] +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +entry: + %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 %mask) + ret i64 %qm +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll new file mode 100644 index 00000000000000..37b5357950e648 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s + +declare void @llvm.amdgcn.s.ttracedata(i32) +declare void @llvm.amdgcn.s.ttracedata.imm(i16) + +define amdgpu_cs void @ttracedata_c() { +; GFX11-LABEL: ttracedata_c: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_mov_b32 m0, 0xf4240 +; GFX11-NEXT: s_ttracedata +; GFX11-NEXT: s_endpgm + call void @llvm.amdgcn.s.ttracedata(i32 1000000) + ret void +} + +define amdgpu_cs void @ttracedata_s(i32 inreg %val) { +; GFX11-LABEL: ttracedata_s: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_mov_b32 m0, s0 +; GFX11-NEXT: s_ttracedata +; GFX11-NEXT: s_endpgm + call void @llvm.amdgcn.s.ttracedata(i32 %val) + ret void +} + +define amdgpu_cs void @ttracedata_v(i32 %val) { +; GFX11-SDAG-LABEL: ttracedata_v: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: s_mov_b32 m0, s0 +; GFX11-SDAG-NEXT: s_ttracedata +; GFX11-SDAG-NEXT: s_endpgm +; +; 
GFX11-GISEL-LABEL: ttracedata_v: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: v_readfirstlane_b32 m0, v0 +; GFX11-GISEL-NEXT: s_ttracedata +; GFX11-GISEL-NEXT: s_endpgm + call void @llvm.amdgcn.s.ttracedata(i32 %val) + ret void +} + +define amdgpu_cs void @ttracedata_imm() { +; GFX11-LABEL: ttracedata_imm: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_ttracedata_imm 0x3e8 +; GFX11-NEXT: s_endpgm + call void @llvm.amdgcn.s.ttracedata.imm(i16 1000) + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir new file mode 100644 index 00000000000000..d53050167e98be --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir @@ -0,0 +1,462 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD + + +--- +name: trivial +tracksRegLiveness: true +body: | + ; RP-LABEL: name: trivial + ; RP: bb.0: + ; RP-NEXT: Live-in: + ; RP-NEXT: SGPR VGPR + ; RP-NEXT: 0 0 + ; RP-NEXT: 0 1 %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec + ; RP-NEXT: 0 1 + ; RP-NEXT: 2 1 %1:sgpr_64 = IMPLICIT_DEF + ; RP-NEXT: 2 1 + ; RP-NEXT: Live-out: %0:0000000000000003 %1:000000000000000F + ; RP-NEXT: bb.1: + ; RP-NEXT: Live-in: %0:0000000000000003 %1:000000000000000F + ; RP-NEXT: SGPR VGPR + ; RP-NEXT: 2 1 + ; RP-NEXT: Live-out: %0:0000000000000003 %1:000000000000000F + ; RP-NEXT: bb.2: + ; RP-NEXT: Live-in: %0:0000000000000003 %1:000000000000000F + ; RP-NEXT: SGPR VGPR + ; RP-NEXT: 2 1 + ; RP-NEXT: 2 1 S_NOP 0, implicit %0:vgpr_32, implicit %1:sgpr_64 + ; RP-NEXT: 0 0 + ; RP-NEXT: Live-out: + bb.0: + %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec + %1:sgpr_64 = IMPLICIT_DEF + bb.1: + + bb.2: + S_NOP 0, implicit %0, implicit %1 +... 
+--- +name: live_through_test +tracksRegLiveness: true +body: | + ; RPU-LABEL: name: live_through_test + ; RPU: bb.0: + ; RPU-NEXT: Live-in: + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 0 + ; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF + ; RPU-NEXT: 3 0 + ; RPU-NEXT: Live-out: %0:00000000000000F3 + ; RPU-NEXT: bb.1: + ; RPU-NEXT: Live-in: %0:00000000000000F3 + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 3 0 + ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128 + ; RPU-NEXT: 2 0 + ; RPU-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF + ; RPU-NEXT: 3 0 + ; RPU-NEXT: 3 0 %0.sub1:sgpr_128 = IMPLICIT_DEF + ; RPU-NEXT: 3 0 + ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 + ; RPU-NEXT: 2 0 + ; RPU-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF + ; RPU-NEXT: 3 0 + ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 + ; RPU-NEXT: 2 0 + ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128 + ; RPU-NEXT: 2 0 + ; RPU-NEXT: Live-out: %0:00000000000000C3 + ; RPU-NEXT: bb.2: + ; RPU-NEXT: Live-in: %0:00000000000000C3 + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 2 0 + ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128 + ; RPU-NEXT: 0 0 + ; RPU-NEXT: Live-out: + ; + ; RPD-LABEL: name: live_through_test + ; RPD: bb.0: + ; RPD-NEXT: Live-in: + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 0 + ; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF + ; RPD-NEXT: 3 0 + ; RPD-NEXT: Live-out: %0:00000000000000F3 + ; RPD-NEXT: bb.1: + ; RPD-NEXT: Live-in: %0:00000000000000F3 + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 3 0 + ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128 + ; RPD-NEXT: 2 0 + ; RPD-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF + ; RPD-NEXT: 3 0 + ; RPD-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF + ; RPD-NEXT: 3 0 + ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 + ; RPD-NEXT: 2 0 + ; RPD-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF + ; RPD-NEXT: 3 0 + ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 + ; RPD-NEXT: 2 0 + ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128 + ; RPD-NEXT: 2 0 + ; RPD-NEXT: Live-out: %0:00000000000000C3 + ; RPD-NEXT: bb.2: + ; RPD-NEXT: Live-in: %0:00000000000000C3 + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 2 0 + ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128 + ; RPD-NEXT: 0 0 + ; RPD-NEXT: Live-out: + bb.0: + %0:sgpr_128 = IMPLICIT_DEF + bb.1: + + S_NOP 0, implicit %0.sub0 ; kill sub0 + %0.sub0 = IMPLICIT_DEF ; redef sub0 + + %0.sub1:sgpr_128 = IMPLICIT_DEF ; redef sub1 + + S_NOP 0, implicit %0.sub2 ; kill sub2 + %0.sub2:sgpr_128 = IMPLICIT_DEF ; redef sub2 + S_NOP 0, implicit %0.sub2 ; kill sub2 + + S_NOP 0, implicit %0.sub3 ; use sub3, live-through + + bb.2: + S_NOP 0, implicit %0.sub3, implicit %0.sub0 +... + +# This testcase shows the problem with LiveIntervals: it doesn't create +# subranges for undefined but used subregisters. Upward tracker is able to see +# the use of undefined subregister and tracks it correctly. 
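+# For example, %0 and %1 below each have only one 32-bit half defined, yet
+# bb.2 reads both vreg_64 registers in full: LIS reports lane masks
+# 0000000000000003 and 000000000000000C (the defined halves only), while the
+# upward tracker sees 000000000000000F for both, so it counts a VGPR pressure
+# of 4 rather than 2 at the final S_NOP.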
+--- +name: upward_problem_lis_subregs_mismatch +tracksRegLiveness: true +body: | + ; RPU-LABEL: name: upward_problem_lis_subregs_mismatch + ; RPU: bb.0: + ; RPU-NEXT: Live-in: + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 0 + ; RPU-NEXT: 0 1 undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec + ; RPU-NEXT: 0 1 + ; RPU-NEXT: 0 2 undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec + ; RPU-NEXT: 0 2 + ; RPU-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C + ; RPU-NEXT: bb.1: + ; RPU-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 2 + ; RPU-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C + ; RPU-NEXT: bb.2: + ; RPU-NEXT: Live-in: %0:000000000000000F %1:000000000000000F + ; RPU-NEXT: mis LIS: %0:0000000000000003 %1:000000000000000C + ; RPU-NEXT: %0 masks doesn't match: LIS reported 0000000000000003, tracked 000000000000000F + ; RPU-NEXT: %1 masks doesn't match: LIS reported 000000000000000C, tracked 000000000000000F + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 4 + ; RPU-NEXT: 0 4 S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64 + ; RPU-NEXT: 0 0 + ; RPU-NEXT: Live-out: + ; + ; RPD-LABEL: name: upward_problem_lis_subregs_mismatch + ; RPD: bb.0: + ; RPD-NEXT: Live-in: + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 0 + ; RPD-NEXT: 0 1 undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec + ; RPD-NEXT: 0 1 + ; RPD-NEXT: 0 2 undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec + ; RPD-NEXT: 0 2 + ; RPD-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C + ; RPD-NEXT: bb.1: + ; RPD-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 2 + ; RPD-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C + ; RPD-NEXT: bb.2: + ; RPD-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 2 + ; RPD-NEXT: 0 2 S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64 + ; RPD-NEXT: 0 0 + ; RPD-NEXT: Live-out: + bb.0: + undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec + undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec + + bb.1: + + bb.2: + S_NOP 0, implicit %0, implicit %1 +... 
+--- +name: only_dbg_value_sched_region +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + waveLimiter: true +body: | + ; RPU-LABEL: name: only_dbg_value_sched_region + ; RPU: bb.0: + ; RPU-NEXT: Live-in: + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 0 + ; RPU-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0 + ; RPU-NEXT: 0 1 + ; RPU-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 3 + ; RPU-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 5 + ; RPU-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec + ; RPU-NEXT: 0 6 + ; RPU-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec + ; RPU-NEXT: 0 7 + ; RPU-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec + ; RPU-NEXT: 0 8 + ; RPU-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64 + ; RPU-NEXT: 0 9 + ; RPU-NEXT: 0 9 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec + ; RPU-NEXT: 0 8 + ; RPU-NEXT: 0 8 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec + ; RPU-NEXT: 0 7 + ; RPU-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 6 + ; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 7 + ; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 9 + ; RPU-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 11 + ; RPU-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 12 + ; RPU-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF + ; RPU-NEXT: 0 13 + ; RPU-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF + ; RPU-NEXT: 0 14 + ; RPU-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 16 + ; RPU-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 18 + ; RPU-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF + ; RPU-NEXT: 0 19 + ; RPU-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; RPU-NEXT: 0 20 + ; RPU-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; RPU-NEXT: 0 21 + ; RPU-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec + ; RPU-NEXT: 0 20 + ; RPU-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: 0 20 + ; RPU-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec + ; RPU-NEXT: 0 16 + ; RPU-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 15 + ; RPU-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 14 + ; RPU-NEXT: 0 14 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: 0 12 + ; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 10 + ; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 10 + ; RPU-NEXT: 0 11 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec + ; RPU-NEXT: 0 9 + ; RPU-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32 + ; RPU-NEXT: 0 5 + ; RPU-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec + ; RPU-NEXT: 0 2 + ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003 + ; RPU-NEXT: bb.1: + ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003 + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: 0 2 + ; RPU-NEXT: 0 2 
S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: 0 2 + ; RPU-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: 0 2 + ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003 + ; RPU-NEXT: bb.2: + ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003 + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 2 + ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003 + ; RPU-NEXT: bb.3: + ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003 + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 2 + ; RPU-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32 + ; RPU-NEXT: 0 1 + ; RPU-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32 + ; RPU-NEXT: 0 0 + ; RPU-NEXT: 0 0 S_ENDPGM 0 + ; RPU-NEXT: 0 0 + ; RPU-NEXT: Live-out: + ; + ; RPD-LABEL: name: only_dbg_value_sched_region + ; RPD: bb.0: + ; RPD-NEXT: Live-in: + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 0 + ; RPD-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0 + ; RPD-NEXT: 0 1 + ; RPD-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 3 + ; RPD-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec + ; RPD-NEXT: 0 5 + ; RPD-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec + ; RPD-NEXT: 0 6 + ; RPD-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec + ; RPD-NEXT: 0 7 + ; RPD-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec + ; RPD-NEXT: 0 8 + ; RPD-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64 + ; RPD-NEXT: 0 9 + ; RPD-NEXT: 0 10 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec + ; RPD-NEXT: 0 8 + ; RPD-NEXT: 0 9 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec + ; RPD-NEXT: 0 7 + ; RPD-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec + ; RPD-NEXT: 0 6 + ; RPD-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 7 + ; RPD-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 9 + ; RPD-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 11 + ; RPD-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 12 + ; RPD-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF + ; RPD-NEXT: 0 13 + ; RPD-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF + ; RPD-NEXT: 0 14 + ; RPD-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 16 + ; RPD-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 18 + ; RPD-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF + ; RPD-NEXT: 0 19 + ; RPD-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; RPD-NEXT: 0 20 + ; RPD-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; RPD-NEXT: 0 21 + ; RPD-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec + ; RPD-NEXT: 0 20 + ; RPD-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: 0 20 + ; RPD-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec + ; RPD-NEXT: 0 16 + ; RPD-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec + ; RPD-NEXT: 0 15 + ; RPD-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec + ; RPD-NEXT: 0 14 + ; RPD-NEXT: 0 15 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: 0 12 + ; RPD-NEXT: 0 13 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec + ; 
RPD-NEXT: 0 10 + ; RPD-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec + ; RPD-NEXT: 0 10 + ; RPD-NEXT: 0 12 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec + ; RPD-NEXT: 0 9 + ; RPD-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32 + ; RPD-NEXT: 0 5 + ; RPD-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec + ; RPD-NEXT: 0 2 + ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003 + ; RPD-NEXT: bb.1: + ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003 + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: 0 2 + ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: 0 2 + ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: 0 2 + ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003 + ; RPD-NEXT: bb.2: + ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003 + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 2 + ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003 + ; RPD-NEXT: bb.3: + ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003 + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 2 + ; RPD-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32 + ; RPD-NEXT: 0 1 + ; RPD-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32 + ; RPD-NEXT: 0 0 + ; RPD-NEXT: 0 0 S_ENDPGM 0 + ; RPD-NEXT: 0 0 + ; RPD-NEXT: Live-out: + bb.0: + liveins: $vgpr0 + + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64 = IMPLICIT_DEF + %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, implicit $exec + %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, implicit $exec + undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0, %0, implicit $exec + %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec + %5:vreg_64 = COPY %2 + undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0, %5.sub0, implicit $mode, implicit $exec + %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1, %5.sub0, implicit $mode, implicit $exec + %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, implicit $exec + %8:vreg_64 = IMPLICIT_DEF + %9:vreg_64 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + undef %11.sub1:vreg_64 = IMPLICIT_DEF + %12:vgpr_32 = IMPLICIT_DEF + %13:vgpr_32 = IMPLICIT_DEF + %14:vreg_64 = IMPLICIT_DEF + %15:vreg_64 = IMPLICIT_DEF + %16:vgpr_32 = IMPLICIT_DEF + %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7, %2.sub0, implicit $mode, implicit $exec + %19.sub1:vreg_64 = V_ADD_F32_e32 %3, %3, implicit $mode, implicit $exec + DBG_VALUE + GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, implicit $exec + %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, implicit $exec + %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, implicit $exec + %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, implicit $exec + DBG_VALUE + DBG_VALUE + %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, implicit $exec + %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, implicit $exec + %23:vreg_64 = V_LSHLREV_B64_e64 2, %8, implicit $exec + S_NOP 0, implicit %13, implicit %23.sub0, implicit %12, implicit %17 + GLOBAL_STORE_DWORD %15, %18, 0, 0, implicit $exec + + bb.1: + DBG_VALUE + S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode + DBG_VALUE + DBG_VALUE + S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode + DBG_VALUE + + bb.3: + + bb.2: + S_NOP 0, implicit %0 + S_NOP 0, implicit %16 + S_ENDPGM 0 +... 
+ diff --git a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-set-kill.mir b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-set-kill.mir new file mode 100644 index 00000000000000..aef7adcf72b53a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-set-kill.mir @@ -0,0 +1,39 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX1100 %s + +--- + +name: vcmp_saveexec_to_vcmpx_set_kill +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr43, $sgpr44, $sgpr45, $sgpr55, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $vgpr40, $vgpr41, $vgpr76, $vgpr77, $vgpr78, $vgpr95, $vgpr109, $vgpr110, $vgpr111, $sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000000C, $sgpr52_sgpr53_sgpr54_sgpr55:0x0000000000000003, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $vgpr92_vgpr93_vgpr94_vgpr95:0x000000000000003F, $vgpr104_vgpr105_vgpr106_vgpr107:0x000000000000003F, $vgpr46_vgpr47:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000C, $vgpr72_vgpr73:0x000000000000000F, $vgpr74_vgpr75:0x000000000000000F, $vgpr88_vgpr89:0x000000000000000C, $vgpr90_vgpr91:0x0000000000000003, $vgpr124_vgpr125:0x000000000000000F, $vgpr126_vgpr127:0x000000000000000F + + ; GFX1100-LABEL: name: vcmp_saveexec_to_vcmpx_set_kill + ; GFX1100: liveins: $sgpr43, $sgpr44, $sgpr45, $sgpr55, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $vgpr40, $vgpr41, $vgpr76, $vgpr77, $vgpr78, $vgpr95, $vgpr109, $vgpr110, $vgpr111, $sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000000C, $sgpr52_sgpr53_sgpr54_sgpr55:0x0000000000000003, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $vgpr92_vgpr93_vgpr94_vgpr95:0x000000000000003F, $vgpr104_vgpr105_vgpr106_vgpr107:0x000000000000003F, $vgpr46_vgpr47:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000C, $vgpr72_vgpr73:0x000000000000000F, $vgpr74_vgpr75:0x000000000000000F, $vgpr88_vgpr89:0x000000000000000C, $vgpr90_vgpr91:0x0000000000000003, $vgpr124_vgpr125:0x000000000000000F, $vgpr126_vgpr127:0x000000000000000F + ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: renamable $vgpr0 = V_AND_B32_e32 128, $vgpr90, implicit $exec + ; GFX1100-NEXT: renamable $vgpr1 = V_AND_B32_e32 128, $vgpr89, implicit $exec + ; GFX1100-NEXT: renamable $sgpr4 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec + ; GFX1100-NEXT: renamable $sgpr0 = S_MOV_B32 0 + ; GFX1100-NEXT: renamable $sgpr1 = COPY renamable $sgpr0 + ; GFX1100-NEXT: renamable $sgpr2 = COPY renamable $sgpr0 + ; GFX1100-NEXT: renamable $sgpr3 = COPY renamable $sgpr0 + ; GFX1100-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact $vgpr0_vgpr1, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 344, 1, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 8) + ; GFX1100-NEXT: renamable $sgpr68 = COPY renamable $sgpr66 + ; GFX1100-NEXT: $sgpr5 = S_MOV_B32 $exec_lo + ; GFX1100-NEXT: V_CMPX_EQ_U32_nosdst_e64 $vgpr0, 0, implicit-def $exec, implicit $exec + renamable $vgpr0 = V_AND_B32_e32 128, $vgpr90, implicit $exec + renamable $vgpr1 = V_AND_B32_e32 128, $vgpr89, implicit $exec + renamable $vcc_lo = V_CMP_EQ_U32_e64 $vgpr0, 0, implicit $exec + renamable $sgpr4 = V_CMP_NE_U32_e64 0, killed $vgpr0, implicit $exec + renamable $sgpr0 = S_MOV_B32 0 + renamable $sgpr1 = COPY renamable $sgpr0 + renamable 
$sgpr2 = COPY renamable $sgpr0 + renamable $sgpr3 = COPY renamable $sgpr0 + BUFFER_STORE_DWORDX2_OFFSET_exact killed $vgpr0_vgpr1, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 344, 1, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 8) + renamable $sgpr68 = COPY renamable $sgpr66 + renamable $sgpr5 = COPY $exec_lo, implicit-def $exec_lo + renamable $sgpr6 = S_AND_B32 renamable $sgpr5, killed renamable $vcc_lo, implicit-def dead $scc + $exec_lo = S_MOV_B32_term killed renamable $sgpr6 +... diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll index e8baa373c30c07..8c546b57a78a1f 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -1,10 +1,92 @@ -; RUN: llc -O0 -march=amdgcn -mcpu=gfx900 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-O0 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-O3 %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -O0 -march=amdgcn -mcpu=gfx900 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-O0 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-O3 %s ; NOTE: llvm.amdgcn.wwm is deprecated, use llvm.amdgcn.strict.wwm instead. -; GFX9-LABEL: {{^}}no_cfg: define amdgpu_cs void @no_cfg(ptr addrspace(8) inreg %tmp14) { +; GFX9-O0-LABEL: no_cfg: +; GFX9-O0: ; %bb.0: +; GFX9-O0-NEXT: s_mov_b32 s6, s2 +; GFX9-O0-NEXT: s_mov_b32 s4, s0 +; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 +; GFX9-O0-NEXT: s_mov_b32 s7, s3 +; GFX9-O0-NEXT: s_mov_b32 s8, s7 +; GFX9-O0-NEXT: s_mov_b32 s9, s6 +; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 +; GFX9-O0-NEXT: s_mov_b32 s5, s1 +; GFX9-O0-NEXT: s_mov_b32 s10, s5 +; GFX9-O0-NEXT: s_mov_b32 s0, s4 +; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 +; GFX9-O0-NEXT: s_mov_b32 s1, s10 +; GFX9-O0-NEXT: s_mov_b32 s2, s9 +; GFX9-O0-NEXT: s_mov_b32 s3, s8 +; GFX9-O0-NEXT: s_mov_b32 s4, 0 +; GFX9-O0-NEXT: buffer_load_dwordx2 v[5:6], off, s[0:3], s4 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-O0-NEXT: s_nop 1 +; GFX9-O0-NEXT: v_mov_b32_dpp v2, v0 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O0-NEXT: v_add_u32_e64 v0, v0, v2 +; GFX9-O0-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-O0-NEXT: s_nop 1 +; GFX9-O0-NEXT: v_mov_b32_dpp v0, v1 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O0-NEXT: v_add_u32_e64 v0, v1, v0 +; GFX9-O0-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v3, v4 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[6:7] +; GFX9-O0-NEXT: s_mov_b32 s5, 1 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s5, v3 +; GFX9-O0-NEXT: s_mov_b32 s5, 2 +; GFX9-O0-NEXT: 
v_and_b32_e64 v3, v3, s5 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s4 offset:4 +; GFX9-O0-NEXT: s_endpgm +; +; GFX9-O3-LABEL: no_cfg: +; GFX9-O3: ; %bb.0: +; GFX9-O3-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O3-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-O3-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-O3-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O3-NEXT: s_waitcnt vmcnt(0) +; GFX9-O3-NEXT: v_mov_b32_e32 v2, v4 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v3, v5 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v3, 0 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O3-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O3-NEXT: v_mov_b32_dpp v0, v3 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O3-NEXT: v_add_u32_e32 v1, v2, v1 +; GFX9-O3-NEXT: v_add_u32_e32 v0, v3, v0 +; GFX9-O3-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O3-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O3-NEXT: v_mov_b32_e32 v5, v0 +; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX9-O3-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX9-O3-NEXT: v_lshlrev_b32_e32 v4, 1, v4 +; GFX9-O3-NEXT: v_and_b32_e32 v4, 2, v4 +; GFX9-O3-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:4 +; GFX9-O3-NEXT: s_endpgm %tmp100 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %tmp14, i32 0, i32 0, i32 0) %tmp101 = bitcast <2 x float> %tmp100 to <2 x i32> %tmp102 = extractelement <2 x i32> %tmp101, i32 0 @@ -12,26 +94,15 @@ define amdgpu_cs void @no_cfg(ptr addrspace(8) inreg %tmp14) { %tmp105 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp102, i32 0) %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp103, i32 0) -; GFX9: s_or_saveexec_b64 s[{{[0-9]+}}:{{[0-9]+}}], -1 -; GFX9-DAG: v_mov_b32_dpp v[[FIRST_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-O3-DAG: v_add_u32_e32 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]] -; GFX9-O0-DAG: v_add_u32_e64 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]] -; GFX9-DAG: v_mov_b32_e32 v[[FIRST:[0-9]+]], v[[FIRST_ADD]] %tmp120 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp105, i32 323, i32 12, i32 15, i1 false) %tmp121 = add i32 %tmp105, %tmp120 %tmp122 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp121) -; GFX9-DAG: v_mov_b32_dpp v[[SECOND_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-O3-DAG: v_add_u32_e32 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]] -; GFX9-O0-DAG: v_add_u32_e64 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]] -; GFX9-DAG: v_mov_b32_e32 v[[SECOND:[0-9]+]], v[[SECOND_ADD]] %tmp135 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp107, i32 323, i32 12, i32 15, i1 false) %tmp136 = add i32 %tmp107, %tmp135 %tmp137 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp136) -; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]] -; GFX9-O0: v_cmp_eq_u32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[FIRST]], v[[SECOND]] %tmp138 = icmp eq i32 %tmp122, %tmp137 %tmp139 = sext i1 %tmp138 to i32 %tmp140 = shl nsw i32 %tmp139, 1 @@ -41,19 +112,171 @@ define amdgpu_cs void @no_cfg(ptr addrspace(8) inreg %tmp14) { ret void } -; GFX9-LABEL: {{^}}cfg: define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { +; GFX9-O0-LABEL: cfg: +; GFX9-O0: ; %bb.0: ; %entry +; GFX9-O0-NEXT: s_mov_b32 s16, SCRATCH_RSRC_DWORD0 +; GFX9-O0-NEXT: s_mov_b32 s17, SCRATCH_RSRC_DWORD1 +; 
GFX9-O0-NEXT: s_mov_b32 s18, -1 +; GFX9-O0-NEXT: s_mov_b32 s19, 0xe00000 +; GFX9-O0-NEXT: s_add_u32 s16, s16, s4 +; GFX9-O0-NEXT: s_addc_u32 s17, s17, 0 +; GFX9-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 0 +; GFX9-O0-NEXT: s_mov_b32 s4, s1 +; GFX9-O0-NEXT: v_readlane_b32 s1, v0, 0 +; GFX9-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 +; GFX9-O0-NEXT: s_mov_b32 s3, s1 +; GFX9-O0-NEXT: s_mov_b32 s8, s3 +; GFX9-O0-NEXT: s_mov_b32 s9, s2 +; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 +; GFX9-O0-NEXT: s_mov_b32 s1, s4 +; GFX9-O0-NEXT: s_mov_b32 s10, s1 +; GFX9-O0-NEXT: s_mov_b32 s4, s0 +; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 +; GFX9-O0-NEXT: s_mov_b32 s5, s10 +; GFX9-O0-NEXT: s_mov_b32 s6, s9 +; GFX9-O0-NEXT: s_mov_b32 s7, s8 +; GFX9-O0-NEXT: v_writelane_b32 v0, s2, 1 +; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 2 +; GFX9-O0-NEXT: v_writelane_b32 v0, s0, 3 +; GFX9-O0-NEXT: v_writelane_b32 v0, s1, 4 +; GFX9-O0-NEXT: s_mov_b32 s0, 0 +; GFX9-O0-NEXT: s_nop 2 +; GFX9-O0-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], s0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:20 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; implicit-def: $sgpr2_sgpr3 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-O0-NEXT: s_nop 1 +; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 +; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v3, s0 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s0 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 s[0:1], exec +; GFX9-O0-NEXT: v_writelane_b32 v0, s0, 5 +; GFX9-O0-NEXT: v_writelane_b32 v0, s1, 6 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] +; GFX9-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; GFX9-O0-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-O0-NEXT: s_cbranch_execz .LBB1_2 +; GFX9-O0-NEXT: ; %bb.1: ; %if +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:16 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:20 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[0:1], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-O0-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: s_or_saveexec_b64 s[0:1], -1 +; GFX9-O0-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf +; 
GFX9-O0-NEXT: v_add_u32_e64 v1, v2, v1 +; GFX9-O0-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: .LBB1_2: ; %merge +; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 5 +; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 6 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-O0-NEXT: v_readlane_b32 s2, v0, 1 +; GFX9-O0-NEXT: v_readlane_b32 s3, v0, 2 +; GFX9-O0-NEXT: v_readlane_b32 s0, v0, 3 +; GFX9-O0-NEXT: v_readlane_b32 s1, v0, 4 +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:12 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v4 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9-O0-NEXT: s_mov_b32 s4, 1 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s4, v3 +; GFX9-O0-NEXT: s_mov_b32 s4, 2 +; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s4 +; GFX9-O0-NEXT: s_mov_b32 s6, s1 +; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 +; GFX9-O0-NEXT: s_mov_b32 s4, s3 +; GFX9-O0-NEXT: s_mov_b32 s5, s2 +; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 +; GFX9-O0-NEXT: s_mov_b32 s1, s6 +; GFX9-O0-NEXT: s_mov_b32 s2, s5 +; GFX9-O0-NEXT: s_mov_b32 s3, s4 +; GFX9-O0-NEXT: s_mov_b32 s4, 0 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s4 offset:4 +; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: s_endpgm +; +; GFX9-O3-LABEL: cfg: +; GFX9-O3: ; %bb.0: ; %entry +; GFX9-O3-NEXT: buffer_load_dwordx2 v[3:4], off, s[0:3], 0 +; GFX9-O3-NEXT: v_mov_b32_e32 v5, 0 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O3-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-O3-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O3-NEXT: s_waitcnt vmcnt(0) +; GFX9-O3-NEXT: v_mov_b32_e32 v2, v3 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O3-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O3-NEXT: v_add_u32_e32 v1, v2, v1 +; GFX9-O3-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O3-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-O3-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-O3-NEXT: s_cbranch_execz .LBB1_2 +; GFX9-O3-NEXT: ; %bb.1: ; %if +; GFX9-O3-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-O3-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-O3-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O3-NEXT: v_mov_b32_e32 v2, v4 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-O3-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O3-NEXT: v_add_u32_e32 v1, v2, v1 +; GFX9-O3-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O3-NEXT: v_mov_b32_e32 v5, v1 +; GFX9-O3-NEXT: .LBB1_2: ; %merge +; GFX9-O3-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX9-O3-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-O3-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX9-O3-NEXT: v_and_b32_e32 v0, 2, v0 +; GFX9-O3-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 +; GFX9-O3-NEXT: s_endpgm entry: %tmp100 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr 
addrspace(8) %tmp14, i32 0, i32 0, i32 0) %tmp101 = bitcast <2 x float> %tmp100 to <2 x i32> %tmp102 = extractelement <2 x i32> %tmp101, i32 0 %tmp105 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp102, i32 0) -; GFX9: v_mov_b32_dpp v[[FIRST_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-O3: v_add_u32_e32 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]] -; GFX9-O0: v_add_u32_e64 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]] -; GFX9: v_mov_b32_e32 v[[FIRST:[0-9]+]], v[[FIRST_ADD]] -; GFX9-O0: buffer_store_dword v[[FIRST]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[FIRST_IMM_OFFSET:[0-9]+]] %tmp120 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp105, i32 323, i32 12, i32 15, i1 false) %tmp121 = add i32 %tmp105, %tmp120 %tmp122 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp121) @@ -64,11 +287,6 @@ if: %tmp103 = extractelement <2 x i32> %tmp101, i32 1 %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp103, i32 0) -; GFX9: v_mov_b32_dpp v[[SECOND_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-O3: v_add_u32_e32 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]] -; GFX9-O0: v_add_u32_e64 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]] -; GFX9: v_mov_b32_e32 v[[SECOND:[0-9]+]], v[[SECOND_ADD]] -; GFX9-O0: buffer_store_dword v[[SECOND]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[SECOND_IMM_OFFSET:[0-9]+]] %tmp135 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp107, i32 323, i32 12, i32 15, i1 false) %tmp136 = add i32 %tmp107, %tmp135 %tmp137 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp136) @@ -76,10 +294,6 @@ if: merge: %merge_value = phi i32 [ 0, %entry ], [%tmp137, %if ] -; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]] -; GFX9-O0: buffer_load_dword v[[FIRST:[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[FIRST_IMM_OFFSET]] -; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[SECOND_IMM_OFFSET]] -; GFX9-O0: v_cmp_eq_u32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[FIRST]], v[[SECOND]] %tmp138 = icmp eq i32 %tmp122, %merge_value %tmp139 = sext i1 %tmp138 to i32 %tmp140 = shl nsw i32 %tmp139, 1 @@ -89,92 +303,555 @@ merge: ret void } -; GFX9-LABEL: {{^}}called: define hidden i32 @called(i32 %a) noinline { -; GFX9-O3: v_add_u32_e32 v1, v0, v0 -; GFX9-O0: v_add_u32_e64 v1, v0, v0 +; GFX9-O0-LABEL: called: +; GFX9-O0: ; %bb.0: +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-O0-NEXT: v_add_u32_e64 v1, v0, v0 +; GFX9-O0-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX9-O0-NEXT: v_sub_u32_e64 v0, v0, v1 +; GFX9-O0-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-O3-LABEL: called: +; GFX9-O3: ; %bb.0: +; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-O3-NEXT: v_add_u32_e32 v1, v0, v0 +; GFX9-O3-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX9-O3-NEXT: v_sub_u32_e32 v0, v0, v1 +; GFX9-O3-NEXT: s_setpc_b64 s[30:31] %add = add i32 %a, %a -; GFX9: v_mul_lo_u32 v0, v1, v0 %mul = mul i32 %add, %a -; GFX9-O3: v_sub_u32_e32 v0, v0, v1 -; GFX9-O0: v_sub_u32_e64 v0, v0, v1 %sub = sub i32 %mul, %add ret i32 %sub } -; GFX9-LABEL: {{^}}call: define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) { -; GFX9-DAG: s_load_dword [[ARG:s[0-9]+]] -; GFX9-O0-DAG: s_mov_b32 s3, 0{{$}} -; GFX9-O0-DAG: v_mov_b32_e32 v{{[0-9]+}}, [[ARG]] +; GFX9-O0-LABEL: call: +; GFX9-O0: ; %bb.0: +; GFX9-O0-NEXT: s_mov_b32 s32, 0x400 +; GFX9-O0-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0 +; GFX9-O0-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1 +; GFX9-O0-NEXT: s_mov_b32 s26, -1 +; 
GFX9-O0-NEXT: s_mov_b32 s27, 0xe00000 +; GFX9-O0-NEXT: s_add_u32 s24, s24, s9 +; GFX9-O0-NEXT: s_addc_u32 s25, s25, 0 +; GFX9-O0-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: s_mov_b32 s14, s8 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O0-NEXT: v_writelane_b32 v7, s8, 0 +; GFX9-O0-NEXT: v_writelane_b32 v7, s9, 1 +; GFX9-O0-NEXT: s_mov_b32 s13, s7 +; GFX9-O0-NEXT: s_mov_b32 s12, s6 +; GFX9-O0-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[2:3] +; GFX9-O0-NEXT: v_readlane_b32 s2, v7, 0 +; GFX9-O0-NEXT: v_readlane_b32 s3, v7, 1 +; GFX9-O0-NEXT: v_writelane_b32 v7, s4, 2 +; GFX9-O0-NEXT: v_writelane_b32 v7, s5, 3 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[0:1] +; GFX9-O0-NEXT: v_readlane_b32 s0, v7, 2 +; GFX9-O0-NEXT: v_readlane_b32 s1, v7, 3 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3] +; GFX9-O0-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 +; GFX9-O0-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x2c +; GFX9-O0-NEXT: s_load_dword s2, s[0:1], 0x34 +; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-O0-NEXT: s_mov_b32 s3, s7 +; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7 +; GFX9-O0-NEXT: s_mov_b32 s7, s9 +; GFX9-O0-NEXT: s_mov_b32 s16, s8 +; GFX9-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17_sgpr18_sgpr19 +; GFX9-O0-NEXT: s_mov_b32 s17, s7 +; GFX9-O0-NEXT: s_mov_b32 s18, s6 +; GFX9-O0-NEXT: s_mov_b32 s19, s3 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_writelane_b32 v0, s16, 4 +; GFX9-O0-NEXT: v_writelane_b32 v0, s17, 5 +; GFX9-O0-NEXT: v_writelane_b32 v0, s18, 6 +; GFX9-O0-NEXT: v_writelane_b32 v0, s19, 7 +; GFX9-O0-NEXT: s_mov_b32 s3, 0 +; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 8 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, s2 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, s3 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1 +; GFX9-O0-NEXT: v_writelane_b32 v0, s2, 9 +; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 10 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_mov_b64 s[6:7], 56 +; GFX9-O0-NEXT: s_mov_b32 s2, s0 +; GFX9-O0-NEXT: s_mov_b32 s0, s1 +; GFX9-O0-NEXT: s_mov_b32 s3, s6 +; GFX9-O0-NEXT: s_mov_b32 s1, s7 +; GFX9-O0-NEXT: s_add_u32 s8, s2, s3 +; GFX9-O0-NEXT: s_addc_u32 s0, s0, s1 +; GFX9-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 +; GFX9-O0-NEXT: s_mov_b32 s9, s0 +; GFX9-O0-NEXT: s_getpc_b64 s[16:17] +; GFX9-O0-NEXT: s_add_u32 s16, s16, called@rel32@lo+4 +; GFX9-O0-NEXT: s_addc_u32 s17, s17, called@rel32@hi+12 +; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[24:25] +; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[26:27] +; GFX9-O0-NEXT: s_mov_b32 s6, 20 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s6, v3 +; GFX9-O0-NEXT: s_mov_b32 s6, 10 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v4, s6, v4 +; GFX9-O0-NEXT: v_or3_b32 v3, v5, v4, v3 +; GFX9-O0-NEXT: ; implicit-def: $sgpr6_sgpr7 +; GFX9-O0-NEXT: ; implicit-def: $sgpr15 +; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3 +; GFX9-O0-NEXT: 
v_mov_b32_e32 v0, v6 +; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s0, v1, 4 +; GFX9-O0-NEXT: v_readlane_b32 s1, v1, 5 +; GFX9-O0-NEXT: v_readlane_b32 s2, v1, 6 +; GFX9-O0-NEXT: v_readlane_b32 s3, v1, 7 +; GFX9-O0-NEXT: v_readlane_b32 s6, v1, 9 +; GFX9-O0-NEXT: v_readlane_b32 s7, v1, 10 +; GFX9-O0-NEXT: v_readlane_b32 s4, v1, 8 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: v_add_u32_e64 v3, v3, v6 +; GFX9-O0-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s4 offset:4 +; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: s_endpgm +; +; GFX9-O3-LABEL: call: +; GFX9-O3: ; %bb.0: +; GFX9-O3-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GFX9-O3-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GFX9-O3-NEXT: s_mov_b32 s14, -1 +; GFX9-O3-NEXT: s_load_dword s2, s[0:1], 0x34 +; GFX9-O3-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX9-O3-NEXT: s_mov_b32 s15, 0xe00000 +; GFX9-O3-NEXT: s_add_u32 s12, s12, s3 +; GFX9-O3-NEXT: s_addc_u32 s13, s13, 0 +; GFX9-O3-NEXT: s_mov_b32 s32, 0 +; GFX9-O3-NEXT: s_getpc_b64 s[8:9] +; GFX9-O3-NEXT: s_add_u32 s8, s8, called@rel32@lo+4 +; GFX9-O3-NEXT: s_addc_u32 s9, s9, called@rel32@hi+12 +; GFX9-O3-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-O3-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: s_or_saveexec_b64 s[10:11], -1 +; GFX9-O3-NEXT: s_mov_b64 s[0:1], s[12:13] +; GFX9-O3-NEXT: s_mov_b64 s[2:3], s[14:15] +; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GFX9-O3-NEXT: v_mov_b32_e32 v1, v0 +; GFX9-O3-NEXT: v_add_u32_e32 v1, v1, v2 +; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11] +; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1 +; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4 +; GFX9-O3-NEXT: s_endpgm -; GFX9-O3: v_mov_b32_e32 v2, [[ARG]] -; GFX9-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, s3 -; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_not_b64 exec, exec %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0) -; GFX9-O0: v_mov_b32_e32 v0, v6 -; GFX9-O3: v_mov_b32_e32 v0, v2 -; GFX9: s_swappc_b64 %tmp134 = call i32 @called(i32 %tmp107) -; GFX9-O3: v_mov_b32_e32 v1, v0 -; GFX9-O3: v_add_u32_e32 v1, v1, v2 -; GFX9-O0: v_mov_b32_e32 v3, v0 -; GFX9-O0: v_add_u32_e64 v3, v3, v6 %tmp136 = add i32 %tmp134, %tmp107 %tmp137 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp136) -; GFX9-O0: buffer_store_dword v1 -; GFX9-O3: buffer_store_dword v0 call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %tmp137, ptr addrspace(8) %tmp14, i32 4, i32 0, i32 0) ret void } -; GFX9-LABEL: {{^}}called_i64: define i64 @called_i64(i64 %a) noinline { +; GFX9-O0-LABEL: called_i64: +; GFX9-O0: ; %bb.0: +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O0-NEXT: ; implicit-def: $sgpr4 +; GFX9-O0-NEXT: ; implicit-def: $sgpr4 +; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O0-NEXT: ; implicit-def: 
$sgpr4_sgpr5 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: v_add_co_u32_e64 v4, s[4:5], v4, v5 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v0, s[4:5], v0, v1, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 +; GFX9-O0-NEXT: s_mov_b32 s4, 32 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-O0-NEXT: v_mul_lo_u32 v1, v0, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 +; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], s4, v[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6 +; GFX9-O0-NEXT: v_mul_lo_u32 v2, v2, v3 +; GFX9-O0-NEXT: v_mad_u64_u32 v[6:7], s[6:7], v0, v3, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7 +; GFX9-O0-NEXT: v_add3_u32 v0, v0, v1, v2 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; implicit-def: $sgpr6 +; GFX9-O0-NEXT: ; implicit-def: $sgpr6 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], s4, v[0:1] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: s_mov_b32 s5, 0 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7 +; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 +; GFX9-O0-NEXT: v_or_b32_e64 v6, v1, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-O0-NEXT: v_sub_co_u32_e64 v1, s[6:7], v1, v3 +; GFX9-O0-NEXT: v_subb_co_u32_e64 v0, s[6:7], v0, v2, s[6:7] +; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 +; GFX9-O0-NEXT: v_lshrrev_b64 v[1:2], s4, v[1:2] +; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec +; GFX9-O0-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-O3-LABEL: called_i64: +; GFX9-O3: ; %bb.0: +; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-O3-NEXT: v_add_co_u32_e32 v2, vcc, v0, v0 +; GFX9-O3-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v1, vcc +; GFX9-O3-NEXT: v_mul_lo_u32 v4, v3, v0 +; GFX9-O3-NEXT: v_mul_lo_u32 v5, v2, v1 +; GFX9-O3-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, 0 +; GFX9-O3-NEXT: v_add3_u32 v1, v1, v5, v4 +; GFX9-O3-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2 +; GFX9-O3-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-O3-NEXT: s_setpc_b64 s[30:31] %add = add i64 %a, %a %mul = mul i64 %add, %a %sub = sub i64 %mul, %add ret i64 %sub } -; GFX9-LABEL: {{^}}call_i64: define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %arg) { -; GFX9: s_load_dwordx2 s[[[ARG_LO:[0-9]+]]:[[ARG_HI:[0-9]+]]]{{.*}}, 0x34 +; GFX9-O0-LABEL: call_i64: +; GFX9-O0: ; %bb.0: +; GFX9-O0-NEXT: s_mov_b32 s32, 0x400 +; GFX9-O0-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0 +; GFX9-O0-NEXT: s_mov_b32 s25, 
SCRATCH_RSRC_DWORD1 +; GFX9-O0-NEXT: s_mov_b32 s26, -1 +; GFX9-O0-NEXT: s_mov_b32 s27, 0xe00000 +; GFX9-O0-NEXT: s_add_u32 s24, s24, s9 +; GFX9-O0-NEXT: s_addc_u32 s25, s25, 0 +; GFX9-O0-NEXT: ; implicit-def: $vgpr12 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: s_mov_b32 s14, s8 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O0-NEXT: v_writelane_b32 v12, s8, 0 +; GFX9-O0-NEXT: v_writelane_b32 v12, s9, 1 +; GFX9-O0-NEXT: s_mov_b32 s13, s7 +; GFX9-O0-NEXT: s_mov_b32 s12, s6 +; GFX9-O0-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[2:3] +; GFX9-O0-NEXT: v_readlane_b32 s2, v12, 0 +; GFX9-O0-NEXT: v_readlane_b32 s3, v12, 1 +; GFX9-O0-NEXT: v_writelane_b32 v12, s4, 2 +; GFX9-O0-NEXT: v_writelane_b32 v12, s5, 3 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[0:1] +; GFX9-O0-NEXT: v_readlane_b32 s0, v12, 2 +; GFX9-O0-NEXT: v_readlane_b32 s1, v12, 3 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3] +; GFX9-O0-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24 +; GFX9-O0-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x2c +; GFX9-O0-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 +; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-O0-NEXT: s_mov_b32 s6, s9 +; GFX9-O0-NEXT: s_mov_b32 s7, s8 +; GFX9-O0-NEXT: s_mov_b32 s8, s17 +; GFX9-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 killed $sgpr16_sgpr17 +; GFX9-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17_sgpr18_sgpr19 +; GFX9-O0-NEXT: s_mov_b32 s17, s8 +; GFX9-O0-NEXT: s_mov_b32 s18, s7 +; GFX9-O0-NEXT: s_mov_b32 s19, s6 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_writelane_b32 v0, s16, 4 +; GFX9-O0-NEXT: v_writelane_b32 v0, s17, 5 +; GFX9-O0-NEXT: v_writelane_b32 v0, s18, 6 +; GFX9-O0-NEXT: v_writelane_b32 v0, s19, 7 +; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, s2 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, s3 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, s6 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, s7 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1 +; GFX9-O0-NEXT: v_writelane_b32 v0, s2, 8 +; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 9 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_mov_b64 s[6:7], 60 +; GFX9-O0-NEXT: s_mov_b32 s2, s0 +; GFX9-O0-NEXT: s_mov_b32 s0, s1 +; GFX9-O0-NEXT: s_mov_b32 s3, s6 +; GFX9-O0-NEXT: s_mov_b32 s1, s7 +; GFX9-O0-NEXT: s_add_u32 s8, s2, s3 +; GFX9-O0-NEXT: s_addc_u32 s0, s0, s1 +; GFX9-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 +; GFX9-O0-NEXT: s_mov_b32 s9, s0 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 +; GFX9-O0-NEXT: s_mov_b32 s0, 32 +; GFX9-O0-NEXT: ; implicit-def: $sgpr2_sgpr3 +; GFX9-O0-NEXT: v_lshrrev_b64 v[10:11], s0, v[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 +; GFX9-O0-NEXT: s_getpc_b64 s[0:1] +; GFX9-O0-NEXT: s_add_u32 s0, s0, called_i64@gotpcrel32@lo+4 +; GFX9-O0-NEXT: s_addc_u32 s1, s1, called_i64@gotpcrel32@hi+12 +; GFX9-O0-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 +; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[24:25] 
+; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[26:27] +; GFX9-O0-NEXT: s_mov_b32 s6, 20 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s6, v3 +; GFX9-O0-NEXT: s_mov_b32 s6, 10 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v4, s6, v4 +; GFX9-O0-NEXT: v_or3_b32 v3, v5, v4, v3 +; GFX9-O0-NEXT: ; implicit-def: $sgpr6_sgpr7 +; GFX9-O0-NEXT: ; implicit-def: $sgpr15 +; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7 +; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s0, v2, 4 +; GFX9-O0-NEXT: v_readlane_b32 s1, v2, 5 +; GFX9-O0-NEXT: v_readlane_b32 s2, v2, 6 +; GFX9-O0-NEXT: v_readlane_b32 s3, v2, 7 +; GFX9-O0-NEXT: v_readlane_b32 s4, v2, 8 +; GFX9-O0-NEXT: v_readlane_b32 s5, v2, 9 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O0-NEXT: ; implicit-def: $sgpr6 +; GFX9-O0-NEXT: ; implicit-def: $sgpr6 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9 +; GFX9-O0-NEXT: v_add_co_u32_e64 v3, s[6:7], v3, v5 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[6:7], v4, v6, s[6:7] +; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4 +; GFX9-O0-NEXT: s_mov_b32 s4, 0 +; GFX9-O0-NEXT: buffer_store_dwordx2 v[1:2], off, s[0:3], s4 offset:4 +; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: s_endpgm +; +; GFX9-O3-LABEL: call_i64: +; GFX9-O3: ; %bb.0: +; GFX9-O3-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GFX9-O3-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GFX9-O3-NEXT: s_mov_b32 s14, -1 +; GFX9-O3-NEXT: s_mov_b32 s15, 0xe00000 +; GFX9-O3-NEXT: s_add_u32 s12, s12, s3 +; GFX9-O3-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 +; GFX9-O3-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24 +; GFX9-O3-NEXT: s_mov_b32 s32, 0 +; GFX9-O3-NEXT: s_addc_u32 s13, s13, 0 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[0:1], -1 +; GFX9-O3-NEXT: s_getpc_b64 s[4:5] +; GFX9-O3-NEXT: s_add_u32 s4, s4, called_i64@gotpcrel32@lo+4 +; GFX9-O3-NEXT: s_addc_u32 s5, s5, called_i64@gotpcrel32@hi+12 +; GFX9-O3-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX9-O3-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-O3-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-O3-NEXT: v_mov_b32_e32 v6, s2 +; GFX9-O3-NEXT: v_mov_b32_e32 v7, s3 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0 +; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-O3-NEXT: s_mov_b64 s[0:1], s[12:13] +; GFX9-O3-NEXT: s_mov_b64 s[2:3], s[14:15] +; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6 +; GFX9-O3-NEXT: v_mov_b32_e32 v1, v7 +; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-O3-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O3-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O3-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 +; GFX9-O3-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v7, vcc +; GFX9-O3-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 offset:4 +; GFX9-O3-NEXT: s_endpgm -; GFX9-O0: s_mov_b64 s[[[ZERO_LO:[0-9]+]]:[[ZERO_HI:[0-9]+]]], 0{{$}} -; 
GFX9-O0-DAG: v_mov_b32_e32 v9, s[[ARG_HI]] -; GFX9-O0-DAG: v_mov_b32_e32 v8, s[[ARG_LO]] -; GFX9-O3-DAG: v_mov_b32_e32 v7, s[[ARG_HI]] -; GFX9-O3-DAG: v_mov_b32_e32 v6, s[[ARG_LO]] -; GFX9: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, s[[ZERO_LO]] -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s[[ZERO_HI]] -; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0 -; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0 -; GFX9-NEXT: s_not_b64 exec, exec %tmp107 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %arg, i64 0) -; GFX9: s_swappc_b64 %tmp134 = call i64 @called_i64(i64 %tmp107) %tmp136 = add i64 %tmp134, %tmp107 %tmp137 = tail call i64 @llvm.amdgcn.wwm.i64(i64 %tmp136) %tmp138 = bitcast i64 %tmp137 to <2 x i32> -; GFX9: buffer_store_dwordx2 call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> %tmp138, ptr addrspace(8) %tmp14, i32 4, i32 0, i32 0) ret void } -; GFX9-LABEL: {{^}}_amdgpu_cs_main: define amdgpu_cs void @_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %index) { +; GFX9-O0-LABEL: _amdgpu_cs_main: +; GFX9-O0: ; %bb.0: +; GFX9-O0-NEXT: s_mov_b32 s4, s3 +; GFX9-O0-NEXT: s_mov_b32 s5, s2 +; GFX9-O0-NEXT: s_mov_b32 s6, s1 +; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 +; GFX9-O0-NEXT: s_mov_b32 s1, s6 +; GFX9-O0-NEXT: s_mov_b32 s2, s5 +; GFX9-O0-NEXT: s_mov_b32 s3, s4 +; GFX9-O0-NEXT: ; kill: def $sgpr4_sgpr5_sgpr6_sgpr7 killed $sgpr0_sgpr1_sgpr2_sgpr3 +; GFX9-O0-NEXT: s_mov_b32 s4, 5 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s4, v0 +; GFX9-O0-NEXT: s_mov_b32 s4, 0 +; GFX9-O0-NEXT: buffer_load_dwordx4 v[10:13], v0, s[0:3], s4 offen +; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], v0, s[0:3], s4 offen offset:16 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 +; GFX9-O0-NEXT: s_mov_b32 s5, 0x7fffffff +; GFX9-O0-NEXT: s_mov_b32 s6, -1 +; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 +; GFX9-O0-NEXT: s_mov_b32 s7, s5 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; 
GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v9 +; GFX9-O0-NEXT: buffer_store_dwordx4 v[5:8], v0, s[0:3], s4 offen +; GFX9-O0-NEXT: buffer_store_dwordx2 v[3:4], v0, s[0:3], s4 offen offset:16 +; GFX9-O0-NEXT: s_endpgm +; +; GFX9-O3-LABEL: _amdgpu_cs_main: +; GFX9-O3: ; %bb.0: +; GFX9-O3-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX9-O3-NEXT: buffer_load_dwordx4 v[7:10], v0, s[0:3], 0 offen +; GFX9-O3-NEXT: buffer_load_dwordx2 v[11:12], v0, s[0:3], 0 offen offset:16 +; GFX9-O3-NEXT: s_mov_b32 s4, -1 +; GFX9-O3-NEXT: s_brev_b32 s5, -2 +; GFX9-O3-NEXT: s_waitcnt vmcnt(1) +; GFX9-O3-NEXT: v_mov_b32_e32 v1, v7 +; GFX9-O3-NEXT: v_mov_b32_e32 v2, v8 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-O3-NEXT: v_mov_b32_e32 v2, s5 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v3, v9 +; GFX9-O3-NEXT: v_mov_b32_e32 v4, v10 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-O3-NEXT: v_mov_b32_e32 v4, s5 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: s_waitcnt vmcnt(0) +; GFX9-O3-NEXT: v_mov_b32_e32 v5, v11 +; GFX9-O3-NEXT: v_mov_b32_e32 v6, v12 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v5, s4 +; GFX9-O3-NEXT: v_mov_b32_e32 v6, s5 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v7, v1 +; GFX9-O3-NEXT: v_mov_b32_e32 v9, v3 +; GFX9-O3-NEXT: v_mov_b32_e32 v8, v2 +; GFX9-O3-NEXT: v_mov_b32_e32 v10, v4 +; GFX9-O3-NEXT: v_mov_b32_e32 v11, v5 +; GFX9-O3-NEXT: v_mov_b32_e32 v12, v6 +; GFX9-O3-NEXT: buffer_store_dwordx4 v[7:10], v0, s[0:3], 0 offen +; GFX9-O3-NEXT: buffer_store_dwordx2 v[11:12], v0, s[0:3], 0 offen offset:16 +; GFX9-O3-NEXT: s_endpgm %tmp17 = shl i32 %index, 5 -; GFX9: buffer_load_dwordx4 %tmp18 = tail call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %tmp17, i32 0) %.i0.upto1.bc = bitcast <4 x i32> %tmp18 to <2 x i64> %tmp19 = or i32 %tmp17, 16 -; GFX9: buffer_load_dwordx2 %tmp20 = tail call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %tmp19, i32 0) %.i0.upto1.extract = extractelement <2 x i64> %.i0.upto1.bc, i32 0 %tmp22 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %.i0.upto1.extract, i64 9223372036854775807) @@ -191,16 +868,95 @@ define amdgpu_cs void @_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %index) { %tmp254 = shufflevector <2 x float> %.cast, <2 x float> %.cast6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %desc.int = bitcast <4 x i32> %desc to i128 %desc.ptr = inttoptr i128 %desc.int to ptr addrspace(8) -; GFX9: buffer_store_dwordx4 tail call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %tmp254, ptr addrspace(8) %desc.ptr, i32 %tmp17, i32 0, i32 0) - ; GFX9: buffer_store_dwordx2 tail call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %.cast7, ptr addrspace(8) %desc.ptr, i32 %tmp19, i32 0, i32 0) ret void } -; GFX9-LABEL: {{^}}strict_wwm_no_cfg: define amdgpu_cs void @strict_wwm_no_cfg(ptr addrspace(8) inreg %tmp14) { +; GFX9-O0-LABEL: strict_wwm_no_cfg: +; GFX9-O0: ; %bb.0: +; GFX9-O0-NEXT: s_mov_b32 s6, s2 +; GFX9-O0-NEXT: s_mov_b32 s4, s0 +; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 +; GFX9-O0-NEXT: s_mov_b32 s7, s3 +; GFX9-O0-NEXT: s_mov_b32 s8, s7 +; GFX9-O0-NEXT: s_mov_b32 s9, s6 +; GFX9-O0-NEXT: ; kill: def $sgpr4 
killed $sgpr4 def $sgpr4_sgpr5 +; GFX9-O0-NEXT: s_mov_b32 s5, s1 +; GFX9-O0-NEXT: s_mov_b32 s10, s5 +; GFX9-O0-NEXT: s_mov_b32 s0, s4 +; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 +; GFX9-O0-NEXT: s_mov_b32 s1, s10 +; GFX9-O0-NEXT: s_mov_b32 s2, s9 +; GFX9-O0-NEXT: s_mov_b32 s3, s8 +; GFX9-O0-NEXT: s_mov_b32 s4, 0 +; GFX9-O0-NEXT: buffer_load_dwordx2 v[5:6], off, s[0:3], s4 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-O0-NEXT: s_nop 1 +; GFX9-O0-NEXT: v_mov_b32_dpp v2, v0 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O0-NEXT: v_add_u32_e64 v0, v0, v2 +; GFX9-O0-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-O0-NEXT: s_nop 1 +; GFX9-O0-NEXT: v_mov_b32_dpp v0, v1 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O0-NEXT: v_add_u32_e64 v0, v1, v0 +; GFX9-O0-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v3, v4 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[6:7] +; GFX9-O0-NEXT: s_mov_b32 s5, 1 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s5, v3 +; GFX9-O0-NEXT: s_mov_b32 s5, 2 +; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s5 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s4 offset:4 +; GFX9-O0-NEXT: s_endpgm +; +; GFX9-O3-LABEL: strict_wwm_no_cfg: +; GFX9-O3: ; %bb.0: +; GFX9-O3-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O3-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-O3-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-O3-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O3-NEXT: s_waitcnt vmcnt(0) +; GFX9-O3-NEXT: v_mov_b32_e32 v2, v4 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v3, v5 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v3, 0 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O3-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O3-NEXT: v_mov_b32_dpp v0, v3 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O3-NEXT: v_add_u32_e32 v1, v2, v1 +; GFX9-O3-NEXT: v_add_u32_e32 v0, v3, v0 +; GFX9-O3-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O3-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O3-NEXT: v_mov_b32_e32 v5, v0 +; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX9-O3-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX9-O3-NEXT: v_lshlrev_b32_e32 v4, 1, v4 +; GFX9-O3-NEXT: v_and_b32_e32 v4, 2, v4 +; GFX9-O3-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:4 +; GFX9-O3-NEXT: s_endpgm %tmp100 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %tmp14, i32 0, i32 0, i32 0) %tmp101 = bitcast <2 x float> %tmp100 to <2 x i32> %tmp102 = extractelement <2 x i32> %tmp101, i32 0 @@ -208,26 +964,15 @@ define amdgpu_cs void @strict_wwm_no_cfg(ptr addrspace(8) inreg %tmp14) { %tmp105 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp102, i32 0) %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp103, i32 0) -; GFX9: 
s_or_saveexec_b64 s[{{[0-9]+}}:{{[0-9]+}}], -1 -; GFX9-DAG: v_mov_b32_dpp v[[FIRST_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-O3-DAG: v_add_u32_e32 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]] -; GFX9-O0-DAG: v_add_u32_e64 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]] -; GFX9-DAG: v_mov_b32_e32 v[[FIRST:[0-9]+]], v[[FIRST_ADD]] %tmp120 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp105, i32 323, i32 12, i32 15, i1 false) %tmp121 = add i32 %tmp105, %tmp120 %tmp122 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp121) -; GFX9-DAG: v_mov_b32_dpp v[[SECOND_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-O3-DAG: v_add_u32_e32 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]] -; GFX9-O0-DAG: v_add_u32_e64 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]] -; GFX9-DAG: v_mov_b32_e32 v[[SECOND:[0-9]+]], v[[SECOND_ADD]] %tmp135 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp107, i32 323, i32 12, i32 15, i1 false) %tmp136 = add i32 %tmp107, %tmp135 %tmp137 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp136) -; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]] -; GFX9-O0: v_cmp_eq_u32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[FIRST]], v[[SECOND]] %tmp138 = icmp eq i32 %tmp122, %tmp137 %tmp139 = sext i1 %tmp138 to i32 %tmp140 = shl nsw i32 %tmp139, 1 @@ -237,19 +982,171 @@ define amdgpu_cs void @strict_wwm_no_cfg(ptr addrspace(8) inreg %tmp14) { ret void } -; GFX9-LABEL: {{^}}strict_wwm_cfg: define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { +; GFX9-O0-LABEL: strict_wwm_cfg: +; GFX9-O0: ; %bb.0: ; %entry +; GFX9-O0-NEXT: s_mov_b32 s16, SCRATCH_RSRC_DWORD0 +; GFX9-O0-NEXT: s_mov_b32 s17, SCRATCH_RSRC_DWORD1 +; GFX9-O0-NEXT: s_mov_b32 s18, -1 +; GFX9-O0-NEXT: s_mov_b32 s19, 0xe00000 +; GFX9-O0-NEXT: s_add_u32 s16, s16, s4 +; GFX9-O0-NEXT: s_addc_u32 s17, s17, 0 +; GFX9-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 0 +; GFX9-O0-NEXT: s_mov_b32 s4, s1 +; GFX9-O0-NEXT: v_readlane_b32 s1, v0, 0 +; GFX9-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 +; GFX9-O0-NEXT: s_mov_b32 s3, s1 +; GFX9-O0-NEXT: s_mov_b32 s8, s3 +; GFX9-O0-NEXT: s_mov_b32 s9, s2 +; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 +; GFX9-O0-NEXT: s_mov_b32 s1, s4 +; GFX9-O0-NEXT: s_mov_b32 s10, s1 +; GFX9-O0-NEXT: s_mov_b32 s4, s0 +; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 +; GFX9-O0-NEXT: s_mov_b32 s5, s10 +; GFX9-O0-NEXT: s_mov_b32 s6, s9 +; GFX9-O0-NEXT: s_mov_b32 s7, s8 +; GFX9-O0-NEXT: v_writelane_b32 v0, s2, 1 +; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 2 +; GFX9-O0-NEXT: v_writelane_b32 v0, s0, 3 +; GFX9-O0-NEXT: v_writelane_b32 v0, s1, 4 +; GFX9-O0-NEXT: s_mov_b32 s0, 0 +; GFX9-O0-NEXT: s_nop 2 +; GFX9-O0-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], s0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:20 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; implicit-def: $sgpr2_sgpr3 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: 
v_mov_b32_e32 v1, s0 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-O0-NEXT: s_nop 1 +; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 +; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v3, s0 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s0 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 s[0:1], exec +; GFX9-O0-NEXT: v_writelane_b32 v0, s0, 5 +; GFX9-O0-NEXT: v_writelane_b32 v0, s1, 6 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] +; GFX9-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; GFX9-O0-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-O0-NEXT: s_cbranch_execz .LBB8_2 +; GFX9-O0-NEXT: ; %bb.1: ; %if +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:16 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:20 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[0:1], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-O0-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: s_or_saveexec_b64 s[0:1], -1 +; GFX9-O0-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O0-NEXT: v_add_u32_e64 v1, v2, v1 +; GFX9-O0-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: .LBB8_2: ; %merge +; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 5 +; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 6 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-O0-NEXT: v_readlane_b32 s2, v0, 1 +; GFX9-O0-NEXT: v_readlane_b32 s3, v0, 2 +; GFX9-O0-NEXT: v_readlane_b32 s0, v0, 3 +; GFX9-O0-NEXT: v_readlane_b32 s1, v0, 4 +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:12 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v4 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9-O0-NEXT: s_mov_b32 s4, 1 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s4, v3 +; GFX9-O0-NEXT: s_mov_b32 s4, 2 +; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s4 +; GFX9-O0-NEXT: s_mov_b32 s6, s1 +; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 +; GFX9-O0-NEXT: s_mov_b32 s4, s3 +; GFX9-O0-NEXT: s_mov_b32 s5, s2 +; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 +; GFX9-O0-NEXT: s_mov_b32 s1, s6 +; GFX9-O0-NEXT: s_mov_b32 s2, s5 +; GFX9-O0-NEXT: s_mov_b32 s3, s4 +; GFX9-O0-NEXT: s_mov_b32 s4, 0 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s4 offset:4 +; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: s_endpgm +; +; GFX9-O3-LABEL: strict_wwm_cfg: +; GFX9-O3: ; %bb.0: ; %entry 
+; GFX9-O3-NEXT: buffer_load_dwordx2 v[3:4], off, s[0:3], 0 +; GFX9-O3-NEXT: v_mov_b32_e32 v5, 0 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O3-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-O3-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O3-NEXT: s_waitcnt vmcnt(0) +; GFX9-O3-NEXT: v_mov_b32_e32 v2, v3 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O3-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O3-NEXT: v_add_u32_e32 v1, v2, v1 +; GFX9-O3-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O3-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-O3-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-O3-NEXT: s_cbranch_execz .LBB8_2 +; GFX9-O3-NEXT: ; %bb.1: ; %if +; GFX9-O3-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-O3-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-O3-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O3-NEXT: v_mov_b32_e32 v2, v4 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-O3-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-O3-NEXT: v_add_u32_e32 v1, v2, v1 +; GFX9-O3-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O3-NEXT: v_mov_b32_e32 v5, v1 +; GFX9-O3-NEXT: .LBB8_2: ; %merge +; GFX9-O3-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX9-O3-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-O3-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX9-O3-NEXT: v_and_b32_e32 v0, 2, v0 +; GFX9-O3-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 +; GFX9-O3-NEXT: s_endpgm entry: %tmp100 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %tmp14, i32 0, i32 0, i32 0) %tmp101 = bitcast <2 x float> %tmp100 to <2 x i32> %tmp102 = extractelement <2 x i32> %tmp101, i32 0 %tmp105 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp102, i32 0) -; GFX9: v_mov_b32_dpp v[[FIRST_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-O3: v_add_u32_e32 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]] -; GFX9-O0: v_add_u32_e64 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]] -; GFX9: v_mov_b32_e32 v[[FIRST:[0-9]+]], v[[FIRST_ADD]] -; GFX9-O0: buffer_store_dword v[[FIRST]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[FIRST_IMM_OFFSET:[0-9]+]] %tmp120 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp105, i32 323, i32 12, i32 15, i1 false) %tmp121 = add i32 %tmp105, %tmp120 %tmp122 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp121) @@ -260,11 +1157,6 @@ if: %tmp103 = extractelement <2 x i32> %tmp101, i32 1 %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp103, i32 0) -; GFX9: v_mov_b32_dpp v[[SECOND_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-O3: v_add_u32_e32 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]] -; GFX9-O0: v_add_u32_e64 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]] -; GFX9: v_mov_b32_e32 v[[SECOND:[0-9]+]], v[[SECOND_ADD]] -; GFX9-O0: buffer_store_dword v[[SECOND]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[SECOND_IMM_OFFSET:[0-9]+]] %tmp135 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp107, i32 323, i32 12, i32 15, i1 false) %tmp136 = add i32 %tmp107, %tmp135 %tmp137 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp136) @@ -272,10 +1164,6 @@ if: merge: %merge_value = phi i32 [ 0, %entry ], [%tmp137, %if ] -; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]] -; GFX9-O0: 
buffer_load_dword v[[FIRST:[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[FIRST_IMM_OFFSET]] -; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[SECOND_IMM_OFFSET]] -; GFX9-O0: v_cmp_eq_u32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[FIRST]], v[[SECOND]] %tmp138 = icmp eq i32 %tmp122, %merge_value %tmp139 = sext i1 %tmp138 to i32 %tmp140 = shl nsw i32 %tmp139, 1 @@ -285,92 +1173,555 @@ merge: ret void } -; GFX9-LABEL: {{^}}strict_wwm_called: define hidden i32 @strict_wwm_called(i32 %a) noinline { -; GFX9-O3: v_add_u32_e32 v1, v0, v0 -; GFX9-O0: v_add_u32_e64 v1, v0, v0 +; GFX9-O0-LABEL: strict_wwm_called: +; GFX9-O0: ; %bb.0: +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-O0-NEXT: v_add_u32_e64 v1, v0, v0 +; GFX9-O0-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX9-O0-NEXT: v_sub_u32_e64 v0, v0, v1 +; GFX9-O0-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-O3-LABEL: strict_wwm_called: +; GFX9-O3: ; %bb.0: +; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-O3-NEXT: v_add_u32_e32 v1, v0, v0 +; GFX9-O3-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX9-O3-NEXT: v_sub_u32_e32 v0, v0, v1 +; GFX9-O3-NEXT: s_setpc_b64 s[30:31] %add = add i32 %a, %a -; GFX9: v_mul_lo_u32 v0, v1, v0 %mul = mul i32 %add, %a -; GFX9-O3: v_sub_u32_e32 v0, v0, v1 -; GFX9-O0: v_sub_u32_e64 v0, v0, v1 %sub = sub i32 %mul, %add ret i32 %sub } -; GFX9-LABEL: {{^}}strict_wwm_call: define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) { -; GFX9-DAG: s_load_dword [[ARG:s[0-9]+]] -; GFX9-O0-DAG: s_mov_b32 s3, 0{{$}} -; GFX9-O0-DAG: v_mov_b32_e32 v6, [[ARG]] +; GFX9-O0-LABEL: strict_wwm_call: +; GFX9-O0: ; %bb.0: +; GFX9-O0-NEXT: s_mov_b32 s32, 0x400 +; GFX9-O0-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0 +; GFX9-O0-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1 +; GFX9-O0-NEXT: s_mov_b32 s26, -1 +; GFX9-O0-NEXT: s_mov_b32 s27, 0xe00000 +; GFX9-O0-NEXT: s_add_u32 s24, s24, s9 +; GFX9-O0-NEXT: s_addc_u32 s25, s25, 0 +; GFX9-O0-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: s_mov_b32 s14, s8 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O0-NEXT: v_writelane_b32 v7, s8, 0 +; GFX9-O0-NEXT: v_writelane_b32 v7, s9, 1 +; GFX9-O0-NEXT: s_mov_b32 s13, s7 +; GFX9-O0-NEXT: s_mov_b32 s12, s6 +; GFX9-O0-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[2:3] +; GFX9-O0-NEXT: v_readlane_b32 s2, v7, 0 +; GFX9-O0-NEXT: v_readlane_b32 s3, v7, 1 +; GFX9-O0-NEXT: v_writelane_b32 v7, s4, 2 +; GFX9-O0-NEXT: v_writelane_b32 v7, s5, 3 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[0:1] +; GFX9-O0-NEXT: v_readlane_b32 s0, v7, 2 +; GFX9-O0-NEXT: v_readlane_b32 s1, v7, 3 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3] +; GFX9-O0-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 +; GFX9-O0-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x2c +; GFX9-O0-NEXT: s_load_dword s2, s[0:1], 0x34 +; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-O0-NEXT: s_mov_b32 s3, s7 +; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7 +; GFX9-O0-NEXT: s_mov_b32 s7, s9 +; GFX9-O0-NEXT: s_mov_b32 s16, s8 +; GFX9-O0-NEXT: ; kill: def 
$sgpr16 killed $sgpr16 def $sgpr16_sgpr17_sgpr18_sgpr19 +; GFX9-O0-NEXT: s_mov_b32 s17, s7 +; GFX9-O0-NEXT: s_mov_b32 s18, s6 +; GFX9-O0-NEXT: s_mov_b32 s19, s3 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_writelane_b32 v0, s16, 4 +; GFX9-O0-NEXT: v_writelane_b32 v0, s17, 5 +; GFX9-O0-NEXT: v_writelane_b32 v0, s18, 6 +; GFX9-O0-NEXT: v_writelane_b32 v0, s19, 7 +; GFX9-O0-NEXT: s_mov_b32 s3, 0 +; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 8 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, s2 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, s3 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1 +; GFX9-O0-NEXT: v_writelane_b32 v0, s2, 9 +; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 10 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_mov_b64 s[6:7], 56 +; GFX9-O0-NEXT: s_mov_b32 s2, s0 +; GFX9-O0-NEXT: s_mov_b32 s0, s1 +; GFX9-O0-NEXT: s_mov_b32 s3, s6 +; GFX9-O0-NEXT: s_mov_b32 s1, s7 +; GFX9-O0-NEXT: s_add_u32 s8, s2, s3 +; GFX9-O0-NEXT: s_addc_u32 s0, s0, s1 +; GFX9-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 +; GFX9-O0-NEXT: s_mov_b32 s9, s0 +; GFX9-O0-NEXT: s_getpc_b64 s[16:17] +; GFX9-O0-NEXT: s_add_u32 s16, s16, strict_wwm_called@rel32@lo+4 +; GFX9-O0-NEXT: s_addc_u32 s17, s17, strict_wwm_called@rel32@hi+12 +; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[24:25] +; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[26:27] +; GFX9-O0-NEXT: s_mov_b32 s6, 20 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s6, v3 +; GFX9-O0-NEXT: s_mov_b32 s6, 10 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v4, s6, v4 +; GFX9-O0-NEXT: v_or3_b32 v3, v5, v4, v3 +; GFX9-O0-NEXT: ; implicit-def: $sgpr6_sgpr7 +; GFX9-O0-NEXT: ; implicit-def: $sgpr15 +; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6 +; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s0, v1, 4 +; GFX9-O0-NEXT: v_readlane_b32 s1, v1, 5 +; GFX9-O0-NEXT: v_readlane_b32 s2, v1, 6 +; GFX9-O0-NEXT: v_readlane_b32 s3, v1, 7 +; GFX9-O0-NEXT: v_readlane_b32 s6, v1, 9 +; GFX9-O0-NEXT: v_readlane_b32 s7, v1, 10 +; GFX9-O0-NEXT: v_readlane_b32 s4, v1, 8 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: v_add_u32_e64 v3, v3, v6 +; GFX9-O0-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s4 offset:4 +; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: s_endpgm +; +; GFX9-O3-LABEL: strict_wwm_call: +; GFX9-O3: ; %bb.0: +; GFX9-O3-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GFX9-O3-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GFX9-O3-NEXT: s_mov_b32 s14, -1 +; GFX9-O3-NEXT: s_load_dword s2, s[0:1], 0x34 +; GFX9-O3-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX9-O3-NEXT: s_mov_b32 s15, 0xe00000 +; GFX9-O3-NEXT: s_add_u32 s12, s12, s3 +; GFX9-O3-NEXT: s_addc_u32 s13, s13, 0 +; GFX9-O3-NEXT: s_mov_b32 s32, 0 +; GFX9-O3-NEXT: s_getpc_b64 s[8:9] +; GFX9-O3-NEXT: s_add_u32 s8, s8, strict_wwm_called@rel32@lo+4 +; GFX9-O3-NEXT: s_addc_u32 s9, s9, strict_wwm_called@rel32@hi+12 +; GFX9-O3-NEXT: 
s_waitcnt lgkmcnt(0) +; GFX9-O3-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: s_or_saveexec_b64 s[10:11], -1 +; GFX9-O3-NEXT: s_mov_b64 s[0:1], s[12:13] +; GFX9-O3-NEXT: s_mov_b64 s[2:3], s[14:15] +; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GFX9-O3-NEXT: v_mov_b32_e32 v1, v0 +; GFX9-O3-NEXT: v_add_u32_e32 v1, v1, v2 +; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11] +; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1 +; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4 +; GFX9-O3-NEXT: s_endpgm -; GFX9-O3: v_mov_b32_e32 v2, [[ARG]] -; GFX9-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, s3 -; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_not_b64 exec, exec %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0) -; GFX9-O3: v_mov_b32_e32 v0, v2 -; GFX9-O0: v_mov_b32_e32 v0, v6 -; GFX9: s_swappc_b64 %tmp134 = call i32 @strict_wwm_called(i32 %tmp107) -; GFX9-O3: v_mov_b32_e32 v1, v0 -; GFX9-O3: v_add_u32_e32 v1, v1, v2 -; GFX9-O0: v_mov_b32_e32 v3, v0 -; GFX9-O0: v_add_u32_e64 v3, v3, v6 %tmp136 = add i32 %tmp134, %tmp107 %tmp137 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp136) -; GFX9-O0: buffer_store_dword v1 -; GFX9-O3: buffer_store_dword v0 call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %tmp137, ptr addrspace(8) %tmp14, i32 4, i32 0, i32 0) ret void } -; GFX9-LABEL: {{^}}strict_wwm_called_i64: define i64 @strict_wwm_called_i64(i64 %a) noinline { +; GFX9-O0-LABEL: strict_wwm_called_i64: +; GFX9-O0: ; %bb.0: +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O0-NEXT: ; implicit-def: $sgpr4 +; GFX9-O0-NEXT: ; implicit-def: $sgpr4 +; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: v_add_co_u32_e64 v4, s[4:5], v4, v5 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v0, s[4:5], v0, v1, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 +; GFX9-O0-NEXT: s_mov_b32 s4, 32 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-O0-NEXT: v_mul_lo_u32 v1, v0, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 +; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], s4, v[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6 +; GFX9-O0-NEXT: v_mul_lo_u32 v2, v2, v3 +; GFX9-O0-NEXT: v_mad_u64_u32 v[6:7], s[6:7], v0, v3, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7 +; GFX9-O0-NEXT: v_add3_u32 v0, v0, v1, v2 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; implicit-def: $sgpr6 +; GFX9-O0-NEXT: ; implicit-def: $sgpr6 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], s4, v[0:1] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: s_mov_b32 s5, 0 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 
killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7 +; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 +; GFX9-O0-NEXT: v_or_b32_e64 v6, v1, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-O0-NEXT: v_sub_co_u32_e64 v1, s[6:7], v1, v3 +; GFX9-O0-NEXT: v_subb_co_u32_e64 v0, s[6:7], v0, v2, s[6:7] +; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 +; GFX9-O0-NEXT: v_lshrrev_b64 v[1:2], s4, v[1:2] +; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec +; GFX9-O0-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-O3-LABEL: strict_wwm_called_i64: +; GFX9-O3: ; %bb.0: +; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-O3-NEXT: v_add_co_u32_e32 v2, vcc, v0, v0 +; GFX9-O3-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v1, vcc +; GFX9-O3-NEXT: v_mul_lo_u32 v4, v3, v0 +; GFX9-O3-NEXT: v_mul_lo_u32 v5, v2, v1 +; GFX9-O3-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, 0 +; GFX9-O3-NEXT: v_add3_u32 v1, v1, v5, v4 +; GFX9-O3-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2 +; GFX9-O3-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-O3-NEXT: s_setpc_b64 s[30:31] %add = add i64 %a, %a %mul = mul i64 %add, %a %sub = sub i64 %mul, %add ret i64 %sub } -; GFX9-LABEL: {{^}}strict_wwm_call_i64: define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %arg) { -; GFX9: s_load_dwordx2 s[[[ARG_LO:[0-9]+]]:[[ARG_HI:[0-9]+]]]{{.*}}, 0x34 +; GFX9-O0-LABEL: strict_wwm_call_i64: +; GFX9-O0: ; %bb.0: +; GFX9-O0-NEXT: s_mov_b32 s32, 0x400 +; GFX9-O0-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0 +; GFX9-O0-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1 +; GFX9-O0-NEXT: s_mov_b32 s26, -1 +; GFX9-O0-NEXT: s_mov_b32 s27, 0xe00000 +; GFX9-O0-NEXT: s_add_u32 s24, s24, s9 +; GFX9-O0-NEXT: s_addc_u32 s25, s25, 0 +; GFX9-O0-NEXT: ; implicit-def: $vgpr12 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: s_mov_b32 s14, s8 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O0-NEXT: v_writelane_b32 v12, s8, 0 +; GFX9-O0-NEXT: v_writelane_b32 v12, s9, 1 +; GFX9-O0-NEXT: s_mov_b32 s13, s7 +; GFX9-O0-NEXT: s_mov_b32 s12, s6 +; GFX9-O0-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[2:3] +; GFX9-O0-NEXT: v_readlane_b32 s2, v12, 0 +; GFX9-O0-NEXT: v_readlane_b32 s3, v12, 1 +; GFX9-O0-NEXT: v_writelane_b32 v12, s4, 2 +; GFX9-O0-NEXT: v_writelane_b32 v12, s5, 3 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[0:1] +; GFX9-O0-NEXT: v_readlane_b32 s0, v12, 2 +; GFX9-O0-NEXT: v_readlane_b32 s1, v12, 3 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3] +; GFX9-O0-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24 +; GFX9-O0-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x2c +; GFX9-O0-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 +; GFX9-O0-NEXT: 
s_waitcnt lgkmcnt(0) +; GFX9-O0-NEXT: s_mov_b32 s6, s9 +; GFX9-O0-NEXT: s_mov_b32 s7, s8 +; GFX9-O0-NEXT: s_mov_b32 s8, s17 +; GFX9-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 killed $sgpr16_sgpr17 +; GFX9-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17_sgpr18_sgpr19 +; GFX9-O0-NEXT: s_mov_b32 s17, s8 +; GFX9-O0-NEXT: s_mov_b32 s18, s7 +; GFX9-O0-NEXT: s_mov_b32 s19, s6 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_writelane_b32 v0, s16, 4 +; GFX9-O0-NEXT: v_writelane_b32 v0, s17, 5 +; GFX9-O0-NEXT: v_writelane_b32 v0, s18, 6 +; GFX9-O0-NEXT: v_writelane_b32 v0, s19, 7 +; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, s2 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, s3 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, s6 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, s7 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1 +; GFX9-O0-NEXT: v_writelane_b32 v0, s2, 8 +; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 9 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_mov_b64 s[6:7], 60 +; GFX9-O0-NEXT: s_mov_b32 s2, s0 +; GFX9-O0-NEXT: s_mov_b32 s0, s1 +; GFX9-O0-NEXT: s_mov_b32 s3, s6 +; GFX9-O0-NEXT: s_mov_b32 s1, s7 +; GFX9-O0-NEXT: s_add_u32 s8, s2, s3 +; GFX9-O0-NEXT: s_addc_u32 s0, s0, s1 +; GFX9-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 +; GFX9-O0-NEXT: s_mov_b32 s9, s0 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 +; GFX9-O0-NEXT: s_mov_b32 s0, 32 +; GFX9-O0-NEXT: ; implicit-def: $sgpr2_sgpr3 +; GFX9-O0-NEXT: v_lshrrev_b64 v[10:11], s0, v[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 +; GFX9-O0-NEXT: s_getpc_b64 s[0:1] +; GFX9-O0-NEXT: s_add_u32 s0, s0, strict_wwm_called_i64@gotpcrel32@lo+4 +; GFX9-O0-NEXT: s_addc_u32 s1, s1, strict_wwm_called_i64@gotpcrel32@hi+12 +; GFX9-O0-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 +; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[24:25] +; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[26:27] +; GFX9-O0-NEXT: s_mov_b32 s6, 20 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s6, v3 +; GFX9-O0-NEXT: s_mov_b32 s6, 10 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v4, s6, v4 +; GFX9-O0-NEXT: v_or3_b32 v3, v5, v4, v3 +; GFX9-O0-NEXT: ; implicit-def: $sgpr6_sgpr7 +; GFX9-O0-NEXT: ; implicit-def: $sgpr15 +; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7 +; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s0, v2, 4 +; GFX9-O0-NEXT: v_readlane_b32 s1, v2, 5 +; GFX9-O0-NEXT: v_readlane_b32 s2, v2, 6 +; GFX9-O0-NEXT: v_readlane_b32 s3, v2, 7 +; GFX9-O0-NEXT: v_readlane_b32 s4, v2, 8 +; GFX9-O0-NEXT: v_readlane_b32 s5, v2, 9 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O0-NEXT: ; implicit-def: $sgpr6 +; GFX9-O0-NEXT: ; implicit-def: $sgpr6 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9 +; GFX9-O0-NEXT: v_add_co_u32_e64 v3, s[6:7], v3, v5 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[6:7], v4, v6, s[6:7] +; 
GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4 +; GFX9-O0-NEXT: s_mov_b32 s4, 0 +; GFX9-O0-NEXT: buffer_store_dwordx2 v[1:2], off, s[0:3], s4 offset:4 +; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: s_endpgm +; +; GFX9-O3-LABEL: strict_wwm_call_i64: +; GFX9-O3: ; %bb.0: +; GFX9-O3-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GFX9-O3-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GFX9-O3-NEXT: s_mov_b32 s14, -1 +; GFX9-O3-NEXT: s_mov_b32 s15, 0xe00000 +; GFX9-O3-NEXT: s_add_u32 s12, s12, s3 +; GFX9-O3-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 +; GFX9-O3-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24 +; GFX9-O3-NEXT: s_mov_b32 s32, 0 +; GFX9-O3-NEXT: s_addc_u32 s13, s13, 0 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[0:1], -1 +; GFX9-O3-NEXT: s_getpc_b64 s[4:5] +; GFX9-O3-NEXT: s_add_u32 s4, s4, strict_wwm_called_i64@gotpcrel32@lo+4 +; GFX9-O3-NEXT: s_addc_u32 s5, s5, strict_wwm_called_i64@gotpcrel32@hi+12 +; GFX9-O3-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX9-O3-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-O3-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-O3-NEXT: v_mov_b32_e32 v6, s2 +; GFX9-O3-NEXT: v_mov_b32_e32 v7, s3 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0 +; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-O3-NEXT: s_mov_b64 s[0:1], s[12:13] +; GFX9-O3-NEXT: s_mov_b64 s[2:3], s[14:15] +; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6 +; GFX9-O3-NEXT: v_mov_b32_e32 v1, v7 +; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-O3-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O3-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O3-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 +; GFX9-O3-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v7, vcc +; GFX9-O3-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 offset:4 +; GFX9-O3-NEXT: s_endpgm -; GFX9-O0: s_mov_b64 s[[[ZERO_LO:[0-9]+]]:[[ZERO_HI:[0-9]+]]], 0{{$}} -; GFX9-O0-DAG: v_mov_b32_e32 v9, s[[ARG_HI]] -; GFX9-O0-DAG: v_mov_b32_e32 v8, s[[ARG_LO]] -; GFX9-O3-DAG: v_mov_b32_e32 v7, s[[ARG_HI]] -; GFX9-O3-DAG: v_mov_b32_e32 v6, s[[ARG_LO]] -; GFX9: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, s[[ZERO_LO]] -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s[[ZERO_HI]] -; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0 -; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0 -; GFX9-NEXT: s_not_b64 exec, exec %tmp107 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %arg, i64 0) -; GFX9: s_swappc_b64 %tmp134 = call i64 @strict_wwm_called_i64(i64 %tmp107) %tmp136 = add i64 %tmp134, %tmp107 %tmp137 = tail call i64 @llvm.amdgcn.strict.wwm.i64(i64 %tmp136) %tmp138 = bitcast i64 %tmp137 to <2 x i32> -; GFX9: buffer_store_dwordx2 call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> %tmp138, ptr addrspace(8) %tmp14, i32 4, i32 0, i32 0) ret void } -; GFX9-LABEL: {{^}}strict_wwm_amdgpu_cs_main: define amdgpu_cs void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %index) { +; GFX9-O0-LABEL: strict_wwm_amdgpu_cs_main: +; GFX9-O0: ; %bb.0: +; GFX9-O0-NEXT: s_mov_b32 s4, s3 +; GFX9-O0-NEXT: s_mov_b32 s5, s2 +; GFX9-O0-NEXT: s_mov_b32 s6, s1 +; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 +; GFX9-O0-NEXT: s_mov_b32 s1, s6 +; GFX9-O0-NEXT: s_mov_b32 s2, s5 +; GFX9-O0-NEXT: s_mov_b32 s3, s4 +; GFX9-O0-NEXT: ; kill: def $sgpr4_sgpr5_sgpr6_sgpr7 killed $sgpr0_sgpr1_sgpr2_sgpr3 +; GFX9-O0-NEXT: s_mov_b32 s4, 5 +; GFX9-O0-NEXT: 
v_lshlrev_b32_e64 v0, s4, v0 +; GFX9-O0-NEXT: s_mov_b32 s4, 0 +; GFX9-O0-NEXT: buffer_load_dwordx4 v[10:13], v0, s[0:3], s4 offen +; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], v0, s[0:3], s4 offen offset:16 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 +; GFX9-O0-NEXT: s_mov_b32 s5, 0x7fffffff +; GFX9-O0-NEXT: s_mov_b32 s6, -1 +; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 +; GFX9-O0-NEXT: s_mov_b32 s7, s5 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7 +; GFX9-O0-NEXT: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; implicit-def: $sgpr5 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v9 +; GFX9-O0-NEXT: buffer_store_dwordx4 v[5:8], v0, s[0:3], s4 offen +; GFX9-O0-NEXT: buffer_store_dwordx2 v[3:4], v0, s[0:3], s4 offen offset:16 +; GFX9-O0-NEXT: s_endpgm +; +; GFX9-O3-LABEL: strict_wwm_amdgpu_cs_main: +; GFX9-O3: ; %bb.0: +; GFX9-O3-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX9-O3-NEXT: buffer_load_dwordx4 v[7:10], v0, s[0:3], 0 offen +; GFX9-O3-NEXT: buffer_load_dwordx2 v[11:12], v0, s[0:3], 0 offen offset:16 +; GFX9-O3-NEXT: s_mov_b32 s4, -1 +; GFX9-O3-NEXT: s_brev_b32 s5, -2 +; GFX9-O3-NEXT: s_waitcnt vmcnt(1) +; GFX9-O3-NEXT: v_mov_b32_e32 v1, v7 +; GFX9-O3-NEXT: v_mov_b32_e32 v2, v8 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-O3-NEXT: v_mov_b32_e32 v2, s5 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v3, v9 +; GFX9-O3-NEXT: v_mov_b32_e32 v4, v10 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-O3-NEXT: v_mov_b32_e32 v4, s5 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: s_waitcnt vmcnt(0) +; GFX9-O3-NEXT: v_mov_b32_e32 v5, v11 +; GFX9-O3-NEXT: v_mov_b32_e32 v6, v12 +; 
GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v5, s4 +; GFX9-O3-NEXT: v_mov_b32_e32 v6, s5 +; GFX9-O3-NEXT: s_not_b64 exec, exec +; GFX9-O3-NEXT: v_mov_b32_e32 v7, v1 +; GFX9-O3-NEXT: v_mov_b32_e32 v9, v3 +; GFX9-O3-NEXT: v_mov_b32_e32 v8, v2 +; GFX9-O3-NEXT: v_mov_b32_e32 v10, v4 +; GFX9-O3-NEXT: v_mov_b32_e32 v11, v5 +; GFX9-O3-NEXT: v_mov_b32_e32 v12, v6 +; GFX9-O3-NEXT: buffer_store_dwordx4 v[7:10], v0, s[0:3], 0 offen +; GFX9-O3-NEXT: buffer_store_dwordx2 v[11:12], v0, s[0:3], 0 offen offset:16 +; GFX9-O3-NEXT: s_endpgm %tmp17 = shl i32 %index, 5 -; GFX9: buffer_load_dwordx4 %tmp18 = tail call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %tmp17, i32 0) %.i0.upto1.bc = bitcast <4 x i32> %tmp18 to <2 x i64> %tmp19 = or i32 %tmp17, 16 -; GFX9: buffer_load_dwordx2 %tmp20 = tail call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %tmp19, i32 0) %.i0.upto1.extract = extractelement <2 x i64> %.i0.upto1.bc, i32 0 %tmp22 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %.i0.upto1.extract, i64 9223372036854775807) @@ -385,11 +1736,9 @@ define amdgpu_cs void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %ind %.cast6 = bitcast i64 %tmp174 to <2 x float> %.cast7 = bitcast i64 %tmp251 to <2 x float> %tmp254 = shufflevector <2 x float> %.cast, <2 x float> %.cast6, <4 x i32> -; GFX9: buffer_store_dwordx4 %desc.int = bitcast <4 x i32> %desc to i128 %desc.ptr = inttoptr i128 %desc.int to ptr addrspace(8) tail call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %tmp254, ptr addrspace(8) %desc.ptr, i32 %tmp17, i32 0, i32 0) - ; GFX9: buffer_store_dwordx2 tail call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %.cast7, ptr addrspace(8)%desc.ptr, i32 %tmp19, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir index 782b05d5500300..96461e044813d4 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir @@ -1117,8 +1117,7 @@ body: | ; SOFT-NOT: G_FCMP ; For soft float we just need to return a '-1' constant, but the truncation ; to 1 bit is converted by the combiner to the following masking sequence. - ; SOFT: [[R:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SOFT: [[REXT:%[0-9]+]]:_(s32) = COPY [[R]](s32) + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SOFT-NOT: G_FCMP ; CHECK: $r0 = COPY [[REXT]] ... @@ -1152,8 +1151,7 @@ body: | ; SOFT-NOT: G_FCMP ; For soft float we just need to return a '0' constant, but the truncation ; to 1 bit is converted by the combiner to the following masking sequence. - ; SOFT: [[R:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SOFT: [[REXT:%[0-9]+]]:_(s32) = COPY [[R]](s32) + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; SOFT-NOT: G_FCMP ; CHECK: $r0 = COPY [[REXT]] ... @@ -1825,8 +1823,7 @@ body: | ; SOFT-NOT: G_FCMP ; The result needs to be truncated, and the combiner turns the truncation ; into the following masking sequence. 
- ; SOFT: [[MASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SOFT: [[REXT:%[0-9]+]]:_(s32) = COPY [[MASK]] + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) $r0 = COPY %7(s32) @@ -1870,11 +1867,9 @@ body: | ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(false), [[X]](s64), [[Y]] ; HARD: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) ; SOFT-NOT: G_FCMP - ; SOFT: [[R:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; The result needs to be truncated, and the combiner turns the truncation ; into the following masking sequence. - ; SOFT: [[REXT:%[0-9]+]]:_(s32) = COPY [[R]] - ; SOFT-NOT: G_FCMP ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) $r0 = COPY %7(s32) diff --git a/llvm/test/CodeGen/M68k/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/M68k/GlobalISel/legalize-udiv.mir index 439a961113908f..7e12bdded3f652 100644 --- a/llvm/test/CodeGen/M68k/GlobalISel/legalize-udiv.mir +++ b/llvm/test/CodeGen/M68k/GlobalISel/legalize-udiv.mir @@ -16,14 +16,14 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s8) from %fixed-stack.0, align 8) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s8) from %fixed-stack.1, align 4) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC1]], [[TRUNC]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[C1]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s8) = G_AND [[TRUNC3]], [[TRUNC2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[C1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s8) = G_AND [[TRUNC2]], [[TRUNC3]] ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s8) = G_UDIV [[AND]], [[AND1]] ; CHECK-NEXT: $bd0 = COPY [[UDIV]](s8) ; CHECK-NEXT: RTS implicit $bd0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir index 8fcdd3fc8da256..52352edbe33921 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir @@ -24,11 +24,12 @@ body: | ; MIPS32-LABEL: name: add_i32 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; MIPS32: $v0 = COPY [[ADD]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; MIPS32-NEXT: $v0 = COPY [[ADD]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_ADD %0, %1 @@ -46,14 +47,15 @@ body: | ; MIPS32-LABEL: name: add_i8_sext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] 
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C]](s32) - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C]](s32) + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -74,13 +76,14 @@ body: | ; MIPS32-LABEL: name: add_i8_zext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -101,11 +104,12 @@ body: | ; MIPS32-LABEL: name: add_i8_aext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[ADD]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[ADD]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -126,14 +130,15 @@ body: | ; MIPS32-LABEL: name: add_i16_sext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C]](s32) - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C]](s32) + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -154,13 +159,14 @@ body: | ; MIPS32-LABEL: name: add_i16_zext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -181,11 +187,12 @@ body: | ; MIPS32-LABEL: name: add_i16_aext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[ADD]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[ADD]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -206,19 +213,18 @@ body: | ; MIPS32-LABEL: name: add_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[COPY1]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY1]] - ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[AND]] - ; MIPS32: $v0 = COPY [[ADD2]](s32) - ; MIPS32: $v1 = COPY [[ADD]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[COPY1]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY1]] + ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[ADD2]](s32) + ; MIPS32-NEXT: $v1 = COPY [[ADD]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %3(s32), %2(s32) @@ -247,44 +253,44 @@ body: | ; MIPS32-LABEL: name: add_i128 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0) - ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) 
:: (load (s32) from %fixed-stack.1) - ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2) - ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[COPY]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY]] - ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[COPY1]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[LOAD1]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[AND]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C1]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND1]] - ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LOAD2]], [[COPY2]] - ; MIPS32: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[LOAD2]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C]] - ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[AND2]] - ; MIPS32: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD4]](s32), [[C1]] - ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[OR]] - ; MIPS32: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP3]], [[AND3]] - ; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[LOAD3]], [[COPY3]] - ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C]] - ; MIPS32: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[AND4]] - ; MIPS32: $v0 = COPY [[ADD]](s32) - ; MIPS32: $v1 = COPY [[ADD2]](s32) - ; MIPS32: $a0 = COPY [[ADD4]](s32) - ; MIPS32: $a1 = COPY [[ADD6]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0) + ; MIPS32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 + ; MIPS32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) + ; MIPS32-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 + ; MIPS32-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2) + ; MIPS32-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 + ; MIPS32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[COPY]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY]] + ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[COPY1]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[LOAD1]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C]] + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]] + ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LOAD2]], [[COPY2]] + ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[LOAD2]] + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C1]] + ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[AND1]] + ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD4]](s32), [[C]] + ; MIPS32-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[OR]] + ; MIPS32-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP3]], [[AND2]] + ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[LOAD3]], [[COPY3]] + ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C1]] + ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[AND3]] + ; MIPS32-NEXT: $v0 = COPY [[ADD]](s32) + ; MIPS32-NEXT: $v1 = COPY [[ADD2]](s32) + ; MIPS32-NEXT: $a0 = COPY [[ADD4]](s32) + ; MIPS32-NEXT: $a1 = COPY [[ADD6]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %4:_(s32) = COPY $a2 @@ -318,18 +324,18 @@ body: | ; MIPS32-LABEL: name: uadd_with_overflow ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] - ; MIPS32: G_STORE [[AND1]](s32), [[COPY3]](p0) :: (store (s8) into %ir.pcarry_flag) - ; MIPS32: G_STORE [[ADD]](s32), [[COPY2]](p0) :: (store (s32) into %ir.padd) - ; MIPS32: RetRA + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY1]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] + ; MIPS32-NEXT: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store (s8) into %ir.pcarry_flag) + ; MIPS32-NEXT: G_STORE [[ADD]](s32), [[COPY2]](p0) :: (store (s32) into %ir.padd) + ; MIPS32-NEXT: RetRA %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(p0) = COPY $a2 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/bitwise.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/bitwise.mir index ecda48cbb76aab..ec88831baa7245 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/bitwise.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/bitwise.mir @@ -42,11 +42,12 @@ body: | ; MIPS32-LABEL: name: and_i1 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 
= COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -67,11 +68,12 @@ body: | ; MIPS32-LABEL: name: and_i8 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -92,11 +94,12 @@ body: | ; MIPS32-LABEL: name: and_i16 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -117,11 +120,12 @@ body: | ; MIPS32-LABEL: name: and_i32 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_AND %1, %0 @@ -139,15 +143,16 @@ body: | ; MIPS32-LABEL: name: and_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[COPY1]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: $v1 = COPY [[AND1]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[COPY1]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: $v1 = COPY [[AND1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -171,11 +176,12 @@ body: | ; MIPS32-LABEL: name: or_i1 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[OR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; 
MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[OR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -196,11 +202,12 @@ body: | ; MIPS32-LABEL: name: or_i8 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[OR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[OR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -221,11 +228,12 @@ body: | ; MIPS32-LABEL: name: or_i16 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[OR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[OR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -246,11 +254,12 @@ body: | ; MIPS32-LABEL: name: or_i32 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[OR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[OR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_OR %1, %0 @@ -268,15 +277,16 @@ body: | ; MIPS32-LABEL: name: or_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[COPY]] - ; MIPS32: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[COPY1]] - ; MIPS32: $v0 = COPY [[OR]](s32) - ; MIPS32: $v1 = COPY [[OR1]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[COPY1]] + ; MIPS32-NEXT: $v0 = COPY [[OR]](s32) + ; MIPS32-NEXT: $v1 = COPY [[OR1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -300,11 +310,12 @@ body: | ; MIPS32-LABEL: name: xor_i1 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - 
; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[XOR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[XOR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -325,11 +336,12 @@ body: | ; MIPS32-LABEL: name: xor_i8 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[XOR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[XOR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -350,11 +362,12 @@ body: | ; MIPS32-LABEL: name: xor_i16 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[XOR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[XOR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -375,11 +388,12 @@ body: | ; MIPS32-LABEL: name: xor_i32 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[XOR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[XOR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_XOR %1, %0 @@ -397,15 +411,16 @@ body: | ; MIPS32-LABEL: name: xor_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY]] - ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY3]], [[COPY1]] - ; MIPS32: $v0 = COPY [[XOR]](s32) - ; MIPS32: $v1 = COPY [[XOR1]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY3]], [[COPY1]] + ; MIPS32-NEXT: $v0 = COPY [[XOR]](s32) + ; MIPS32-NEXT: $v1 = COPY [[XOR1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES 
%2(s32), %3(s32) @@ -429,11 +444,12 @@ body: | ; MIPS32-LABEL: name: shl ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; MIPS32: $v0 = COPY [[SHL]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[SHL]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_SHL %0, %1 @@ -451,11 +467,12 @@ body: | ; MIPS32-LABEL: name: ashr ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_ASHR %0, %1 @@ -473,11 +490,12 @@ body: | ; MIPS32-LABEL: name: lshr ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; MIPS32: $v0 = COPY [[LSHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[LSHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_LSHR %0, %1 @@ -495,11 +513,12 @@ body: | ; MIPS32-LABEL: name: lshr_i64_shift_amount ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; MIPS32: $v0 = COPY [[LSHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[LSHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s64) = G_CONSTANT i64 1 %2:_(s32) = G_LSHR %0, %1 @@ -517,11 +536,12 @@ body: | ; MIPS32-LABEL: name: shlv ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; MIPS32: $v0 = COPY [[SHL]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; MIPS32-NEXT: $v0 = COPY [[SHL]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_SHL %0, %1 @@ -539,11 +559,12 @@ body: | ; MIPS32-LABEL: name: ashrv ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - 
; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_ASHR %0, %1 @@ -561,11 +582,12 @@ body: | ; MIPS32-LABEL: name: lshrv ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY1]](s32) - ; MIPS32: $v0 = COPY [[LSHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY1]](s32) + ; MIPS32-NEXT: $v0 = COPY [[LSHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_LSHR %0, %1 @@ -583,12 +605,12 @@ body: | ; MIPS32-LABEL: name: shl_i16 ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; MIPS32: $v0 = COPY [[SHL]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[SHL]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %1:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %1(s32) %2:_(s16) = G_CONSTANT i16 2 @@ -608,15 +630,15 @@ body: | ; MIPS32-LABEL: name: ashr_i8 ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32) - ; MIPS32: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[COPY1]](s32) - ; MIPS32: $v0 = COPY [[ASHR1]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32) + ; MIPS32-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %1:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %1(s32) %2:_(s8) = G_CONSTANT i8 2 @@ -636,14 +658,14 @@ body: | ; MIPS32-LABEL: name: lshr_i16 ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; MIPS32: $v0 = COPY [[LSHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[LSHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %1:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %1(s32) %2:_(s16) = G_CONSTANT i16 2 @@ -663,30 +685,27 @@ body: | ; MIPS32-LABEL: name: shl_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[C]] - ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY2]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY2]](s32) - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[SUB1]](s32) - ; MIPS32: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY2]](s32) - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; MIPS32: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[SUB]](s32) - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[SHL]], [[C1]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; MIPS32: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[OR]], [[SHL2]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]] - ; MIPS32: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY1]], [[SELECT1]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: $v1 = COPY [[SELECT2]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[C]] + ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY2]] + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY2]](s32) + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[SUB1]](s32) + ; MIPS32-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY2]](s32) + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] + ; MIPS32-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[SUB]](s32) + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SHL]], [[C1]] + ; MIPS32-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[SHL2]] + ; MIPS32-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[COPY1]], [[SELECT1]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: $v1 = COPY [[SELECT2]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -710,32 +729,29 @@ body: | ; MIPS32-LABEL: name: ashl_i64 ; MIPS32: liveins: 
$a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[C]] - ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY2]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[COPY2]](s32) - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY2]](s32) - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[SUB1]](s32) - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; MIPS32: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C2]](s32) - ; MIPS32: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[SUB]](s32) - ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[ASHR2]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C3]] - ; MIPS32: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[SELECT]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; MIPS32: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ASHR]], [[ASHR1]] - ; MIPS32: $v0 = COPY [[SELECT1]](s32) - ; MIPS32: $v1 = COPY [[SELECT2]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[C]] + ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY2]] + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[COPY2]](s32) + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY2]](s32) + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[SUB1]](s32) + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] + ; MIPS32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; MIPS32-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C2]](s32) + ; MIPS32-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[SUB]](s32) + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[ASHR2]] + ; MIPS32-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[COPY]], [[SELECT]] + ; MIPS32-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ASHR]], [[ASHR1]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT1]](s32) + ; MIPS32-NEXT: $v1 = COPY [[SELECT2]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -759,30 +775,27 @@ body: | ; MIPS32-LABEL: name: lshr_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = 
G_SUB [[COPY2]], [[C]] - ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY2]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[COPY2]](s32) - ; MIPS32: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY2]](s32) - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[SUB1]](s32) - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL]] - ; MIPS32: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[SUB]](s32) - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[LSHR2]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]] - ; MIPS32: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[SELECT]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; MIPS32: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[LSHR]], [[C1]] - ; MIPS32: $v0 = COPY [[SELECT1]](s32) - ; MIPS32: $v1 = COPY [[SELECT2]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[C]] + ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY2]] + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[COPY2]](s32) + ; MIPS32-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY2]](s32) + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[SUB1]](s32) + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL]] + ; MIPS32-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[SUB]](s32) + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[LSHR2]] + ; MIPS32-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[COPY]], [[SELECT]] + ; MIPS32-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[LSHR]], [[C1]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT1]](s32) + ; MIPS32-NEXT: $v1 = COPY [[SELECT2]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/constants.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/constants.mir index 9265bae16b899b..0ced5d52d262c4 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/constants.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/constants.mir @@ -20,10 +20,10 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: any_i64 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; MIPS32: $v0 = COPY [[C1]](s32) - ; MIPS32: $v1 = COPY [[C]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; MIPS32-NEXT: $v0 = COPY [[C1]](s32) + ; MIPS32-NEXT: $v1 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %0:_(s64) = G_CONSTANT i64 -9223372036854775808 
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(s64) $v0 = COPY %2(s32) @@ -39,8 +39,8 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: any_i32 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; MIPS32: $v0 = COPY [[C]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: $v0 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = G_CONSTANT i32 -2147483648 $v0 = COPY %0(s32) RetRA implicit $v0 @@ -54,9 +54,9 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: signed_i16 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: $v0 = COPY [[COPY]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[COPY]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s16) = G_CONSTANT i16 -32768 %1:_(s32) = G_SEXT %0(s16) $v0 = COPY %1(s32) @@ -71,9 +71,9 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: signed_i8 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -128 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: $v0 = COPY [[COPY]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[COPY]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s8) = G_CONSTANT i8 -128 %1:_(s32) = G_SEXT %0(s8) $v0 = COPY %1(s32) @@ -88,8 +88,8 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: unsigned_i16 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768 - ; MIPS32: $v0 = COPY [[C]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: $v0 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s16) = G_CONSTANT i16 -32768 %1:_(s32) = G_ZEXT %0(s16) $v0 = COPY %1(s32) @@ -104,8 +104,8 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: unsigned_i8 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; MIPS32: $v0 = COPY [[C]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: $v0 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s8) = G_CONSTANT i8 -128 %1:_(s32) = G_ZEXT %0(s8) $v0 = COPY %1(s32) @@ -120,9 +120,8 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: i1_true ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: $v0 = COPY [[COPY]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: $v0 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s1) = G_CONSTANT i1 true %1:_(s32) = G_ZEXT %0(s1) $v0 = COPY %1(s32) @@ -137,9 +136,8 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: i1_false ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: $v0 = COPY [[COPY]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: $v0 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s1) = G_CONSTANT i1 false %1:_(s32) = G_ZEXT %0(s1) $v0 = COPY %1(s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir index f3865435873ada..73ea9338426c24 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir @@ -10,10 +10,11 @@ body: | ; MIPS32-LABEL: name: ctlz_i32 ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32) - ; MIPS32: $v0 = COPY [[CTLZ]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32) + ; MIPS32-NEXT: $v0 = COPY [[CTLZ]](s32) + ; MIPS32-NEXT: RetRA 
implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = G_CTLZ %0(s32) $v0 = COPY %1(s32) @@ -30,20 +31,19 @@ body: | ; MIPS32-LABEL: name: ctlz_i64 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32) - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTLZ]], [[C1]] - ; MIPS32: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[COPY1]](s32) - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[CTLZ1]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: $v1 = COPY [[C]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; MIPS32-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32) + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTLZ]], [[C1]] + ; MIPS32-NEXT: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[COPY1]](s32) + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[CTLZ1]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: $v1 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %1:_(s32) = COPY $a0 %2:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir index 78a182ff4cfdd2..3e7bcdc39d5d90 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir @@ -10,16 +10,17 @@ body: | ; MIPS32-LABEL: name: cttz_i32 ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C]] - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]] - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[CTLZ]] - ; MIPS32: $v0 = COPY [[SUB]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C]] + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]] + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]] + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; MIPS32-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[CTLZ]] + ; MIPS32-NEXT: $v0 = COPY [[SUB]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = G_CTTZ %0(s32) $v0 = COPY %1(s32) @@ -36,29 +37,28 @@ body: | ; MIPS32-LABEL: name: cttz_i64 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; 
MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C1]] - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[C1]] - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]] - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ]] - ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB]], [[C2]] - ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C1]] - ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR1]], [[ADD2]] - ; MIPS32: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[AND1]](s32) - ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ1]] - ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ADD1]], [[SUB1]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: $v1 = COPY [[C]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C1]] + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[C1]] + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]] + ; MIPS32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; MIPS32-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ]] + ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB]], [[C2]] + ; MIPS32-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C1]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]] + ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR1]], [[ADD2]] + ; MIPS32-NEXT: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[AND1]](s32) + ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ1]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD1]], [[SUB1]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: $v1 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %1:_(s32) = COPY $a0 %2:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) @@ -79,22 +79,22 @@ body: | ; MIPS32-LABEL: name: ffs_i32_expansion ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C2]] - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C2]] - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]] - ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ]] - ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = nuw nsw G_ADD [[SUB]], [[C]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[C1]], [[ADD1]] - ; MIPS32: $v0 = COPY 
[[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C2]] + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C2]] + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]] + ; MIPS32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; MIPS32-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ]] + ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = nuw nsw G_ADD [[SUB]], [[C]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[ADD1]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %2:_(s32) = G_CONSTANT i32 1 %4:_(s32) = G_CONSTANT i32 0 @@ -116,43 +116,41 @@ body: | ; MIPS32-LABEL: name: ffs_i64_expansion ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C1]](s32), [[C1]](s32) - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]] - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C2]] - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[C2]] - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]] - ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ]] - ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB]], [[C3]] - ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C2]] - ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C2]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR1]], [[ADD2]] - ; MIPS32: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[AND1]](s32) - ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ1]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ADD1]], [[SUB1]] - ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[C]] - ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[C1]], [[C1]] - ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[AND3]] - ; MIPS32: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ADD3]](s32), [[ADD5]](s32) - ; MIPS32: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C1]] - ; MIPS32: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C1]] - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR2]], [[XOR3]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C1]] - ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C]] - ; MIPS32: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[MV]], [[MV1]] - ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64) - ; MIPS32: $v0 = COPY [[UV]](s32) - ; MIPS32: $v1 = COPY [[UV1]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; 
MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C1]](s32), [[C1]](s32) + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]] + ; MIPS32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C2]] + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[C2]] + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]] + ; MIPS32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; MIPS32-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ]] + ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB]], [[C3]] + ; MIPS32-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C2]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C2]] + ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR1]], [[ADD2]] + ; MIPS32-NEXT: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[AND1]](s32) + ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ1]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD1]], [[SUB1]] + ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[C]] + ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[C1]], [[C1]] + ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ICMP1]] + ; MIPS32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ADD3]](s32), [[ADD5]](s32) + ; MIPS32-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C1]] + ; MIPS32-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C1]] + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR2]], [[XOR3]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C1]] + ; MIPS32-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s32), [[MV]], [[MV1]] + ; MIPS32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64) + ; MIPS32-NEXT: $v0 = COPY [[UV]](s32) + ; MIPS32-NEXT: $v1 = COPY [[UV1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %1:_(s32) = COPY $a0 %2:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fptosi_and_fptoui.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fptosi_and_fptoui.mir index 88910e3e19fdfb..533cf5e1328062 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fptosi_and_fptoui.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fptosi_and_fptoui.mir @@ -31,28 +31,31 @@ body: | ; FP32-LABEL: name: f32toi64 ; FP32: liveins: $f12 - ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp - ; FP32: $f12 = COPY [[COPY]](s32) - ; FP32: JAL &__fixsfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1 - ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 - ; FP32: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 - ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp - ; FP32: $v0 = COPY [[COPY1]](s32) - ; FP32: $v1 = COPY [[COPY2]](s32) - ; FP32: RetRA implicit $v0, implicit $v1 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP32-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32-NEXT: $f12 = COPY [[COPY]](s32) + ; FP32-NEXT: JAL &__fixsfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def 
$v0, implicit-def $v1 + ; FP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 + ; FP32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 + ; FP32-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32-NEXT: $v0 = COPY [[COPY1]](s32) + ; FP32-NEXT: $v1 = COPY [[COPY2]](s32) + ; FP32-NEXT: RetRA implicit $v0, implicit $v1 + ; ; FP64-LABEL: name: f32toi64 ; FP64: liveins: $f12 - ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp - ; FP64: $f12 = COPY [[COPY]](s32) - ; FP64: JAL &__fixsfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1 - ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 - ; FP64: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 - ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp - ; FP64: $v0 = COPY [[COPY1]](s32) - ; FP64: $v1 = COPY [[COPY2]](s32) - ; FP64: RetRA implicit $v0, implicit $v1 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP64-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64-NEXT: $f12 = COPY [[COPY]](s32) + ; FP64-NEXT: JAL &__fixsfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1 + ; FP64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 + ; FP64-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 + ; FP64-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64-NEXT: $v0 = COPY [[COPY1]](s32) + ; FP64-NEXT: $v1 = COPY [[COPY2]](s32) + ; FP64-NEXT: RetRA implicit $v0, implicit $v1 %0:_(s32) = COPY $f12 %1:_(s64) = G_FPTOSI %0(s32) %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64) @@ -71,16 +74,19 @@ body: | ; FP32-LABEL: name: f32toi32 ; FP32: liveins: $f12 - ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; FP32: $v0 = COPY [[FPTOSI]](s32) - ; FP32: RetRA implicit $v0 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; FP32-NEXT: $v0 = COPY [[FPTOSI]](s32) + ; FP32-NEXT: RetRA implicit $v0 + ; ; FP64-LABEL: name: f32toi32 ; FP64: liveins: $f12 - ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; FP64: $v0 = COPY [[FPTOSI]](s32) - ; FP64: RetRA implicit $v0 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; FP64-NEXT: $v0 = COPY [[FPTOSI]](s32) + ; FP64-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $f12 %1:_(s32) = G_FPTOSI %0(s32) $v0 = COPY %1(s32) @@ -97,22 +103,25 @@ body: | ; FP32-LABEL: name: f32toi16 ; FP32: liveins: $f12 - ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; FP32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; FP32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) - ; FP32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; FP32: $v0 = COPY [[ASHR]](s32) - ; FP32: RetRA implicit $v0 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; FP32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) + ; FP32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; FP32-NEXT: $v0 = COPY [[ASHR]](s32) + ; FP32-NEXT: RetRA implicit $v0 + ; ; FP64-LABEL: name: f32toi16 ; FP64: liveins: $f12 - ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; 
FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; FP64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; FP64: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) - ; FP64: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; FP64: $v0 = COPY [[ASHR]](s32) - ; FP64: RetRA implicit $v0 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; FP64-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) + ; FP64-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; FP64-NEXT: $v0 = COPY [[ASHR]](s32) + ; FP64-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $f12 %1:_(s16) = G_FPTOSI %0(s32) %2:_(s32) = G_SEXT %1(s16) @@ -130,22 +139,25 @@ body: | ; FP32-LABEL: name: f32toi8 ; FP32: liveins: $f12 - ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; FP32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; FP32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) - ; FP32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; FP32: $v0 = COPY [[ASHR]](s32) - ; FP32: RetRA implicit $v0 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; FP32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) + ; FP32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; FP32-NEXT: $v0 = COPY [[ASHR]](s32) + ; FP32-NEXT: RetRA implicit $v0 + ; ; FP64-LABEL: name: f32toi8 ; FP64: liveins: $f12 - ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; FP64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; FP64: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) - ; FP64: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; FP64: $v0 = COPY [[ASHR]](s32) - ; FP64: RetRA implicit $v0 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; FP64-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) + ; FP64-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; FP64-NEXT: $v0 = COPY [[ASHR]](s32) + ; FP64-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $f12 %1:_(s8) = G_FPTOSI %0(s32) %2:_(s32) = G_SEXT %1(s8) @@ -163,28 +175,31 @@ body: | ; FP32-LABEL: name: f64toi64 ; FP32: liveins: $d6 - ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp - ; FP32: $d6 = COPY [[COPY]](s64) - ; FP32: JAL &__fixdfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit-def $v0, implicit-def $v1 - ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 - ; FP32: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 - ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp - ; FP32: $v0 = COPY [[COPY1]](s32) - ; FP32: $v1 = COPY [[COPY2]](s32) - ; FP32: RetRA implicit $v0, implicit $v1 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP32-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32-NEXT: $d6 = COPY [[COPY]](s64) + ; FP32-NEXT: JAL &__fixdfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit-def $v0, implicit-def $v1 + ; FP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 + ; FP32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 + ; FP32-NEXT: 
ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32-NEXT: $v0 = COPY [[COPY1]](s32) + ; FP32-NEXT: $v1 = COPY [[COPY2]](s32) + ; FP32-NEXT: RetRA implicit $v0, implicit $v1 + ; ; FP64-LABEL: name: f64toi64 ; FP64: liveins: $d6 - ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp - ; FP64: $d12_64 = COPY [[COPY]](s64) - ; FP64: JAL &__fixdfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $d12_64, implicit-def $v0, implicit-def $v1 - ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 - ; FP64: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 - ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp - ; FP64: $v0 = COPY [[COPY1]](s32) - ; FP64: $v1 = COPY [[COPY2]](s32) - ; FP64: RetRA implicit $v0, implicit $v1 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP64-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64-NEXT: $d12_64 = COPY [[COPY]](s64) + ; FP64-NEXT: JAL &__fixdfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $d12_64, implicit-def $v0, implicit-def $v1 + ; FP64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 + ; FP64-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 + ; FP64-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64-NEXT: $v0 = COPY [[COPY1]](s32) + ; FP64-NEXT: $v1 = COPY [[COPY2]](s32) + ; FP64-NEXT: RetRA implicit $v0, implicit $v1 %0:_(s64) = COPY $d6 %1:_(s64) = G_FPTOSI %0(s64) %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64) @@ -203,16 +218,19 @@ body: | ; FP32-LABEL: name: f64toi32 ; FP32: liveins: $d6 - ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; FP32: $v0 = COPY [[FPTOSI]](s32) - ; FP32: RetRA implicit $v0 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; FP32-NEXT: $v0 = COPY [[FPTOSI]](s32) + ; FP32-NEXT: RetRA implicit $v0 + ; ; FP64-LABEL: name: f64toi32 ; FP64: liveins: $d6 - ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; FP64: $v0 = COPY [[FPTOSI]](s32) - ; FP64: RetRA implicit $v0 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; FP64-NEXT: $v0 = COPY [[FPTOSI]](s32) + ; FP64-NEXT: RetRA implicit $v0 %0:_(s64) = COPY $d6 %1:_(s32) = G_FPTOSI %0(s64) $v0 = COPY %1(s32) @@ -229,22 +247,25 @@ body: | ; FP32-LABEL: name: f64toi16 ; FP32: liveins: $d6 - ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; FP32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; FP32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) - ; FP32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; FP32: $v0 = COPY [[ASHR]](s32) - ; FP32: RetRA implicit $v0 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; FP32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) + ; FP32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; FP32-NEXT: $v0 = COPY [[ASHR]](s32) + ; FP32-NEXT: RetRA implicit $v0 + ; ; FP64-LABEL: name: f64toi16 ; FP64: liveins: $d6 - ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; FP64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; FP64: [[SHL:%[0-9]+]]:_(s32) 
= G_SHL [[FPTOSI]], [[C]](s32) - ; FP64: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; FP64: $v0 = COPY [[ASHR]](s32) - ; FP64: RetRA implicit $v0 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; FP64-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) + ; FP64-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; FP64-NEXT: $v0 = COPY [[ASHR]](s32) + ; FP64-NEXT: RetRA implicit $v0 %0:_(s64) = COPY $d6 %1:_(s16) = G_FPTOSI %0(s64) %2:_(s32) = G_SEXT %1(s16) @@ -262,22 +283,25 @@ body: | ; FP32-LABEL: name: f64toi8 ; FP32: liveins: $d6 - ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; FP32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; FP32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) - ; FP32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; FP32: $v0 = COPY [[ASHR]](s32) - ; FP32: RetRA implicit $v0 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; FP32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) + ; FP32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; FP32-NEXT: $v0 = COPY [[ASHR]](s32) + ; FP32-NEXT: RetRA implicit $v0 + ; ; FP64-LABEL: name: f64toi8 ; FP64: liveins: $d6 - ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; FP64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; FP64: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) - ; FP64: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; FP64: $v0 = COPY [[ASHR]](s32) - ; FP64: RetRA implicit $v0 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; FP64-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32) + ; FP64-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; FP64-NEXT: $v0 = COPY [[ASHR]](s32) + ; FP64-NEXT: RetRA implicit $v0 %0:_(s64) = COPY $d6 %1:_(s8) = G_FPTOSI %0(s64) %2:_(s32) = G_SEXT %1(s8) @@ -295,28 +319,31 @@ body: | ; FP32-LABEL: name: f32tou64 ; FP32: liveins: $f12 - ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp - ; FP32: $f12 = COPY [[COPY]](s32) - ; FP32: JAL &__fixunssfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1 - ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 - ; FP32: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 - ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp - ; FP32: $v0 = COPY [[COPY1]](s32) - ; FP32: $v1 = COPY [[COPY2]](s32) - ; FP32: RetRA implicit $v0, implicit $v1 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP32-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32-NEXT: $f12 = COPY [[COPY]](s32) + ; FP32-NEXT: JAL &__fixunssfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1 + ; FP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 + ; FP32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 + ; FP32-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32-NEXT: $v0 = COPY [[COPY1]](s32) + ; FP32-NEXT: $v1 = COPY [[COPY2]](s32) + ; FP32-NEXT: 
RetRA implicit $v0, implicit $v1 + ; ; FP64-LABEL: name: f32tou64 ; FP64: liveins: $f12 - ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp - ; FP64: $f12 = COPY [[COPY]](s32) - ; FP64: JAL &__fixunssfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1 - ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 - ; FP64: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 - ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp - ; FP64: $v0 = COPY [[COPY1]](s32) - ; FP64: $v1 = COPY [[COPY2]](s32) - ; FP64: RetRA implicit $v0, implicit $v1 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP64-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64-NEXT: $f12 = COPY [[COPY]](s32) + ; FP64-NEXT: JAL &__fixunssfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1 + ; FP64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 + ; FP64-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 + ; FP64-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64-NEXT: $v0 = COPY [[COPY1]](s32) + ; FP64-NEXT: $v1 = COPY [[COPY2]](s32) + ; FP64-NEXT: RetRA implicit $v0, implicit $v1 %0:_(s32) = COPY $f12 %1:_(s64) = G_FPTOUI %0(s32) %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64) @@ -335,34 +362,33 @@ body: | ; FP32-LABEL: name: f32tou32 ; FP32: liveins: $f12 - ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; FP32: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000 - ; FP32: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]] - ; FP32: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32) - ; FP32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; FP32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] - ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]] - ; FP32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; FP32: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]] - ; FP32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]] - ; FP32: $v0 = COPY [[SELECT]](s32) - ; FP32: RetRA implicit $v0 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000 + ; FP32-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]] + ; FP32-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32) + ; FP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; FP32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] + ; FP32-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]] + ; FP32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]] + ; FP32-NEXT: $v0 = COPY [[SELECT]](s32) + ; FP32-NEXT: RetRA implicit $v0 + ; ; FP64-LABEL: name: f32tou32 ; FP64: liveins: $f12 - ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; FP64: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000 - ; FP64: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]] - ; FP64: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32) - ; FP64: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; FP64: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] - ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]] - ; FP64: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; FP64: [[AND:%[0-9]+]]:_(s32) = 
G_AND [[FCMP]], [[C2]] - ; FP64: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]] - ; FP64: $v0 = COPY [[SELECT]](s32) - ; FP64: RetRA implicit $v0 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000 + ; FP64-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]] + ; FP64-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32) + ; FP64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; FP64-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] + ; FP64-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]] + ; FP64-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]] + ; FP64-NEXT: $v0 = COPY [[SELECT]](s32) + ; FP64-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $f12 %1:_(s32) = G_FPTOUI %0(s32) $v0 = COPY %1(s32) @@ -379,38 +405,37 @@ body: | ; FP32-LABEL: name: f32tou16 ; FP32: liveins: $f12 - ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; FP32: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000 - ; FP32: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]] - ; FP32: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32) - ; FP32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; FP32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] - ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]] - ; FP32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; FP32: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]] - ; FP32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]] - ; FP32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; FP32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] - ; FP32: $v0 = COPY [[AND1]](s32) - ; FP32: RetRA implicit $v0 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000 + ; FP32-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]] + ; FP32-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32) + ; FP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; FP32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] + ; FP32-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]] + ; FP32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]] + ; FP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; FP32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]] + ; FP32-NEXT: $v0 = COPY [[AND]](s32) + ; FP32-NEXT: RetRA implicit $v0 + ; ; FP64-LABEL: name: f32tou16 ; FP64: liveins: $f12 - ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; FP64: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000 - ; FP64: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]] - ; FP64: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32) - ; FP64: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; FP64: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] - ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]] - ; FP64: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; FP64: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]] - ; FP64: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]] - ; FP64: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 65535 - ; FP64: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] - ; FP64: $v0 = COPY [[AND1]](s32) - ; FP64: RetRA implicit $v0 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000 + ; FP64-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]] + ; FP64-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32) + ; FP64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; FP64-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] + ; FP64-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]] + ; FP64-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]] + ; FP64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; FP64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]] + ; FP64-NEXT: $v0 = COPY [[AND]](s32) + ; FP64-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $f12 %1:_(s16) = G_FPTOUI %0(s32) %2:_(s32) = G_ZEXT %1(s16) @@ -428,38 +453,37 @@ body: | ; FP32-LABEL: name: f32tou8 ; FP32: liveins: $f12 - ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; FP32: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000 - ; FP32: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]] - ; FP32: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32) - ; FP32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; FP32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] - ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]] - ; FP32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; FP32: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]] - ; FP32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]] - ; FP32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; FP32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] - ; FP32: $v0 = COPY [[AND1]](s32) - ; FP32: RetRA implicit $v0 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000 + ; FP32-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]] + ; FP32-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32) + ; FP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; FP32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] + ; FP32-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]] + ; FP32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]] + ; FP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; FP32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]] + ; FP32-NEXT: $v0 = COPY [[AND]](s32) + ; FP32-NEXT: RetRA implicit $v0 + ; ; FP64-LABEL: name: f32tou8 ; FP64: liveins: $f12 - ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 - ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; FP64: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000 - ; FP64: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]] - ; FP64: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32) - ; FP64: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; FP64: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] - ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]] - ; FP64: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; FP64: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]] - ; FP64: [[SELECT:%[0-9]+]]:_(s32) = 
G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]] - ; FP64: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; FP64: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] - ; FP64: $v0 = COPY [[AND1]](s32) - ; FP64: RetRA implicit $v0 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000 + ; FP64-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]] + ; FP64-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32) + ; FP64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; FP64-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] + ; FP64-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]] + ; FP64-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]] + ; FP64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; FP64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]] + ; FP64-NEXT: $v0 = COPY [[AND]](s32) + ; FP64-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $f12 %1:_(s8) = G_FPTOUI %0(s32) %2:_(s32) = G_ZEXT %1(s8) @@ -477,28 +501,31 @@ body: | ; FP32-LABEL: name: f64tou64 ; FP32: liveins: $d6 - ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp - ; FP32: $d6 = COPY [[COPY]](s64) - ; FP32: JAL &__fixunsdfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit-def $v0, implicit-def $v1 - ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 - ; FP32: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 - ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp - ; FP32: $v0 = COPY [[COPY1]](s32) - ; FP32: $v1 = COPY [[COPY2]](s32) - ; FP32: RetRA implicit $v0, implicit $v1 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP32-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32-NEXT: $d6 = COPY [[COPY]](s64) + ; FP32-NEXT: JAL &__fixunsdfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit-def $v0, implicit-def $v1 + ; FP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 + ; FP32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 + ; FP32-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32-NEXT: $v0 = COPY [[COPY1]](s32) + ; FP32-NEXT: $v1 = COPY [[COPY2]](s32) + ; FP32-NEXT: RetRA implicit $v0, implicit $v1 + ; ; FP64-LABEL: name: f64tou64 ; FP64: liveins: $d6 - ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp - ; FP64: $d12_64 = COPY [[COPY]](s64) - ; FP64: JAL &__fixunsdfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $d12_64, implicit-def $v0, implicit-def $v1 - ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 - ; FP64: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 - ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp - ; FP64: $v0 = COPY [[COPY1]](s32) - ; FP64: $v1 = COPY [[COPY2]](s32) - ; FP64: RetRA implicit $v0, implicit $v1 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP64-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64-NEXT: $d12_64 = COPY [[COPY]](s64) + ; FP64-NEXT: JAL &__fixunsdfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $d12_64, implicit-def $v0, implicit-def $v1 + ; FP64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0 + ; FP64-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1 + ; FP64-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64-NEXT: $v0 = COPY [[COPY1]](s32) + ; FP64-NEXT: $v1 = COPY [[COPY2]](s32) + ; 
FP64-NEXT: RetRA implicit $v0, implicit $v1 %0:_(s64) = COPY $d6 %1:_(s64) = G_FPTOUI %0(s64) %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64) @@ -517,34 +544,33 @@ body: | ; FP32-LABEL: name: f64tou32 ; FP32: liveins: $d6 - ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; FP32: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000 - ; FP32: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]] - ; FP32: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64) - ; FP32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; FP32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] - ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]] - ; FP32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; FP32: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]] - ; FP32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]] - ; FP32: $v0 = COPY [[SELECT]](s32) - ; FP32: RetRA implicit $v0 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; FP32-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000 + ; FP32-NEXT: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]] + ; FP32-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64) + ; FP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; FP32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] + ; FP32-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]] + ; FP32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]] + ; FP32-NEXT: $v0 = COPY [[SELECT]](s32) + ; FP32-NEXT: RetRA implicit $v0 + ; ; FP64-LABEL: name: f64tou32 ; FP64: liveins: $d6 - ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; FP64: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000 - ; FP64: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]] - ; FP64: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64) - ; FP64: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; FP64: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] - ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]] - ; FP64: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; FP64: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]] - ; FP64: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]] - ; FP64: $v0 = COPY [[SELECT]](s32) - ; FP64: RetRA implicit $v0 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; FP64-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000 + ; FP64-NEXT: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]] + ; FP64-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64) + ; FP64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; FP64-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] + ; FP64-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]] + ; FP64-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]] + ; FP64-NEXT: $v0 = COPY [[SELECT]](s32) + ; FP64-NEXT: RetRA implicit $v0 %0:_(s64) = COPY $d6 %1:_(s32) = G_FPTOUI %0(s64) $v0 = COPY %1(s32) @@ -561,38 +587,37 @@ body: | ; FP32-LABEL: name: f64tou16 ; FP32: liveins: $d6 - ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; FP32: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 
0x41E0000000000000 - ; FP32: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]] - ; FP32: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64) - ; FP32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; FP32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] - ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]] - ; FP32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; FP32: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]] - ; FP32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]] - ; FP32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; FP32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] - ; FP32: $v0 = COPY [[AND1]](s32) - ; FP32: RetRA implicit $v0 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; FP32-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000 + ; FP32-NEXT: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]] + ; FP32-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64) + ; FP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; FP32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] + ; FP32-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]] + ; FP32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]] + ; FP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; FP32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]] + ; FP32-NEXT: $v0 = COPY [[AND]](s32) + ; FP32-NEXT: RetRA implicit $v0 + ; ; FP64-LABEL: name: f64tou16 ; FP64: liveins: $d6 - ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; FP64: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000 - ; FP64: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]] - ; FP64: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64) - ; FP64: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; FP64: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] - ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]] - ; FP64: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; FP64: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]] - ; FP64: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]] - ; FP64: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; FP64: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] - ; FP64: $v0 = COPY [[AND1]](s32) - ; FP64: RetRA implicit $v0 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; FP64-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000 + ; FP64-NEXT: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]] + ; FP64-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64) + ; FP64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; FP64-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] + ; FP64-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]] + ; FP64-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]] + ; FP64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; FP64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]] + ; FP64-NEXT: $v0 = COPY [[AND]](s32) + ; FP64-NEXT: RetRA implicit $v0 %0:_(s64) = COPY $d6 %1:_(s16) = G_FPTOUI %0(s64) %2:_(s32) = G_ZEXT %1(s16) @@ -610,38 +635,37 @@ body: | ; FP32-LABEL: name: f64tou8 ; FP32: liveins: $d6 - ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP32: 
[[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; FP32: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000 - ; FP32: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]] - ; FP32: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64) - ; FP32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; FP32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] - ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]] - ; FP32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; FP32: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]] - ; FP32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]] - ; FP32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; FP32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] - ; FP32: $v0 = COPY [[AND1]](s32) - ; FP32: RetRA implicit $v0 + ; FP32-NEXT: {{ $}} + ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; FP32-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000 + ; FP32-NEXT: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]] + ; FP32-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64) + ; FP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; FP32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] + ; FP32-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]] + ; FP32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]] + ; FP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; FP32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]] + ; FP32-NEXT: $v0 = COPY [[AND]](s32) + ; FP32-NEXT: RetRA implicit $v0 + ; ; FP64-LABEL: name: f64tou8 ; FP64: liveins: $d6 - ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; FP64: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000 - ; FP64: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]] - ; FP64: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64) - ; FP64: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; FP64: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] - ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]] - ; FP64: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; FP64: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]] - ; FP64: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]] - ; FP64: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; FP64: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] - ; FP64: $v0 = COPY [[AND1]](s32) - ; FP64: RetRA implicit $v0 + ; FP64-NEXT: {{ $}} + ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; FP64-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000 + ; FP64-NEXT: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]] + ; FP64-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64) + ; FP64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; FP64-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]] + ; FP64-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]] + ; FP64-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]] + ; FP64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; FP64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]] + ; FP64-NEXT: $v0 = COPY [[AND]](s32) + ; FP64-NEXT: RetRA implicit $v0 %0:_(s64) = COPY $d6 %1:_(s8) = G_FPTOUI %0(s64) %2:_(s32) = G_ZEXT %1(s8) diff --git 
a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir index c7a7a10b3f9218..3c81427231a359 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir @@ -28,11 +28,12 @@ body: | ; MIPS32-LABEL: name: ne_i32 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; MIPS32: $v0 = COPY [[ICMP]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; MIPS32-NEXT: $v0 = COPY [[ICMP]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s1) = G_ICMP intpred(ne), %0(s32), %1 @@ -51,11 +52,12 @@ body: | ; MIPS32-LABEL: name: eq_ptr ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](p0), [[COPY1]] - ; MIPS32: $v0 = COPY [[ICMP]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](p0), [[COPY1]] + ; MIPS32-NEXT: $v0 = COPY [[ICMP]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(s1) = G_ICMP intpred(eq), %0(p0), %1 @@ -74,14 +76,15 @@ body: | ; MIPS32-LABEL: name: ult_i8 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] - ; MIPS32: $v0 = COPY [[ICMP]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] + ; MIPS32-NEXT: $v0 = COPY [[ICMP]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -102,16 +105,17 @@ body: | ; MIPS32-LABEL: name: slt_i16 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; MIPS32: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; MIPS32: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]] - ; MIPS32: $v0 = COPY [[ICMP]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; MIPS32-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; MIPS32-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]] + ; MIPS32-NEXT: $v0 = COPY [[ICMP]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -132,17 +136,18 @@ body: | ; MIPS32-LABEL: name: eq_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY2]] - ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY3]] - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C]] - ; MIPS32: $v0 = COPY [[ICMP]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY2]] + ; MIPS32-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY3]] + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C]] + ; MIPS32-NEXT: $v0 = COPY [[ICMP]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -165,17 +170,18 @@ body: | ; MIPS32-LABEL: name: ne_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY2]] - ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY3]] - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR]](s32), [[C]] - ; MIPS32: $v0 = COPY [[ICMP]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY2]] + ; MIPS32-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY3]] + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR]](s32), [[C]] + ; MIPS32-NEXT: $v0 = COPY [[ICMP]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -198,18 +204,17 @@ body: | ; MIPS32-LABEL: name: sgt_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -232,18 +237,17 @@ body: | ; MIPS32-LABEL: name: sge_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY2]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY2]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -266,18 +270,17 @@ body: | ; MIPS32-LABEL: name: slt_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] - ; MIPS32: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -300,18 +303,17 @@ body: | ; MIPS32-LABEL: name: sle_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sle), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY2]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sle), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY2]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -334,18 +336,17 @@ body: | ; MIPS32-LABEL: name: ugt_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -368,18 +369,17 @@ body: | ; MIPS32-LABEL: name: uge_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY2]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY2]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -402,18 +402,17 @@ body: | ; MIPS32-LABEL: name: ult_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT 
[[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -436,18 +435,17 @@ body: | ; MIPS32-LABEL: name: ule_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY2]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY2]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/jump_table_and_brjt.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/jump_table_and_brjt.mir index 0c36cf6459b6df..301fd2b9196e9c 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/jump_table_and_brjt.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/jump_table_and_brjt.mir @@ -70,69 +70,84 @@ jumpTable: body: | ; MIPS32-LABEL: name: mod4_0_to_11 ; MIPS32: bb.0.entry: - ; MIPS32: successors: %bb.6(0x40000000), %bb.1(0x40000000) - ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; MIPS32: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C6]] - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY2]] - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; MIPS32: G_BRCOND [[AND]](s32), %bb.6 - ; MIPS32: bb.1.entry: - ; MIPS32: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000) - ; MIPS32: [[JUMP_TABLE:%[0-9]+]]:_(p0) = G_JUMP_TABLE %jump-table.0 - ; MIPS32: G_BRJT [[JUMP_TABLE]](p0), %jump-table.0, [[COPY1]](s32) - ; MIPS32: bb.2.sw.bb: - ; MIPS32: $v0 = COPY [[C4]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.3.sw.bb1: - ; MIPS32: $v0 = COPY [[C3]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.4.sw.bb2: - ; MIPS32: 
$v0 = COPY [[C2]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.5.sw.bb3: - ; MIPS32: $v0 = COPY [[C1]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.6.sw.default: - ; MIPS32: successors: %bb.7(0x80000000) - ; MIPS32: bb.7.sw.epilog: - ; MIPS32: successors: %bb.13(0x40000000), %bb.8(0x40000000) - ; MIPS32: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C7]] - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY3]](s32), [[COPY4]] - ; MIPS32: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C8]] - ; MIPS32: G_BRCOND [[AND1]](s32), %bb.13 - ; MIPS32: bb.8.sw.epilog: - ; MIPS32: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) - ; MIPS32: [[JUMP_TABLE1:%[0-9]+]]:_(p0) = G_JUMP_TABLE %jump-table.1 - ; MIPS32: G_BRJT [[JUMP_TABLE1]](p0), %jump-table.1, [[COPY3]](s32) - ; MIPS32: bb.9.sw.bb4: - ; MIPS32: $v0 = COPY [[C4]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.10.sw.bb5: - ; MIPS32: $v0 = COPY [[C3]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.11.sw.bb6: - ; MIPS32: $v0 = COPY [[C2]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.12.sw.bb7: - ; MIPS32: $v0 = COPY [[C1]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.13.sw.default8: - ; MIPS32: $v0 = COPY [[C5]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: successors: %bb.6(0x40000000), %bb.1(0x40000000) + ; MIPS32-NEXT: liveins: $a0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; MIPS32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; MIPS32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; MIPS32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C6]] + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY2]] + ; MIPS32-NEXT: G_BRCOND [[ICMP]](s32), %bb.6 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.1.entry: + ; MIPS32-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000) + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[JUMP_TABLE:%[0-9]+]]:_(p0) = G_JUMP_TABLE %jump-table.0 + ; MIPS32-NEXT: G_BRJT [[JUMP_TABLE]](p0), %jump-table.0, [[COPY1]](s32) + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.2.sw.bb: + ; MIPS32-NEXT: $v0 = COPY [[C4]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.3.sw.bb1: + ; MIPS32-NEXT: $v0 = COPY [[C3]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.4.sw.bb2: + ; MIPS32-NEXT: $v0 = COPY [[C2]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.5.sw.bb3: + ; MIPS32-NEXT: $v0 = COPY [[C1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.6.sw.default: + ; MIPS32-NEXT: successors: %bb.7(0x80000000) + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.7.sw.epilog: + ; MIPS32-NEXT: successors: %bb.13(0x40000000), %bb.8(0x40000000) + ; MIPS32-NEXT: {{ $}} + ; 
MIPS32-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C7]] + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY3]](s32), [[COPY4]] + ; MIPS32-NEXT: G_BRCOND [[ICMP1]](s32), %bb.13 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.8.sw.epilog: + ; MIPS32-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[JUMP_TABLE1:%[0-9]+]]:_(p0) = G_JUMP_TABLE %jump-table.1 + ; MIPS32-NEXT: G_BRJT [[JUMP_TABLE1]](p0), %jump-table.1, [[COPY3]](s32) + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.9.sw.bb4: + ; MIPS32-NEXT: $v0 = COPY [[C4]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.10.sw.bb5: + ; MIPS32-NEXT: $v0 = COPY [[C3]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.11.sw.bb6: + ; MIPS32-NEXT: $v0 = COPY [[C2]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.12.sw.bb7: + ; MIPS32-NEXT: $v0 = COPY [[C1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.13.sw.default8: + ; MIPS32-NEXT: $v0 = COPY [[C5]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 bb.1.entry: liveins: $a0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir index e96d826638541b..7ad286b952cb12 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir @@ -25,11 +25,12 @@ body: | ; MIPS32-LABEL: name: mul_i32 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; MIPS32: $v0 = COPY [[MUL]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; MIPS32-NEXT: $v0 = COPY [[MUL]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_MUL %0, %1 @@ -47,14 +48,15 @@ body: | ; MIPS32-LABEL: name: mul_i8_sext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[MUL]], [[C]](s32) - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[MUL]], [[C]](s32) + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -75,13 +77,14 @@ body: | ; MIPS32-LABEL: name: mul_i8_zext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = 
COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -102,11 +105,12 @@ body: | ; MIPS32-LABEL: name: mul_i8_aext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[MUL]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[MUL]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -127,14 +131,15 @@ body: | ; MIPS32-LABEL: name: mul_i16_sext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[MUL]], [[C]](s32) - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[MUL]], [[C]](s32) + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -155,13 +160,14 @@ body: | ; MIPS32-LABEL: name: mul_i16_zext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -182,11 +188,12 @@ body: | ; MIPS32-LABEL: name: mul_i16_aext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL 
[[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[MUL]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[MUL]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -207,19 +214,20 @@ body: | ; MIPS32-LABEL: name: mul_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY]] - ; MIPS32: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY]] - ; MIPS32: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY1]] - ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY]] - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; MIPS32: $v0 = COPY [[MUL]](s32) - ; MIPS32: $v1 = COPY [[ADD1]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY]] + ; MIPS32-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY1]] + ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; MIPS32-NEXT: $v0 = COPY [[MUL]](s32) + ; MIPS32-NEXT: $v1 = COPY [[ADD1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -248,73 +256,66 @@ body: | ; MIPS32-LABEL: name: mul_i128 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) - ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) - ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2, align 8) - ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY]] - ; MIPS32: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY]] - ; MIPS32: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY1]] - ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY]] - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] - ; MIPS32: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY]] - ; MIPS32: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY1]] - ; MIPS32: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY2]] - ; MIPS32: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY]] - ; MIPS32: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY1]] - ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[MUL4]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C]] - ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL5]] - ; MIPS32: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL5]] - ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP3]], [[C]] - ; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[AND2]], [[AND3]] - ; MIPS32: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH1]] - ; MIPS32: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[UMULH1]] - ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C]] - ; MIPS32: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[AND4]] - ; MIPS32: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH2]] - ; MIPS32: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[UMULH2]] - ; MIPS32: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[C]] - ; MIPS32: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[AND5]] - ; MIPS32: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD8]], [[ADD2]] - ; MIPS32: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD10]](s32), [[ADD2]] - ; MIPS32: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C]] - ; MIPS32: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[AND6]] - ; MIPS32: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[LOAD3]], [[COPY]] - ; MIPS32: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY1]] - ; MIPS32: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY2]] - ; MIPS32: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY3]] - ; MIPS32: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LOAD2]], [[COPY]] - ; MIPS32: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY1]] - ; MIPS32: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY2]] - ; MIPS32: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL6]], [[MUL7]] - ; MIPS32: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL8]] - ; MIPS32: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[MUL9]] - ; MIPS32: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[UMULH3]] - ; MIPS32: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ADD15]], [[UMULH4]] - ; MIPS32: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH5]] - ; MIPS32: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ADD11]] - ; MIPS32: $v0 = COPY [[MUL]](s32) - ; MIPS32: $v1 = COPY [[ADD1]](s32) - ; MIPS32: $a0 = COPY [[ADD10]](s32) - ; MIPS32: $a1 = COPY [[ADD18]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) + ; MIPS32-NEXT: 
[[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 + ; MIPS32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) + ; MIPS32-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 + ; MIPS32-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2, align 8) + ; MIPS32-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 + ; MIPS32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY]] + ; MIPS32-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY]] + ; MIPS32-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY1]] + ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY]] + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]] + ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], [[ICMP1]] + ; MIPS32-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY]] + ; MIPS32-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY1]] + ; MIPS32-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY2]] + ; MIPS32-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY]] + ; MIPS32-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY1]] + ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[MUL4]] + ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL5]] + ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL5]] + ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ICMP2]], [[ICMP3]] + ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH1]] + ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[UMULH1]] + ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP4]] + ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH2]] + ; MIPS32-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[UMULH2]] + ; MIPS32-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ICMP5]] + ; MIPS32-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD8]], [[ADD2]] + ; MIPS32-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD10]](s32), [[ADD2]] + ; MIPS32-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ICMP6]] + ; MIPS32-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[LOAD3]], [[COPY]] + ; MIPS32-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY1]] + ; MIPS32-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY2]] + ; MIPS32-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY3]] + ; MIPS32-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LOAD2]], [[COPY]] + ; MIPS32-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY1]] + ; MIPS32-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY2]] + ; MIPS32-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL6]], [[MUL7]] + ; MIPS32-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL8]] + ; MIPS32-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[MUL9]] + ; MIPS32-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[UMULH3]] + ; MIPS32-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ADD15]], [[UMULH4]] + ; MIPS32-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], 
[[UMULH5]] + ; MIPS32-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ADD11]] + ; MIPS32-NEXT: $v0 = COPY [[MUL]](s32) + ; MIPS32-NEXT: $v1 = COPY [[ADD1]](s32) + ; MIPS32-NEXT: $a0 = COPY [[ADD10]](s32) + ; MIPS32-NEXT: $a1 = COPY [[ADD18]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %4:_(s32) = COPY $a2 @@ -348,40 +349,35 @@ body: | ; MIPS32-LABEL: name: umulh_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY]] - ; MIPS32: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY1]] - ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY]] - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL]], [[MUL1]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL1]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] - ; MIPS32: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY1]] - ; MIPS32: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY3]], [[COPY]] - ; MIPS32: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY1]] - ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL2]], [[UMULH1]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[UMULH1]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C]] - ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] - ; MIPS32: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[UMULH2]] - ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP3]], [[C]] - ; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[AND2]], [[AND3]] - ; MIPS32: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD2]] - ; MIPS32: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[ADD2]] - ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C]] - ; MIPS32: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[AND4]] - ; MIPS32: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[COPY3]], [[COPY1]] - ; MIPS32: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD7]] - ; MIPS32: $v0 = COPY [[ADD6]](s32) - ; MIPS32: $v1 = COPY [[ADD8]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY]] + ; MIPS32-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY1]] + ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL]], [[MUL1]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL1]] + ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], [[ICMP1]] + ; MIPS32-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY1]] + ; MIPS32-NEXT: [[UMULH1:%[0-9]+]]:_(s32) 
= G_UMULH [[COPY3]], [[COPY]] + ; MIPS32-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY1]] + ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL2]], [[UMULH1]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[UMULH1]] + ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] + ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[UMULH2]] + ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ICMP2]], [[ICMP3]] + ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD2]] + ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[ADD2]] + ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP4]] + ; MIPS32-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[COPY3]], [[COPY1]] + ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD7]] + ; MIPS32-NEXT: $v0 = COPY [[ADD6]](s32) + ; MIPS32-NEXT: $v1 = COPY [[ADD8]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -405,20 +401,20 @@ body: | ; MIPS32-LABEL: name: umul_with_overflow ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3 - ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C1]] - ; MIPS32: G_STORE [[AND1]](s32), [[COPY3]](p0) :: (store (s8) into %ir.pcarry_flag) - ; MIPS32: G_STORE [[MUL]](s32), [[COPY2]](p0) :: (store (s32) into %ir.pmul) - ; MIPS32: RetRA + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3 + ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] + ; MIPS32-NEXT: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store (s8) into %ir.pcarry_flag) + ; MIPS32-NEXT: G_STORE [[MUL]](s32), [[COPY2]](p0) :: (store (s32) into %ir.pmul) + ; MIPS32-NEXT: RetRA %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(p0) = COPY $a2 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir index 60b16cc1f35252..a0bf22dbffd4ba 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir @@ -22,14 +22,15 @@ body: | ; MIPS32-LABEL: name: select_i8 ; MIPS32: liveins: $a0, $a1, $a2 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; MIPS32: 
[[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %3:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %3(s32) %4:_(s32) = COPY $a1 @@ -52,14 +53,15 @@ body: | ; MIPS32-LABEL: name: select_i16 ; MIPS32: liveins: $a0, $a1, $a2 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %3:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %3(s32) %4:_(s32) = COPY $a1 @@ -82,14 +84,15 @@ body: | ; MIPS32-LABEL: name: select_i32 ; MIPS32: liveins: $a0, $a1, $a2 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %3:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %3(s32) %1:_(s32) = COPY $a1 @@ -109,14 +112,15 @@ body: | ; MIPS32-LABEL: name: select_ptr ; MIPS32: liveins: $a0, $a1, $a2 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] - ; MIPS32: $v0 = COPY [[SELECT]](p0) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[COPY]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](p0) + ; MIPS32-NEXT: RetRA implicit $v0 %3:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %3(s32) %1:_(p0) = COPY $a1 @@ -136,17 +140,17 @@ body: | ; MIPS32-LABEL: name: select_with_negation ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ICMP]], [[C]] - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY2]], [[COPY3]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]] + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ICMP]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s32), [[COPY2]], [[COPY3]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = COPY $a2 @@ -172,22 +176,23 @@ body: | ; MIPS32-LABEL: name: select_i64 ; MIPS32: liveins: $a0, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) - ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) - ; MIPS32: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[MV]], [[MV1]] - ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT]](s64) - ; MIPS32: $v0 = COPY [[UV]](s32) - ; MIPS32: $v1 = COPY [[UV1]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; MIPS32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) + ; MIPS32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 + ; MIPS32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) + ; MIPS32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; MIPS32-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[MV]], [[MV1]] + ; MIPS32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT]](s64) + ; MIPS32-NEXT: $v0 = COPY [[UV]](s32) + ; MIPS32-NEXT: $v1 = COPY [[UV1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %3:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %3(s32) %4:_(s32) = COPY $a2 @@ -215,14 +220,15 @@ body: | ; MIPS32-LABEL: name: select_float ; MIPS32: liveins: $a0, $a1, $a2 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] - ; MIPS32: $f0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $f0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] + ; MIPS32-NEXT: $f0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $f0 %3:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %3(s32) %1:_(s32) = COPY $a1 @@ -244,15 +250,16 @@ body: | ; MIPS32-LABEL: name: select_double ; MIPS32: liveins: $d6, $d7 - ; MIPS32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s64) = COPY $d7 - ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] - ; MIPS32: $d0 = COPY [[SELECT]](s64) - ; MIPS32: RetRA implicit $d0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d7 + ; MIPS32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; MIPS32-NEXT: $d0 = COPY [[SELECT]](s64) + ; MIPS32-NEXT: RetRA implicit $d0 %0:_(s64) = COPY $d6 %1:_(s64) = COPY $d7 %4:_(p0) = G_FRAME_INDEX %fixed-stack.0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub.mir index 0230eb2c07e8f6..db62b72be87d8e 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub.mir @@ -23,11 +23,12 @@ body: | ; MIPS32-LABEL: name: sub_i32 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; MIPS32: $v0 = COPY [[SUB]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] + ; MIPS32-NEXT: $v0 = COPY [[SUB]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_SUB %0, %1 @@ -45,14 +46,15 @@ body: | ; MIPS32-LABEL: name: sub_i8_sext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SUB]], [[C]](s32) - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SUB]], [[C]](s32) + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -73,13 +75,14 @@ body: | ; MIPS32-LABEL: name: sub_i8_zext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -100,11 +103,12 @@ body: | ; MIPS32-LABEL: name: sub_i8_aext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[SUB]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[SUB]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -125,14 +129,15 @@ body: | ; MIPS32-LABEL: name: sub_i16_sext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SUB]], [[C]](s32) - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]] + ; MIPS32-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SUB]], [[C]](s32) + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -153,13 +158,14 @@ body: | ; MIPS32-LABEL: name: sub_i16_zext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -180,11 +186,12 @@ body: | ; MIPS32-LABEL: name: sub_i16_aext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[SUB]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[SUB]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -205,19 +212,18 @@ body: | ; MIPS32-LABEL: name: sub_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY3]], [[COPY1]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY3]](s32), [[COPY1]] - ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND]] - ; MIPS32: $v0 = COPY [[SUB2]](s32) - ; MIPS32: $v1 = COPY [[SUB]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY3]], [[COPY1]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY3]](s32), [[COPY1]] + ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[SUB2]](s32) + ; MIPS32-NEXT: $v1 = COPY [[SUB]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %3(s32), %2(s32) @@ -246,44 +252,44 @@ body: | ; MIPS32-LABEL: name: sub_i128 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: 
[[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) - ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) - ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2, align 8) - ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[LOAD]], [[COPY]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[LOAD]](s32), [[COPY]] - ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LOAD1]], [[COPY1]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[SUB1]](s32), [[LOAD1]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND1]] - ; MIPS32: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LOAD2]], [[COPY2]] - ; MIPS32: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[SUB3]](s32), [[LOAD2]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C]] - ; MIPS32: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB3]], [[AND2]] - ; MIPS32: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB3]](s32), [[C1]] - ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[OR]] - ; MIPS32: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP3]], [[AND3]] - ; MIPS32: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LOAD3]], [[COPY3]] - ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C]] - ; MIPS32: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND4]] - ; MIPS32: $v0 = COPY [[SUB]](s32) - ; MIPS32: $v1 = COPY [[SUB2]](s32) - ; MIPS32: $a0 = COPY [[SUB4]](s32) - ; MIPS32: $a1 = COPY [[SUB6]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) + ; MIPS32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 + ; MIPS32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) + ; MIPS32-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 + ; MIPS32-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2, align 8) + ; MIPS32-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 + ; MIPS32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = 
G_SUB [[LOAD]], [[COPY]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[LOAD]](s32), [[COPY]] + ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LOAD1]], [[COPY1]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[SUB1]](s32), [[LOAD1]] + ; MIPS32-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[ICMP]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]] + ; MIPS32-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LOAD2]], [[COPY2]] + ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[SUB3]](s32), [[LOAD2]] + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C1]] + ; MIPS32-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB3]], [[AND1]] + ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB3]](s32), [[C]] + ; MIPS32-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[OR]] + ; MIPS32-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP3]], [[AND2]] + ; MIPS32-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LOAD3]], [[COPY3]] + ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C1]] + ; MIPS32-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; MIPS32-NEXT: $v0 = COPY [[SUB]](s32) + ; MIPS32-NEXT: $v1 = COPY [[SUB2]](s32) + ; MIPS32-NEXT: $a0 = COPY [[SUB4]](s32) + ; MIPS32-NEXT: $a1 = COPY [[SUB6]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %4:_(s32) = COPY $a2 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll index 38d3f00f62db21..f76d18b53e59bb 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll @@ -89,7 +89,6 @@ define i64 @add_i64(i64 %a, i64 %b) { ; MIPS32-NEXT: addu $2, $6, $4 ; MIPS32-NEXT: sltu $3, $2, $4 ; MIPS32-NEXT: addu $1, $7, $5 -; MIPS32-NEXT: andi $3, $3, 1 ; MIPS32-NEXT: addu $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -115,8 +114,7 @@ define i128 @add_i128(i128 %a, i128 %b) { ; MIPS32-NEXT: sltu $9, $2, $8 ; MIPS32-NEXT: addu $3, $4, $3 ; MIPS32-NEXT: sltu $4, $3, $4 -; MIPS32-NEXT: andi $8, $9, 1 -; MIPS32-NEXT: addu $3, $3, $8 +; MIPS32-NEXT: addu $3, $3, $9 ; MIPS32-NEXT: sltiu $8, $3, 1 ; MIPS32-NEXT: and $8, $8, $9 ; MIPS32-NEXT: or $8, $4, $8 @@ -178,7 +176,6 @@ define void @uadd_with_overflow(i32 %lhs, i32 %rhs, ptr %padd, ptr %pcarry_flag) ; MIPS32-NEXT: addu $1, $4, $5 ; MIPS32-NEXT: sltu $2, $1, $5 ; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: andi $2, $2, 1 ; MIPS32-NEXT: sb $2, 0($7) ; MIPS32-NEXT: sw $1, 0($6) ; MIPS32-NEXT: jr $ra diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll index 26816482306a9a..e572551ef4b2a0 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll @@ -306,8 +306,7 @@ entry: define i16 @shl_i16(i16 %a) { ; MIPS32-LABEL: shl_i16: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ori $1, $zero, 2 -; MIPS32-NEXT: sllv $2, $4, $1 +; MIPS32-NEXT: sll $2, $4, 2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -318,10 +317,9 @@ entry: define i8 @ashr_i8(i8 %a) { ; MIPS32-LABEL: ashr_i8: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ori $2, $zero, 2 ; MIPS32-NEXT: sll 
$1, $4, 24 ; MIPS32-NEXT: sra $1, $1, 24 -; MIPS32-NEXT: srav $2, $1, $2 +; MIPS32-NEXT: sra $2, $1, 2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -332,9 +330,8 @@ entry: define i16 @lshr_i16(i16 %a) { ; MIPS32-LABEL: lshr_i16: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ori $2, $zero, 2 ; MIPS32-NEXT: andi $1, $4, 65535 -; MIPS32-NEXT: srlv $2, $1, $2 +; MIPS32-NEXT: srl $2, $1, 2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -346,24 +343,19 @@ define i64 @shl_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: shl_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: move $3, $4 -; MIPS32-NEXT: move $9, $6 -; MIPS32-NEXT: ori $1, $zero, 32 -; MIPS32-NEXT: subu $8, $9, $1 -; MIPS32-NEXT: subu $4, $1, $9 +; MIPS32-NEXT: ori $4, $zero, 32 +; MIPS32-NEXT: subu $8, $6, $4 +; MIPS32-NEXT: subu $1, $4, $6 ; MIPS32-NEXT: ori $2, $zero, 0 -; MIPS32-NEXT: sltu $6, $9, $1 -; MIPS32-NEXT: sltiu $1, $9, 1 -; MIPS32-NEXT: sllv $7, $3, $9 -; MIPS32-NEXT: srlv $4, $3, $4 -; MIPS32-NEXT: sllv $9, $5, $9 -; MIPS32-NEXT: or $4, $4, $9 +; MIPS32-NEXT: sltu $4, $6, $4 +; MIPS32-NEXT: sllv $7, $3, $6 +; MIPS32-NEXT: srlv $1, $3, $1 +; MIPS32-NEXT: sllv $9, $5, $6 +; MIPS32-NEXT: or $1, $1, $9 ; MIPS32-NEXT: sllv $3, $3, $8 -; MIPS32-NEXT: andi $8, $6, 1 -; MIPS32-NEXT: movn $2, $7, $8 -; MIPS32-NEXT: andi $6, $6, 1 -; MIPS32-NEXT: movn $3, $4, $6 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn $3, $5, $1 +; MIPS32-NEXT: movn $2, $7, $4 +; MIPS32-NEXT: movn $3, $1, $4 +; MIPS32-NEXT: movz $3, $5, $6 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -379,23 +371,18 @@ define i64 @ashl_i64(i64 %a, i64 %b) { ; MIPS32-NEXT: sw $4, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: move $2, $5 ; MIPS32-NEXT: lw $5, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $3, $6 ; MIPS32-NEXT: ori $1, $zero, 32 -; MIPS32-NEXT: subu $8, $3, $1 -; MIPS32-NEXT: subu $7, $1, $3 -; MIPS32-NEXT: sltu $4, $3, $1 -; MIPS32-NEXT: sltiu $6, $3, 1 -; MIPS32-NEXT: srav $1, $2, $3 -; MIPS32-NEXT: srlv $3, $5, $3 +; MIPS32-NEXT: subu $8, $6, $1 +; MIPS32-NEXT: subu $7, $1, $6 +; MIPS32-NEXT: sltu $4, $6, $1 +; MIPS32-NEXT: srav $1, $2, $6 +; MIPS32-NEXT: srlv $3, $5, $6 ; MIPS32-NEXT: sllv $7, $2, $7 ; MIPS32-NEXT: or $7, $3, $7 ; MIPS32-NEXT: sra $3, $2, 31 ; MIPS32-NEXT: srav $2, $2, $8 -; MIPS32-NEXT: andi $8, $4, 1 -; MIPS32-NEXT: movn $2, $7, $8 -; MIPS32-NEXT: andi $6, $6, 1 -; MIPS32-NEXT: movn $2, $5, $6 -; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: movn $2, $7, $4 +; MIPS32-NEXT: movz $2, $5, $6 ; MIPS32-NEXT: movn $3, $1, $4 ; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra @@ -413,23 +400,18 @@ define i64 @lshr_i64(i64 %a, i64 %b) { ; MIPS32-NEXT: sw $4, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: move $2, $5 ; MIPS32-NEXT: lw $5, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $7, $6 ; MIPS32-NEXT: ori $1, $zero, 32 -; MIPS32-NEXT: subu $8, $7, $1 -; MIPS32-NEXT: subu $9, $1, $7 +; MIPS32-NEXT: subu $8, $6, $1 +; MIPS32-NEXT: subu $9, $1, $6 ; MIPS32-NEXT: ori $3, $zero, 0 -; MIPS32-NEXT: sltu $4, $7, $1 -; MIPS32-NEXT: sltiu $6, $7, 1 -; MIPS32-NEXT: srlv $1, $2, $7 -; MIPS32-NEXT: srlv $7, $5, $7 +; MIPS32-NEXT: sltu $4, $6, $1 +; MIPS32-NEXT: srlv $1, $2, $6 +; MIPS32-NEXT: srlv $7, $5, $6 ; MIPS32-NEXT: sllv $9, $2, $9 ; MIPS32-NEXT: or $7, $7, $9 ; MIPS32-NEXT: srlv $2, $2, $8 -; MIPS32-NEXT: andi $8, $4, 1 -; MIPS32-NEXT: movn $2, $7, $8 -; MIPS32-NEXT: andi $6, $6, 1 -; MIPS32-NEXT: movn $2, $5, $6 -; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: movn $2, $7, $4 +; MIPS32-NEXT: movz $2, $5, $6 ; MIPS32-NEXT: movn $3, 
$1, $4 ; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll index 65fce9d4f5d591..542a585b50e3dc 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll @@ -17,14 +17,11 @@ declare i32 @llvm.ctlz.i32(i32, i1 immarg) define i64 @ctlz_i64(i64 %a) { ; MIPS32-LABEL: ctlz_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: move $1, $4 ; MIPS32-NEXT: ori $3, $zero, 0 -; MIPS32-NEXT: sltiu $4, $5, 1 -; MIPS32-NEXT: clz $1, $1 +; MIPS32-NEXT: clz $1, $4 ; MIPS32-NEXT: addiu $1, $1, 32 ; MIPS32-NEXT: clz $2, $5 -; MIPS32-NEXT: andi $4, $4, 1 -; MIPS32-NEXT: movn $2, $1, $4 +; MIPS32-NEXT: movz $2, $1, $5 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll index 44a2e619f71569..6bc4c625d4010f 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll @@ -21,9 +21,7 @@ declare i32 @llvm.cttz.i32(i32, i1 immarg) define i64 @cttz_i64(i64 %a) { ; MIPS32-LABEL: cttz_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: move $6, $4 ; MIPS32-NEXT: ori $3, $zero, 0 -; MIPS32-NEXT: sltiu $4, $6, 1 ; MIPS32-NEXT: not $1, $5 ; MIPS32-NEXT: addiu $2, $5, -1 ; MIPS32-NEXT: and $1, $1, $2 @@ -31,13 +29,12 @@ define i64 @cttz_i64(i64 %a) { ; MIPS32-NEXT: clz $1, $1 ; MIPS32-NEXT: subu $1, $2, $1 ; MIPS32-NEXT: addiu $1, $1, 32 -; MIPS32-NEXT: not $5, $6 -; MIPS32-NEXT: addiu $6, $6, -1 +; MIPS32-NEXT: not $5, $4 +; MIPS32-NEXT: addiu $6, $4, -1 ; MIPS32-NEXT: and $5, $5, $6 ; MIPS32-NEXT: clz $5, $5 ; MIPS32-NEXT: subu $2, $2, $5 -; MIPS32-NEXT: andi $4, $4, 1 -; MIPS32-NEXT: movn $2, $1, $4 +; MIPS32-NEXT: movz $2, $1, $4 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -58,9 +55,7 @@ define i32 @ffs_i32_expansion(i32 %a) { ; MIPS32-NEXT: clz $3, $3 ; MIPS32-NEXT: subu $2, $2, $3 ; MIPS32-NEXT: addiu $2, $2, 1 -; MIPS32-NEXT: sltiu $3, $4, 1 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $2, $1, $3 +; MIPS32-NEXT: movz $2, $1, $4 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -76,7 +71,6 @@ define i64 @ffs_i64_expansion(i64 %a) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $3, $zero, 1 ; MIPS32-NEXT: ori $1, $zero, 0 -; MIPS32-NEXT: sltiu $7, $4, 1 ; MIPS32-NEXT: not $2, $5 ; MIPS32-NEXT: addiu $6, $5, -1 ; MIPS32-NEXT: and $6, $2, $6 @@ -84,25 +78,21 @@ define i64 @ffs_i64_expansion(i64 %a) { ; MIPS32-NEXT: clz $6, $6 ; MIPS32-NEXT: subu $6, $2, $6 ; MIPS32-NEXT: addiu $6, $6, 32 -; MIPS32-NEXT: not $8, $4 -; MIPS32-NEXT: addiu $9, $4, -1 -; MIPS32-NEXT: and $8, $8, $9 -; MIPS32-NEXT: clz $8, $8 -; MIPS32-NEXT: subu $2, $2, $8 -; MIPS32-NEXT: andi $7, $7, 1 -; MIPS32-NEXT: movn $2, $6, $7 +; MIPS32-NEXT: not $7, $4 +; MIPS32-NEXT: addiu $8, $4, -1 +; MIPS32-NEXT: and $7, $7, $8 +; MIPS32-NEXT: clz $7, $7 +; MIPS32-NEXT: subu $2, $2, $7 +; MIPS32-NEXT: movz $2, $6, $4 ; MIPS32-NEXT: addiu $2, $2, 1 ; MIPS32-NEXT: sltu $6, $2, $3 ; MIPS32-NEXT: addiu $3, $1, 0 -; MIPS32-NEXT: andi $6, $6, 1 ; MIPS32-NEXT: addu $3, $3, $6 ; MIPS32-NEXT: xori $4, $4, 0 ; MIPS32-NEXT: xori $5, $5, 0 ; MIPS32-NEXT: or $4, $4, $5 -; MIPS32-NEXT: sltiu $4, $4, 1 -; MIPS32-NEXT: andi $4, $4, 1 -; MIPS32-NEXT: movn $2, $1, $4 -; MIPS32-NEXT: movn $3, $1, $4 +; MIPS32-NEXT: movz $2, $1, $4 +; MIPS32-NEXT: movz $3, $1, $4 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git 
a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll index e9cc0b933f7197..823e0ecb6e9417 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll @@ -151,7 +151,6 @@ define i32 @f32tou32(float %a) { ; MIPS32-NEXT: addiu $3, $zero, 1 ; MIPS32-NEXT: c.ult.s $f12, $f0 ; MIPS32-NEXT: movf $3, $zero, $fcc0 -; MIPS32-NEXT: andi $3, $3, 1 ; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -175,7 +174,6 @@ define zeroext i16 @f32tou16(float %a) { ; MIPS32-NEXT: addiu $3, $zero, 1 ; MIPS32-NEXT: c.ult.s $f12, $f0 ; MIPS32-NEXT: movf $3, $zero, $fcc0 -; MIPS32-NEXT: andi $3, $3, 1 ; MIPS32-NEXT: movn $1, $2, $3 ; MIPS32-NEXT: andi $2, $1, 65535 ; MIPS32-NEXT: jr $ra @@ -200,7 +198,6 @@ define zeroext i8 @f32tou8(float %a) { ; MIPS32-NEXT: addiu $3, $zero, 1 ; MIPS32-NEXT: c.ult.s $f12, $f0 ; MIPS32-NEXT: movf $3, $zero, $fcc0 -; MIPS32-NEXT: andi $3, $3, 1 ; MIPS32-NEXT: movn $1, $2, $3 ; MIPS32-NEXT: andi $2, $1, 255 ; MIPS32-NEXT: jr $ra @@ -245,7 +242,6 @@ define i32 @f64tou32(double %a) { ; FP32-NEXT: addiu $3, $zero, 1 ; FP32-NEXT: c.ult.d $f12, $f0 ; FP32-NEXT: movf $3, $zero, $fcc0 -; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $2, $1, $3 ; FP32-NEXT: jr $ra ; FP32-NEXT: nop @@ -266,7 +262,6 @@ define i32 @f64tou32(double %a) { ; FP64-NEXT: addiu $3, $zero, 1 ; FP64-NEXT: c.ult.d $f12, $f0 ; FP64-NEXT: movf $3, $zero, $fcc0 -; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $2, $1, $3 ; FP64-NEXT: jr $ra ; FP64-NEXT: nop @@ -292,7 +287,6 @@ define zeroext i16 @f64tou16(double %a) { ; FP32-NEXT: addiu $3, $zero, 1 ; FP32-NEXT: c.ult.d $f12, $f0 ; FP32-NEXT: movf $3, $zero, $fcc0 -; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $1, $2, $3 ; FP32-NEXT: andi $2, $1, 65535 ; FP32-NEXT: jr $ra @@ -314,7 +308,6 @@ define zeroext i16 @f64tou16(double %a) { ; FP64-NEXT: addiu $3, $zero, 1 ; FP64-NEXT: c.ult.d $f12, $f0 ; FP64-NEXT: movf $3, $zero, $fcc0 -; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $1, $2, $3 ; FP64-NEXT: andi $2, $1, 65535 ; FP64-NEXT: jr $ra @@ -341,7 +334,6 @@ define zeroext i8 @f64tou8(double %a) { ; FP32-NEXT: addiu $3, $zero, 1 ; FP32-NEXT: c.ult.d $f12, $f0 ; FP32-NEXT: movf $3, $zero, $fcc0 -; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $1, $2, $3 ; FP32-NEXT: andi $2, $1, 255 ; FP32-NEXT: jr $ra @@ -363,7 +355,6 @@ define zeroext i8 @f64tou8(double %a) { ; FP64-NEXT: addiu $3, $zero, 1 ; FP64-NEXT: c.ult.d $f12, $f0 ; FP64-NEXT: movf $3, $zero, $fcc0 -; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $1, $2, $3 ; FP64-NEXT: andi $2, $1, 255 ; FP64-NEXT: jr $ra diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll index ab474e1f8e7223..49f2a78683b0a5 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll @@ -189,11 +189,9 @@ define i1 @sgt_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: sgt_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: slt $2, $7, $5 -; MIPS32-NEXT: xor $1, $5, $7 -; MIPS32-NEXT: sltiu $3, $1, 1 ; MIPS32-NEXT: sltu $1, $6, $4 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $2, $1, $3 +; MIPS32-NEXT: xor $3, $5, $7 +; MIPS32-NEXT: movz $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -206,12 +204,10 @@ define i1 @sge_i64(i64 %a, i64 %b) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: slt $1, $5, $7 ; MIPS32-NEXT: xori $2, $1, 1 -; MIPS32-NEXT: xor $1, $5, 
$7 -; MIPS32-NEXT: sltiu $3, $1, 1 ; MIPS32-NEXT: sltu $1, $4, $6 ; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $2, $1, $3 +; MIPS32-NEXT: xor $3, $5, $7 +; MIPS32-NEXT: movz $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -223,11 +219,9 @@ define i1 @slt_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: slt_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: slt $2, $5, $7 -; MIPS32-NEXT: xor $1, $5, $7 -; MIPS32-NEXT: sltiu $3, $1, 1 ; MIPS32-NEXT: sltu $1, $4, $6 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $2, $1, $3 +; MIPS32-NEXT: xor $3, $5, $7 +; MIPS32-NEXT: movz $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -240,12 +234,10 @@ define i1 @sle_i64(i64 %a, i64 %b) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: slt $1, $7, $5 ; MIPS32-NEXT: xori $2, $1, 1 -; MIPS32-NEXT: xor $1, $5, $7 -; MIPS32-NEXT: sltiu $3, $1, 1 ; MIPS32-NEXT: sltu $1, $6, $4 ; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $2, $1, $3 +; MIPS32-NEXT: xor $3, $5, $7 +; MIPS32-NEXT: movz $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -257,11 +249,9 @@ define i1 @ugt_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: ugt_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sltu $2, $7, $5 -; MIPS32-NEXT: xor $1, $5, $7 -; MIPS32-NEXT: sltiu $3, $1, 1 ; MIPS32-NEXT: sltu $1, $6, $4 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $2, $1, $3 +; MIPS32-NEXT: xor $3, $5, $7 +; MIPS32-NEXT: movz $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -274,12 +264,10 @@ define i1 @uge_i64(i64 %a, i64 %b) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sltu $1, $5, $7 ; MIPS32-NEXT: xori $2, $1, 1 -; MIPS32-NEXT: xor $1, $5, $7 -; MIPS32-NEXT: sltiu $3, $1, 1 ; MIPS32-NEXT: sltu $1, $4, $6 ; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $2, $1, $3 +; MIPS32-NEXT: xor $3, $5, $7 +; MIPS32-NEXT: movz $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -291,11 +279,9 @@ define i1 @ult_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: ult_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sltu $2, $5, $7 -; MIPS32-NEXT: xor $1, $5, $7 -; MIPS32-NEXT: sltiu $3, $1, 1 ; MIPS32-NEXT: sltu $1, $4, $6 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $2, $1, $3 +; MIPS32-NEXT: xor $3, $5, $7 +; MIPS32-NEXT: movz $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -308,12 +294,10 @@ define i1 @ule_i64(i64 %a, i64 %b) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sltu $1, $7, $5 ; MIPS32-NEXT: xori $2, $1, 1 -; MIPS32-NEXT: xor $1, $5, $7 -; MIPS32-NEXT: sltiu $3, $1, 1 ; MIPS32-NEXT: sltu $1, $6, $4 ; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $2, $1, $3 +; MIPS32-NEXT: xor $3, $5, $7 +; MIPS32-NEXT: movz $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll index 30a1b514ddafe5..4c10fedaa4a884 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll @@ -23,7 +23,6 @@ define i32 @mod4_0_to_11(i32 %a) { ; MIPS32-NEXT: subu $2, $4, $2 ; MIPS32-NEXT: sw $2, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sltu $1, $1, $2 -; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: bnez $1, $BB0_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_1: # %entry @@ -64,7 +63,6 @@ define i32 @mod4_0_to_11(i32 %a) { ; MIPS32-NEXT: subu $2, $2, $3 ; MIPS32-NEXT: sw $2, 
0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sltu $1, $1, $2 -; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: bnez $1, $BB0_13 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_8: # %sw.epilog @@ -125,7 +123,6 @@ define i32 @mod4_0_to_11(i32 %a) { ; MIPS32_PIC-NEXT: subu $2, $4, $2 ; MIPS32_PIC-NEXT: sw $2, 36($sp) # 4-byte Folded Spill ; MIPS32_PIC-NEXT: sltu $1, $1, $2 -; MIPS32_PIC-NEXT: andi $1, $1, 1 ; MIPS32_PIC-NEXT: bnez $1, $BB0_6 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_1: # %entry @@ -168,7 +165,6 @@ define i32 @mod4_0_to_11(i32 %a) { ; MIPS32_PIC-NEXT: subu $2, $2, $3 ; MIPS32_PIC-NEXT: sw $2, 4($sp) # 4-byte Folded Spill ; MIPS32_PIC-NEXT: sltu $1, $1, $2 -; MIPS32_PIC-NEXT: andi $1, $1, 1 ; MIPS32_PIC-NEXT: bnez $1, $BB0_13 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_8: # %sw.epilog diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll index 44266f84379e10..c8c66fabf202bf 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll @@ -123,10 +123,8 @@ define i128 @mul_i128(i128 %a, i128 %b) { ; MIPS32-NEXT: mfhi $5 ; MIPS32-NEXT: addu $3, $3, $4 ; MIPS32-NEXT: sltu $4, $3, $4 -; MIPS32-NEXT: andi $4, $4, 1 ; MIPS32-NEXT: addu $3, $3, $5 ; MIPS32-NEXT: sltu $5, $3, $5 -; MIPS32-NEXT: andi $5, $5, 1 ; MIPS32-NEXT: addu $10, $4, $5 ; MIPS32-NEXT: mul $4, $8, $14 ; MIPS32-NEXT: mul $5, $7, $13 @@ -137,22 +135,17 @@ define i128 @mul_i128(i128 %a, i128 %b) { ; MIPS32-NEXT: mfhi $11 ; MIPS32-NEXT: addu $4, $4, $5 ; MIPS32-NEXT: sltu $5, $4, $5 -; MIPS32-NEXT: andi $5, $5, 1 ; MIPS32-NEXT: addu $4, $4, $24 ; MIPS32-NEXT: sltu $24, $4, $24 -; MIPS32-NEXT: andi $24, $24, 1 ; MIPS32-NEXT: addu $5, $5, $24 ; MIPS32-NEXT: addu $4, $4, $15 ; MIPS32-NEXT: sltu $15, $4, $15 -; MIPS32-NEXT: andi $15, $15, 1 ; MIPS32-NEXT: addu $5, $5, $15 ; MIPS32-NEXT: addu $4, $4, $11 ; MIPS32-NEXT: sltu $11, $4, $11 -; MIPS32-NEXT: andi $11, $11, 1 ; MIPS32-NEXT: addu $5, $5, $11 ; MIPS32-NEXT: addu $4, $4, $10 ; MIPS32-NEXT: sltu $10, $4, $10 -; MIPS32-NEXT: andi $10, $10, 1 ; MIPS32-NEXT: addu $5, $5, $10 ; MIPS32-NEXT: mul $1, $1, $14 ; MIPS32-NEXT: mul $11, $8, $13 @@ -187,7 +180,6 @@ define void @umul_with_overflow(i32 %lhs, i32 %rhs, ptr %pmul, ptr %pcarry_flag) ; MIPS32-NEXT: mul $1, $4, $5 ; MIPS32-NEXT: sltu $2, $zero, $2 ; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: andi $2, $2, 1 ; MIPS32-NEXT: sb $2, 0($7) ; MIPS32-NEXT: sw $1, 0($6) ; MIPS32-NEXT: jr $ra diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll index 30076a5ec7187a..7aba3bc18f63f3 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll @@ -59,7 +59,6 @@ define i32 @select_with_negation(i32 %a, i32 %b, i32 %x, i32 %y) { ; MIPS32-NEXT: move $2, $7 ; MIPS32-NEXT: slt $1, $4, $5 ; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: movn $2, $6, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll index 07d094604684d9..b7f42c2912132a 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll @@ -173,10 +173,9 @@ entry: define float @u16tof32(i16 zeroext %a) { ; FP32-LABEL: u16tof32: ; FP32: # %bb.0: # %entry -; FP32-NEXT: andi $1, $4, 65535 -; FP32-NEXT: lui $2, 
17200 -; FP32-NEXT: mtc1 $1, $f0 -; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: lui $1, 17200 +; FP32-NEXT: mtc1 $4, $f0 +; FP32-NEXT: mtc1 $1, $f1 ; FP32-NEXT: lui $2, 17200 ; FP32-NEXT: ori $1, $zero, 0 ; FP32-NEXT: mtc1 $1, $f2 @@ -188,10 +187,9 @@ define float @u16tof32(i16 zeroext %a) { ; ; FP64-LABEL: u16tof32: ; FP64: # %bb.0: # %entry -; FP64-NEXT: andi $1, $4, 65535 -; FP64-NEXT: lui $2, 17200 -; FP64-NEXT: mtc1 $1, $f0 -; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: lui $1, 17200 +; FP64-NEXT: mtc1 $4, $f0 +; FP64-NEXT: mthc1 $1, $f0 ; FP64-NEXT: lui $2, 17200 ; FP64-NEXT: ori $1, $zero, 0 ; FP64-NEXT: mtc1 $1, $f1 @@ -208,10 +206,9 @@ entry: define float @u8tof32(i8 zeroext %a) { ; FP32-LABEL: u8tof32: ; FP32: # %bb.0: # %entry -; FP32-NEXT: andi $1, $4, 255 -; FP32-NEXT: lui $2, 17200 -; FP32-NEXT: mtc1 $1, $f0 -; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: lui $1, 17200 +; FP32-NEXT: mtc1 $4, $f0 +; FP32-NEXT: mtc1 $1, $f1 ; FP32-NEXT: lui $2, 17200 ; FP32-NEXT: ori $1, $zero, 0 ; FP32-NEXT: mtc1 $1, $f2 @@ -223,10 +220,9 @@ define float @u8tof32(i8 zeroext %a) { ; ; FP64-LABEL: u8tof32: ; FP64: # %bb.0: # %entry -; FP64-NEXT: andi $1, $4, 255 -; FP64-NEXT: lui $2, 17200 -; FP64-NEXT: mtc1 $1, $f0 -; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: lui $1, 17200 +; FP64-NEXT: mtc1 $4, $f0 +; FP64-NEXT: mthc1 $1, $f0 ; FP64-NEXT: lui $2, 17200 ; FP64-NEXT: ori $1, $zero, 0 ; FP64-NEXT: mtc1 $1, $f1 @@ -292,10 +288,9 @@ entry: define double @u16tof64(i16 zeroext %a) { ; FP32-LABEL: u16tof64: ; FP32: # %bb.0: # %entry -; FP32-NEXT: andi $1, $4, 65535 -; FP32-NEXT: lui $2, 17200 -; FP32-NEXT: mtc1 $1, $f0 -; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: lui $1, 17200 +; FP32-NEXT: mtc1 $4, $f0 +; FP32-NEXT: mtc1 $1, $f1 ; FP32-NEXT: lui $2, 17200 ; FP32-NEXT: ori $1, $zero, 0 ; FP32-NEXT: mtc1 $1, $f2 @@ -306,10 +301,9 @@ define double @u16tof64(i16 zeroext %a) { ; ; FP64-LABEL: u16tof64: ; FP64: # %bb.0: # %entry -; FP64-NEXT: andi $1, $4, 65535 -; FP64-NEXT: lui $2, 17200 -; FP64-NEXT: mtc1 $1, $f0 -; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: lui $1, 17200 +; FP64-NEXT: mtc1 $4, $f0 +; FP64-NEXT: mthc1 $1, $f0 ; FP64-NEXT: lui $2, 17200 ; FP64-NEXT: ori $1, $zero, 0 ; FP64-NEXT: mtc1 $1, $f1 @@ -325,10 +319,9 @@ entry: define double @u8tof64(i8 zeroext %a) { ; FP32-LABEL: u8tof64: ; FP32: # %bb.0: # %entry -; FP32-NEXT: andi $1, $4, 255 -; FP32-NEXT: lui $2, 17200 -; FP32-NEXT: mtc1 $1, $f0 -; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: lui $1, 17200 +; FP32-NEXT: mtc1 $4, $f0 +; FP32-NEXT: mtc1 $1, $f1 ; FP32-NEXT: lui $2, 17200 ; FP32-NEXT: ori $1, $zero, 0 ; FP32-NEXT: mtc1 $1, $f2 @@ -339,10 +332,9 @@ define double @u8tof64(i8 zeroext %a) { ; ; FP64-LABEL: u8tof64: ; FP64: # %bb.0: # %entry -; FP64-NEXT: andi $1, $4, 255 -; FP64-NEXT: lui $2, 17200 -; FP64-NEXT: mtc1 $1, $f0 -; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: lui $1, 17200 +; FP64-NEXT: mtc1 $4, $f0 +; FP64-NEXT: mthc1 $1, $f0 ; FP64-NEXT: lui $2, 17200 ; FP64-NEXT: ori $1, $zero, 0 ; FP64-NEXT: mtc1 $1, $f1 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll index 87df1d20349002..e9ada30d43825d 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll @@ -90,7 +90,6 @@ define i64 @sub_i64(i64 %a, i64 %b) { ; MIPS32-NEXT: subu $2, $6, $4 ; MIPS32-NEXT: sltu $3, $6, $4 ; MIPS32-NEXT: subu $1, $7, $5 -; MIPS32-NEXT: andi $3, $3, 1 ; MIPS32-NEXT: subu $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -115,8 +114,7 @@ define i128 
@sub_i128(i128 %a, i128 %b) { ; MIPS32-NEXT: sltu $9, $8, $9 ; MIPS32-NEXT: subu $8, $3, $5 ; MIPS32-NEXT: sltu $5, $3, $8 -; MIPS32-NEXT: andi $3, $9, 1 -; MIPS32-NEXT: subu $3, $8, $3 +; MIPS32-NEXT: subu $3, $8, $9 ; MIPS32-NEXT: sltiu $8, $8, 1 ; MIPS32-NEXT: and $8, $8, $9 ; MIPS32-NEXT: or $8, $5, $8 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-abs.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-abs.mir index ca4d5a526d474b..71817809b399df 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-abs.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-abs.mir @@ -84,9 +84,7 @@ body: | ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[ASHR]] ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[AND]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD2]], [[ASHR1]] ; CHECK-NEXT: $x10 = COPY [[XOR]](s32) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-add.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-add.mir index 097f96a4f0566f..d169eb316dfcb7 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-add.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-add.mir @@ -90,9 +90,7 @@ body: | ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD %xlo, %ylo ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), %ylo ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD %xhi, %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[AND]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[ADD]](s32) ; CHECK-NEXT: $x11 = COPY [[ADD2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -124,9 +122,7 @@ body: | ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD %lo1, %lo2 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), %lo2 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD %hi1, %hi2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[AND]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[ADD]](s32) ; CHECK-NEXT: $x11 = COPY [[ADD2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -158,17 +154,13 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), %lo2 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD %mid1, %mid2 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), %mid1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[AND]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[ICMP1]], [[AND1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]] ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD %hi1, %hi2 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C2]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[AND2]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[OR]] ; CHECK-NEXT: $x10 = COPY [[ADD]](s32) ; CHECK-NEXT: $x11 = COPY [[ADD2]](s32) ; CHECK-NEXT: $x12 = COPY [[ADD4]](s32) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-addo-subo.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-addo-subo.mir index f831e50126ca94..c348ec6f73ad3d 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-addo-subo.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-addo-subo.mir @@ -120,23 +120,17 @@ body: | ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY2]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY2]] ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY3]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[AND]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[ADD2]](s32), [[COPY1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[COPY1]] ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[ICMP3]], [[ICMP1]] - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY3]](s32), [[C2]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY3]](s32), [[C2]] - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ICMP6]], [[ICMP4]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY3]](s32), [[C1]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY3]](s32), [[C1]] + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), [[ICMP6]], [[ICMP4]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[SELECT]] ; CHECK-NEXT: $x10 = COPY [[ADD]](s32) ; CHECK-NEXT: $x11 = COPY [[ADD2]](s32) @@ -275,23 +269,17 @@ body: | ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY2]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] ; CHECK-NEXT: 
[[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY3]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[ICMP]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[SUB2]](s32), [[COPY1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB2]](s32), [[COPY1]] ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB]](s32), [[COPY]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[ICMP3]], [[ICMP1]] - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY3]](s32), [[C2]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY3]](s32), [[C2]] - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ICMP6]], [[ICMP4]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY3]](s32), [[C1]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY3]](s32), [[C1]] + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY2]](s32), [[C]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), [[ICMP6]], [[ICMP4]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[SELECT]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s32) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s32) @@ -417,15 +405,11 @@ body: | ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY2]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY2]] ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY3]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[AND]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD2]](s32), [[COPY3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[COPY3]] ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY2]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[ICMP3]], [[ICMP1]] ; CHECK-NEXT: $x10 = COPY [[ADD]](s32) ; CHECK-NEXT: $x11 = COPY [[ADD2]](s32) ; CHECK-NEXT: $x12 = COPY [[SELECT]](s32) @@ -550,15 +534,11 @@ body: | ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY2]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY3]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - 
; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[ICMP]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY1]](s32), [[COPY3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[ICMP3]], [[ICMP1]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s32) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s32) ; CHECK-NEXT: $x12 = COPY [[SELECT]](s32) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-ashr.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-ashr.mir index 468077e51c3bfc..e24c7ad5391337 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-ashr.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-ashr.mir @@ -117,15 +117,9 @@ body: | ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C3]](s32) ; CHECK-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[SUB]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[ASHR3]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C5]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), %xlo, [[SELECT]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C6]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ASHR1]], [[ASHR2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[ASHR3]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %xlo, [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ASHR1]], [[ASHR2]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -166,15 +160,9 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR %hi1, [[C2]](s32) ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR %hi1, [[SUB]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[ASHR2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), %lo1, [[SELECT]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ASHR]], [[ASHR1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[ASHR2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %lo1, [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ASHR]], [[ASHR1]] ; CHECK-NEXT: $x10 = COPY 
[[SELECT1]](s32) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -218,95 +206,63 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ASHR]], [[SUB3]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[SUB2]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C5]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[ASHR1]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP3]], [[C6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), %hi1, [[SELECT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB %lo2, [[C7]] - ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C7]], %lo2 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo2(s32), [[C7]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %lo2(s32), [[C8]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[OR]], [[ASHR1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), %hi1, [[SELECT]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB %lo2, [[C5]] + ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C5]], %lo2 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo2(s32), [[C5]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %lo2(s32), [[C6]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR %mid1, %lo2(s32) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR %lo1, %lo2(s32) ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL %mid1, [[SUB5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL1]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR %mid1, [[SUB4]](s32) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C10]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[OR1]], [[LSHR3]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[C11]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), %lo1, [[SELECT2]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C12]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), [[LSHR1]], [[C9]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C13]] - ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C13]], [[SUB1]] - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s32), [[C13]] - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C14]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[OR1]], [[LSHR3]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), %lo1, [[SELECT2]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[LSHR1]], [[C7]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 32 + ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C8]] + ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C8]], [[SUB1]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s32), [[C8]] + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C9]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL %hi1, [[SUB1]](s32) ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR %hi1, [[SUB7]](s32) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ASHR]], [[SUB1]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL3]] - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL %hi1, [[SUB6]](s32) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C16]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s32), [[SHL2]], [[C15]] - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C17]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND6]](s32), [[OR2]], [[SHL4]] - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ICMP7]], [[C18]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[AND7]](s32), [[ASHR]], [[SELECT6]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s32), [[SHL2]], [[C10]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s32), [[OR2]], [[SHL4]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s32), [[ASHR]], [[SELECT6]] ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SELECT3]], [[SELECT5]] ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SELECT4]], [[SELECT7]] - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C19]](s32) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C20]] - ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C20]], [[SUB]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB]](s32), [[C20]] - ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB]](s32), [[C21]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C11]](s32) + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C12]] + ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C12]], [[SUB]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB]](s32), [[C12]] + ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB]](s32), [[C13]] ; CHECK-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[SUB]](s32) ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR %hi1, [[SUB]](s32) ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ASHR]], [[SUB9]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL5]] - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C22]](s32) + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C14]](s32) ; CHECK-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], 
[[SUB8]](s32) - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ICMP8]], [[C23]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[AND8]](s32), [[OR5]], [[ASHR5]] - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ICMP9]], [[C24]] - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[AND9]](s32), %hi1, [[SELECT8]] - ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ICMP8]], [[C25]] - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[AND10]](s32), [[ASHR3]], [[ASHR4]] - ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C26]] - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s32) = G_SELECT [[AND11]](s32), [[OR3]], [[SELECT9]] - ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C27]] - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s32) = G_SELECT [[AND12]](s32), [[OR4]], [[SELECT10]] - ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C28]] - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s32) = G_SELECT [[AND13]](s32), %lo1, [[SELECT11]] - ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C29]] - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s32) = G_SELECT [[AND14]](s32), %mid1, [[SELECT12]] - ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C30]] - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s32) = G_SELECT [[AND15]](s32), [[SELECT1]], [[ASHR2]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[ICMP8]](s32), [[OR5]], [[ASHR5]] + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[ICMP9]](s32), %hi1, [[SELECT8]] + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[ICMP8]](s32), [[ASHR3]], [[ASHR4]] + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR3]], [[SELECT9]] + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR4]], [[SELECT10]] + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %lo1, [[SELECT11]] + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %mid1, [[SELECT12]] + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SELECT1]], [[ASHR2]] ; CHECK-NEXT: $x10 = COPY [[SELECT13]](s32) ; CHECK-NEXT: $x11 = COPY [[SELECT14]](s32) ; CHECK-NEXT: $x12 = COPY [[SELECT15]](s32) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-bswap.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-bswap.mir index f01397078e4095..9667b85a603db2 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-bswap.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-bswap.mir @@ -13,14 +13,11 @@ body: | ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ASSERT_ZEXT]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASSERT_ZEXT]], [[C2]] - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ASSERT_ZEXT]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = 
G_OR [[LSHR]], [[SHL]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; CHECK-NEXT: $x10 = COPY [[AND1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C1]] + ; CHECK-NEXT: $x10 = COPY [[AND]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 %1:_(s32) = G_ASSERT_ZEXT %0, 16 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-icmp.mir index 62a77ea069ad54..894a6a0aa954ab 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-icmp.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-icmp.mir @@ -115,9 +115,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[ASHR]](s32), [[ASHR1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR]](s32), [[ASHR1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), %xhi(s32), %yhi - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -146,9 +144,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), %xhi(s32), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -277,9 +273,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR]](s32), [[ASHR1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %xhi(s32), %yhi - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -308,9 +302,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %xhi(s32), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -439,9 +431,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP 
intpred(sge), [[ASHR]](s32), [[ASHR1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR]](s32), [[ASHR1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), %xhi(s32), %yhi - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -470,9 +460,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), %xhi(s32), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -601,9 +589,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sle), [[ASHR]](s32), [[ASHR1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR]](s32), [[ASHR1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), %xhi(s32), %yhi - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -632,9 +618,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sle), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), %xhi(s32), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -759,9 +743,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND1]](s32), [[AND3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[AND1]](s32), [[AND3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -790,9 +772,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), %xhi(s32), %yhi - 
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -917,9 +897,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND1]](s32), [[AND3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[AND1]](s32), [[AND3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -948,9 +926,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), %xhi(s32), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -1075,9 +1051,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[AND1]](s32), [[AND3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[AND1]](s32), [[AND3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[AND]](s32), [[AND2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -1106,9 +1080,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), %xhi(s32), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -1233,9 +1205,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[AND1]](s32), [[AND3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[AND1]](s32), [[AND3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[AND]](s32), [[AND2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = 
G_SELECT [[AND4]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 @@ -1264,9 +1234,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), %xhi(s32), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s32) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-lshr.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-lshr.mir index da6fb7802bd420..604ae2a4b7928f 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-lshr.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-lshr.mir @@ -114,15 +114,9 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL]] ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[SUB]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C5]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[OR]], [[LSHR2]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[AND]], [[SELECT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C7]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), [[LSHR]], [[C4]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[LSHR2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[AND]], [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[LSHR]], [[C4]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -162,15 +156,9 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR %hi1, [[SUB]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[LSHR2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), %lo1, [[SELECT]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[LSHR]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[LSHR2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %lo1, [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT 
[[ICMP]](s32), [[LSHR]], [[C2]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -213,93 +201,61 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[SUB3]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[C]], [[SUB2]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C5]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[LSHR1]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP3]], [[C6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), %hi1, [[SELECT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB %lo2, [[C7]] - ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C7]], %lo2 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo2(s32), [[C7]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %lo2(s32), [[C8]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[OR]], [[LSHR1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), %hi1, [[SELECT]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB %lo2, [[C5]] + ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C5]], %lo2 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo2(s32), [[C5]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %lo2(s32), [[C6]] ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR %mid1, %lo2(s32) ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR %lo1, %lo2(s32) ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL %mid1, [[SUB5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL1]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR %mid1, [[SUB4]](s32) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C10]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[OR1]], [[LSHR4]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[C11]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), %lo1, [[SELECT2]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C12]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), [[LSHR2]], [[C9]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C13]] - ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C13]], [[SUB1]] - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s32), [[C13]] - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C14]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[OR1]], [[LSHR4]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), %lo1, [[SELECT2]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[LSHR2]], [[C7]] 
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C8]] + ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C8]], [[SUB1]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s32), [[C8]] + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C9]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL %hi1, [[SUB1]](s32) ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR %hi1, [[SUB7]](s32) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[SUB1]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL3]] - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL %hi1, [[SUB6]](s32) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C16]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s32), [[SHL2]], [[C15]] - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C17]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND6]](s32), [[OR2]], [[SHL4]] - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ICMP7]], [[C18]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[AND7]](s32), [[C]], [[SELECT6]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s32), [[SHL2]], [[C10]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s32), [[OR2]], [[SHL4]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s32), [[C]], [[SELECT6]] ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SELECT3]], [[SELECT5]] ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SELECT4]], [[SELECT7]] - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C20]] - ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C20]], [[SUB]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB]](s32), [[C20]] - ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB]](s32), [[C21]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C12]] + ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C12]], [[SUB]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB]](s32), [[C12]] + ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB]](s32), [[C13]] ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[C]], [[SUB]](s32) ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR %hi1, [[SUB]](s32) ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[SUB9]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[SHL5]] - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[C]], [[SUB8]](s32) - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ICMP8]], [[C23]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[AND8]](s32), [[OR5]], [[LSHR8]] - ; CHECK-NEXT: 
[[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ICMP9]], [[C24]] - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[AND9]](s32), %hi1, [[SELECT8]] - ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ICMP8]], [[C25]] - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[AND10]](s32), [[LSHR6]], [[C22]] - ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C26]] - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s32) = G_SELECT [[AND11]](s32), [[OR3]], [[SELECT9]] - ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C27]] - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s32) = G_SELECT [[AND12]](s32), [[OR4]], [[SELECT10]] - ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C28]] - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s32) = G_SELECT [[AND13]](s32), %lo1, [[SELECT11]] - ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C29]] - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s32) = G_SELECT [[AND14]](s32), %mid1, [[SELECT12]] - ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C30]] - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s32) = G_SELECT [[AND15]](s32), [[SELECT1]], [[C19]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[ICMP8]](s32), [[OR5]], [[LSHR8]] + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[ICMP9]](s32), %hi1, [[SELECT8]] + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[ICMP8]](s32), [[LSHR6]], [[C14]] + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR3]], [[SELECT9]] + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR4]], [[SELECT10]] + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %lo1, [[SELECT11]] + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %mid1, [[SELECT12]] + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SELECT1]], [[C11]] ; CHECK-NEXT: $x10 = COPY [[SELECT13]](s32) ; CHECK-NEXT: $x11 = COPY [[SELECT14]](s32) ; CHECK-NEXT: $x12 = COPY [[SELECT15]](s32) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-mul-ext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-mul-ext.mir index 04e99d7407f050..433d6e6b821f32 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-mul-ext.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-mul-ext.mir @@ -162,13 +162,9 @@ body: | ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH %lo1, %lo2 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], [[ICMP1]] ; CHECK-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL %hi1, %lo2 ; CHECK-NEXT: 
[[MUL4:%[0-9]+]]:_(s32) = G_MUL %mid1, %mid2 ; CHECK-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL %lo1, %hi2 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-shl.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-shl.mir index bc18361555c5fa..ce00388c61dbe1 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-shl.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-shl.mir @@ -104,15 +104,9 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL %xlo, [[SUB]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[SHL]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[OR]], [[SHL2]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), %xhi, [[SELECT1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SHL]], [[C2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[SHL2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %xhi, [[SELECT1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -152,15 +146,9 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL %lo1, [[SUB]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[SHL]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[OR]], [[SHL2]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), %hi1, [[SELECT1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SHL]], [[C2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[SHL2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %hi1, [[SELECT1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -204,60 +192,38 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL %lo1, [[SUB2]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C5]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[SHL]], [[C4]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[OR]], [[SHL2]] - ; CHECK-NEXT: 
[[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP3]], [[C7]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), %mid1, [[SELECT1]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C8]] - ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C8]], [[SUB1]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s32), [[C8]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C9]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[SHL]], [[C4]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[OR]], [[SHL2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), %mid1, [[SELECT1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C5]] + ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C5]], [[SUB1]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s32), [[C5]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C6]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR %lo1, [[SUB1]](s32) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL %mid1, [[SUB5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL3]] ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR %mid1, [[SUB4]](s32) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C10]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[OR1]], [[LSHR2]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[C11]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), %lo1, [[SELECT3]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo2(s32), [[C12]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[OR1]], [[LSHR2]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), %lo1, [[SELECT3]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo2(s32), [[C7]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL %hi1, %lo2(s32) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C14]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s32), [[SHL4]], [[C13]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s32), [[SHL4]], [[C8]] ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SELECT4]], [[SELECT5]] - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB]](s32), [[C17]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB]](s32), [[C11]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL 
%lo1, [[SUB]](s32) - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP7]], [[C19]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND6]](s32), [[SHL5]], [[C18]] - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C20]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[AND7]](s32), [[SELECT]], [[C15]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C21]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[AND8]](s32), [[SELECT2]], [[C16]] - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C22]] - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[AND9]](s32), [[OR2]], [[SELECT6]] - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C23]] - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[AND10]](s32), %hi1, [[SELECT9]] + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s32), [[SHL5]], [[C12]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SELECT]], [[C9]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SELECT2]], [[C10]] + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR2]], [[SELECT6]] + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %hi1, [[SELECT9]] ; CHECK-NEXT: $x10 = COPY [[SELECT7]](s32) ; CHECK-NEXT: $x11 = COPY [[SELECT8]](s32) ; CHECK-NEXT: $x12 = COPY [[SELECT10]](s32) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smax.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smax.mir index 31df394c4f754f..4b1a5293f6282a 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smax.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smax.mir @@ -13,12 +13,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C3]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C3]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C2]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C2]](s32) ; CHECK-NEXT: $x10 = COPY [[ASHR]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 @@ -43,12 +41,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = 
G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C3]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C3]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C2]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C2]](s32) ; CHECK-NEXT: $x10 = COPY [[ASHR]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 @@ -69,9 +65,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 @@ -93,12 +87,8 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[COPY]], [[COPY2]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smin.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smin.mir index 2b589e6bb63c14..dac2be4119c141 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smin.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-smin.mir @@ -13,12 +13,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C3]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C3]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C2]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C2]](s32) ; CHECK-NEXT: $x10 = COPY [[ASHR]](s32) ; CHECK-NEXT: 
PseudoRET implicit $x10 %0:_(s32) = COPY $x10 @@ -43,12 +41,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C3]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C3]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C2]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C2]](s32) ; CHECK-NEXT: $x10 = COPY [[ASHR]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 @@ -69,9 +65,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 @@ -93,12 +87,8 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY1]](s32), [[COPY3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[COPY]], [[COPY2]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-store.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-store.mir index fb3367fcd30473..2ece5a8c9d4142 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-store.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-store.mir @@ -232,11 +232,9 @@ body: | ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s8)) ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p0) :: (store (s8) into unknown-address + 1) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C5]](s32) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C7]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[C5]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C6]](s32) ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 2) ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p0) :: (store (s8) into unknown-address + 3) ; CHECK-NEXT: PseudoRET diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-sub.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-sub.mir index bb3a68fa01142f..258d02646186cd 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-sub.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-sub.mir @@ -90,9 +90,7 @@ body: | ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB %xlo, %ylo ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %xlo(s32), %ylo ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB %xhi, %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s32) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -124,9 +122,7 @@ body: | ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB %lo1, %lo2 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo1(s32), %lo2 ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB %hi1, %hi2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s32) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -158,17 +154,13 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo1(s32), %lo2 ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB %mid1, %mid2 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[SUB1]](s32), %mid1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND1]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[ICMP]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]] ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB %hi1, %hi2 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C2]] - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB3]], [[AND2]] + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB3]], [[OR]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s32) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s32) ; CHECK-NEXT: $x12 = COPY [[SUB4]](s32) diff --git 
a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umax.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umax.mir index 8dea2cb875073f..a05dd43ce0aa3d 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umax.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umax.mir @@ -13,12 +13,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] - ; CHECK-NEXT: $x10 = COPY [[AND3]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]] + ; CHECK-NEXT: $x10 = COPY [[AND2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 %1:_(s32) = COPY $x11 @@ -42,12 +40,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] - ; CHECK-NEXT: $x10 = COPY [[AND3]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]] + ; CHECK-NEXT: $x10 = COPY [[AND2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 %1:_(s32) = COPY $x11 @@ -67,9 +63,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 @@ -91,12 +85,8 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[COPY2]] + ; 
CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[COPY]], [[COPY2]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umin.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umin.mir index cd180a2a5b329a..d9282d84455bcf 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umin.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-umin.mir @@ -13,12 +13,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] - ; CHECK-NEXT: $x10 = COPY [[AND3]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]] + ; CHECK-NEXT: $x10 = COPY [[AND2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 %1:_(s32) = COPY $x11 @@ -42,12 +40,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]] - ; CHECK-NEXT: $x10 = COPY [[AND3]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]] + ; CHECK-NEXT: $x10 = COPY [[AND2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 %1:_(s32) = COPY $x11 @@ -67,9 +63,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 @@ -91,12 +85,8 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY1]](s32), [[COPY3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: 
[[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[COPY]], [[COPY2]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-abs.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-abs.mir index d7b694b9dfdb82..ee92bb5197834c 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-abs.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-abs.mir @@ -16,8 +16,8 @@ body: | ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[ASSERT_ZEXT]](s64) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[TRUNC1]], [[ASHR1]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[XOR]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] ; CHECK-NEXT: $x10 = COPY [[AND]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-add.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-add.mir index 11ac3c5991376a..f394e4d5064edc 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-add.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-add.mir @@ -122,9 +122,7 @@ body: | ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %x00, %y00 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD]](s64), %y00 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD %x01, %y01 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[AND]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[ADD]](s64) ; CHECK-NEXT: $x11 = COPY [[ADD2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -156,9 +154,7 @@ body: | ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %lo1, %lo2 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD]](s64), %lo2 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD %hi1, %hi2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[AND]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[ADD]](s64) ; CHECK-NEXT: $x11 = COPY [[ADD2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -190,21 +186,19 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD]](s64), %lo2 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD %mid1, %mid2 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD1]](s64), %mid1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[AND]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP 
intpred(eq), [[ADD2]](s64), [[C1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[ICMP]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ADD2]](s64), [[C]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP2]](s64) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP]](s64) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP1]](s64) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC2]], [[AND1]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC2]], [[AND]] ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s64) = G_ADD %hi1, %hi2 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s64) = G_ADD [[ADD3]], [[AND2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s64) = G_ADD [[ADD3]], [[AND1]] ; CHECK-NEXT: $x10 = COPY [[ADD]](s64) ; CHECK-NEXT: $x11 = COPY [[ADD2]](s64) ; CHECK-NEXT: $x12 = COPY [[ADD4]](s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-addo-subo.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-addo-subo.mir index 6600f7f30d7296..5506f5228e9dbd 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-addo-subo.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-addo-subo.mir @@ -288,8 +288,8 @@ body: | ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ADD]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] @@ -324,8 +324,8 @@ body: | ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ADD]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-ashr.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-ashr.mir index a422c42e77684d..1a2233cb799078 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-ashr.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-ashr.mir @@ -8,8 +8,8 @@ body: | ; CHECK-LABEL: name: ashr_i8 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 @@ -36,8 +36,8 @@ body: | ; CHECK-LABEL: name: ashr_i15 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 @@ -64,8 +64,8 @@ body: | ; CHECK-LABEL: name: ashr_i16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -149,15 +149,9 @@ body: | ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C3]](s64) ; CHECK-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[SUB]](s64) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[OR]], [[ASHR3]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C5]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), %x00, [[SELECT]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C6]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[ASHR1]], [[ASHR2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR]], [[ASHR3]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %x00, [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[ASHR1]], [[ASHR2]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -198,15 +192,9 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR %hi1, [[C2]](s64) ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR %hi1, [[SUB]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[OR]], [[ASHR2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), %lo1, [[SELECT]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[ASHR]], [[ASHR1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR]], [[ASHR2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %lo1, [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[ASHR]], 
[[ASHR1]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -250,95 +238,63 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ASHR]], [[SUB3]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[SUB2]](s64) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[C5]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[OR]], [[ASHR1]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP3]], [[C6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), %hi1, [[SELECT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB %lo2, [[C7]] - ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C7]], %lo2 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo2(s64), [[C7]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %lo2(s64), [[C8]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s64), [[OR]], [[ASHR1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s64), %hi1, [[SELECT]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB %lo2, [[C5]] + ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C5]], %lo2 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo2(s64), [[C5]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %lo2(s64), [[C6]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR %mid1, %lo2(s64) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR %lo1, %lo2(s64) ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL %mid1, [[SUB5]](s64) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR %mid1, [[SUB4]](s64) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP4]], [[C10]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[OR1]], [[LSHR3]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ICMP5]], [[C11]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s64), %lo1, [[SELECT2]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP4]], [[C12]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[LSHR1]], [[C9]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C13]] - ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s64) = G_SUB [[C13]], [[SUB1]] - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB1]](s64), [[C13]] - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C14]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s64), [[OR1]], [[LSHR3]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s64), %lo1, [[SELECT2]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s64), [[LSHR1]], [[C7]] + ; CHECK-NEXT: 
[[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C8]] + ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s64) = G_SUB [[C8]], [[SUB1]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB1]](s64), [[C8]] + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C9]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL %hi1, [[SUB1]](s64) ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR %hi1, [[SUB7]](s64) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[ASHR]], [[SUB1]](s64) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL %hi1, [[SUB6]](s64) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ICMP6]], [[C16]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[AND5]](s64), [[SHL2]], [[C15]] - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[ICMP6]], [[C17]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[AND6]](s64), [[OR2]], [[SHL4]] - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s64) = G_AND [[ICMP7]], [[C18]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[AND7]](s64), [[ASHR]], [[SELECT6]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s64), [[SHL2]], [[C10]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s64), [[OR2]], [[SHL4]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s64), [[ASHR]], [[SELECT6]] ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT3]], [[SELECT5]] ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT7]] - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C19]](s64) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s64) = G_SUB [[SUB]], [[C20]] - ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s64) = G_SUB [[C20]], [[SUB]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB]](s64), [[C20]] - ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB]](s64), [[C21]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C11]](s64) + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s64) = G_SUB [[SUB]], [[C12]] + ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s64) = G_SUB [[C12]], [[SUB]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB]](s64), [[C12]] + ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB]](s64), [[C13]] ; CHECK-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[SUB]](s64) ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR %hi1, [[SUB]](s64) ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ASHR]], [[SUB9]](s64) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; CHECK-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C22]](s64) + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C14]](s64) ; CHECK-NEXT: 
[[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[SUB8]](s64) - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s64) = G_AND [[ICMP8]], [[C23]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[AND8]](s64), [[OR5]], [[ASHR5]] - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s64) = G_AND [[ICMP9]], [[C24]] - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[AND9]](s64), %hi1, [[SELECT8]] - ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s64) = G_AND [[ICMP8]], [[C25]] - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[AND10]](s64), [[ASHR3]], [[ASHR4]] - ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C26]] - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[AND11]](s64), [[OR3]], [[SELECT9]] - ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C27]] - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[AND12]](s64), [[OR4]], [[SELECT10]] - ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C28]] - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[AND13]](s64), %lo1, [[SELECT11]] - ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C29]] - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[AND14]](s64), %mid1, [[SELECT12]] - ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C30]] - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[AND15]](s64), [[SELECT1]], [[ASHR2]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s64), [[OR5]], [[ASHR5]] + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s64), %hi1, [[SELECT8]] + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s64), [[ASHR3]], [[ASHR4]] + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR3]], [[SELECT9]] + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR4]], [[SELECT10]] + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %lo1, [[SELECT11]] + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %mid1, [[SELECT12]] + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[SELECT1]], [[ASHR2]] ; CHECK-NEXT: $x10 = COPY [[SELECT13]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT14]](s64) ; CHECK-NEXT: $x12 = COPY [[SELECT15]](s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-bswap.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-bswap.mir index 07bdb346fd2ae3..b8b5a9f59c00a3 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-bswap.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-bswap.mir @@ -14,16 +14,13 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ASSERT_ZEXT]](s64) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[ASSERT_ZEXT]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C2]] - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C1]](s32) + ; CHECK-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: $x10 = COPY [[AND1]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: $x10 = COPY [[AND]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = G_ASSERT_ZEXT %0, 16 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-div.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-div.mir index d3895cb54092e1..63361e903577c5 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-div.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-div.mir @@ -332,11 +332,11 @@ body: | ; CHECK-M-LABEL: name: udiv_i8 ; CHECK-M: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-M-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] ; CHECK-M-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] ; CHECK-M-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UDIV]](s32) @@ -373,11 +373,11 @@ body: | ; CHECK-M-LABEL: name: udiv_i15 ; CHECK-M: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-M-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] ; CHECK-M-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] ; CHECK-M-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UDIV]](s32) @@ -414,11 +414,11 @@ body: | ; CHECK-M-LABEL: name: udiv_i16 ; CHECK-M: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-M-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] ; CHECK-M-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] ; CHECK-M-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UDIV]](s32) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-icmp.mir index 
351fdf18bfc95e..de47b1493e4daf 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-icmp.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-icmp.mir @@ -137,9 +137,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), %xhi(s64), %yhi - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -168,9 +166,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -321,9 +317,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sge), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), %xhi(s64), %yhi - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -352,9 +346,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(slt), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -505,9 +497,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sge), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), %xhi(s64), %yhi - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ 
-536,9 +526,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sge), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -689,9 +677,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sle), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ule), %xhi(s64), %yhi - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -720,9 +706,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sle), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ule), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -871,9 +855,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -902,9 +884,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -1053,9 +1033,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[AND1]](s64), [[AND3]] ; 
CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -1084,9 +1062,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sge), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -1235,9 +1211,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), [[AND]](s64), [[AND2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -1266,9 +1240,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -1417,9 +1389,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ule), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ule), [[AND]](s64), [[AND2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -1448,9 +1418,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ule), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ule), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND 
[[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-lshr.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-lshr.mir index aa34e7d2cdcae9..8cbae0fa0173e2 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-lshr.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-lshr.mir @@ -8,11 +8,11 @@ body: | ; CHECK-LABEL: name: lshr_i8 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32) @@ -35,11 +35,11 @@ body: | ; CHECK-LABEL: name: lshr_i15 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32) @@ -62,11 +62,11 @@ body: | ; CHECK-LABEL: name: lshr_i16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32) @@ -146,15 +146,9 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SUB]](s64) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C5]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[OR]], [[LSHR2]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C6]] - ; CHECK-NEXT: 
[[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s64), [[AND]], [[SELECT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C7]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[LSHR]], [[C4]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR]], [[LSHR2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[AND]], [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[LSHR]], [[C4]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -194,15 +188,9 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR %hi1, [[SUB]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[OR]], [[LSHR2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), %lo1, [[SELECT]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[LSHR]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR]], [[LSHR2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %lo1, [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[LSHR]], [[C2]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -245,93 +233,61 @@ body: | ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[C]], [[SUB3]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[C]], [[SUB2]](s64) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[C5]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[OR]], [[LSHR1]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP3]], [[C6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), %hi1, [[SELECT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB %lo2, [[C7]] - ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C7]], %lo2 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo2(s64), [[C7]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %lo2(s64), [[C8]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s64), [[OR]], [[LSHR1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s64), %hi1, [[SELECT]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB %lo2, [[C5]] + ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C5]], %lo2 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo2(s64), [[C5]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %lo2(s64), [[C6]] ; CHECK-NEXT: 
[[LSHR2:%[0-9]+]]:_(s64) = G_LSHR %mid1, %lo2(s64) ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR %lo1, %lo2(s64) ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL %mid1, [[SUB5]](s64) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR3]], [[SHL1]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR %mid1, [[SUB4]](s64) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP4]], [[C10]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[OR1]], [[LSHR4]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ICMP5]], [[C11]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s64), %lo1, [[SELECT2]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP4]], [[C12]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[LSHR2]], [[C9]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C13]] - ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s64) = G_SUB [[C13]], [[SUB1]] - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB1]](s64), [[C13]] - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C14]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s64), [[OR1]], [[LSHR4]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s64), %lo1, [[SELECT2]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s64), [[LSHR2]], [[C7]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C8]] + ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s64) = G_SUB [[C8]], [[SUB1]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB1]](s64), [[C8]] + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C9]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL %hi1, [[SUB1]](s64) ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR %hi1, [[SUB7]](s64) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[C]], [[SUB1]](s64) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL3]] - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL %hi1, [[SUB6]](s64) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ICMP6]], [[C16]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[AND5]](s64), [[SHL2]], [[C15]] - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[ICMP6]], [[C17]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[AND6]](s64), [[OR2]], [[SHL4]] - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s64) = G_AND [[ICMP7]], [[C18]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[AND7]](s64), [[C]], [[SELECT6]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s64), [[SHL2]], [[C10]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s64), [[OR2]], [[SHL4]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s64), [[C]], [[SELECT6]] ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s64) 
= G_OR [[SELECT3]], [[SELECT5]] ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT7]] - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s64) = G_SUB [[SUB]], [[C20]] - ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s64) = G_SUB [[C20]], [[SUB]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB]](s64), [[C20]] - ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB]](s64), [[C21]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s64) = G_SUB [[SUB]], [[C12]] + ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s64) = G_SUB [[C12]], [[SUB]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB]](s64), [[C12]] + ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB]](s64), [[C13]] ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[C]], [[SUB]](s64) ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR %hi1, [[SUB]](s64) ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[C]], [[SUB9]](s64) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR7]], [[SHL5]] - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[C]], [[SUB8]](s64) - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s64) = G_AND [[ICMP8]], [[C23]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[AND8]](s64), [[OR5]], [[LSHR8]] - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s64) = G_AND [[ICMP9]], [[C24]] - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[AND9]](s64), %hi1, [[SELECT8]] - ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s64) = G_AND [[ICMP8]], [[C25]] - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[AND10]](s64), [[LSHR6]], [[C22]] - ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C26]] - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[AND11]](s64), [[OR3]], [[SELECT9]] - ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C27]] - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[AND12]](s64), [[OR4]], [[SELECT10]] - ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C28]] - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[AND13]](s64), %lo1, [[SELECT11]] - ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C29]] - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[AND14]](s64), %mid1, [[SELECT12]] - ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C30]] - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[AND15]](s64), [[SELECT1]], [[C19]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s64), [[OR5]], [[LSHR8]] + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s64), %hi1, [[SELECT8]] + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s64), [[LSHR6]], [[C14]] + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT 
[[ICMP]](s64), [[OR3]], [[SELECT9]] + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR4]], [[SELECT10]] + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %lo1, [[SELECT11]] + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %mid1, [[SELECT12]] + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[SELECT1]], [[C11]] ; CHECK-NEXT: $x10 = COPY [[SELECT13]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT14]](s64) ; CHECK-NEXT: $x12 = COPY [[SELECT15]](s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-mul-ext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-mul-ext.mir index 8d8c23e25d172d..09e002e8428d79 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-mul-ext.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-mul-ext.mir @@ -194,13 +194,9 @@ body: | ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH %lo1, %lo2 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[MUL1]], [[MUL2]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD]](s64), [[MUL2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ADD]], [[UMULH]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD1]](s64), [[UMULH]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[AND]], [[AND1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ICMP]], [[ICMP1]] ; CHECK-NEXT: [[MUL3:%[0-9]+]]:_(s64) = G_MUL %hi1, %lo2 ; CHECK-NEXT: [[MUL4:%[0-9]+]]:_(s64) = G_MUL %mid1, %mid2 ; CHECK-NEXT: [[MUL5:%[0-9]+]]:_(s64) = G_MUL %lo1, %hi2 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-rem.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-rem.mir index e43516ee4acbb4..f076b5988948d1 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-rem.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-rem.mir @@ -332,11 +332,11 @@ body: | ; CHECK-M-LABEL: name: urem_i8 ; CHECK-M: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-M-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] ; CHECK-M-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] ; CHECK-M-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UREM]](s32) @@ -373,11 +373,11 @@ body: | ; CHECK-M-LABEL: name: urem_i15 ; CHECK-M: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-M-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 32767 ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] ; CHECK-M-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] ; CHECK-M-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UREM]](s32) @@ -414,11 +414,11 @@ body: | ; CHECK-M-LABEL: name: urem_i16 ; CHECK-M: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-M-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] ; CHECK-M-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] ; CHECK-M-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UREM]](s32) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-shl.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-shl.mir index 0f9587b4c8b8d6..057cbcb9972ac7 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-shl.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-shl.mir @@ -8,8 +8,8 @@ body: | ; CHECK-LABEL: name: shl_i8 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[AND]](s32) @@ -33,8 +33,8 @@ body: | ; CHECK-LABEL: name: shl_i15 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[AND]](s32) @@ -58,8 +58,8 @@ body: | ; CHECK-LABEL: name: shl_i16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[AND]](s32) @@ -136,15 +136,9 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL %x00, [[SUB]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[SHL]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT 
[[AND1]](s64), [[OR]], [[SHL2]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), %x01, [[SELECT1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[SHL]], [[C2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR]], [[SHL2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %x01, [[SELECT1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -184,15 +178,9 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL %lo1, [[SUB]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[SHL]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), [[OR]], [[SHL2]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), %hi1, [[SELECT1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[SHL]], [[C2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR]], [[SHL2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %hi1, [[SELECT1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -236,60 +224,38 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL %lo1, [[SUB2]](s64) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[C5]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[SHL]], [[C4]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[C6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), [[OR]], [[SHL2]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP3]], [[C7]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), %mid1, [[SELECT1]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C8]] - ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C8]], [[SUB1]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB1]](s64), [[C8]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C9]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s64), [[SHL]], [[C4]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s64), [[OR]], [[SHL2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s64), %mid1, [[SELECT1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C5]] + ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB 
[[C5]], [[SUB1]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB1]](s64), [[C5]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C6]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR %lo1, [[SUB1]](s64) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL %mid1, [[SUB5]](s64) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL3]] ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR %mid1, [[SUB4]](s64) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ICMP4]], [[C10]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s64), [[OR1]], [[LSHR2]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP5]], [[C11]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), %lo1, [[SELECT3]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo2(s64), [[C12]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s64), [[OR1]], [[LSHR2]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s64), %lo1, [[SELECT3]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo2(s64), [[C7]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL %hi1, %lo2(s64) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ICMP6]], [[C14]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[AND5]](s64), [[SHL4]], [[C13]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s64), [[SHL4]], [[C8]] ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT5]] - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB]](s64), [[C17]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB]](s64), [[C11]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL %lo1, [[SUB]](s64) - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[ICMP7]], [[C19]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[AND6]](s64), [[SHL5]], [[C18]] - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C20]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[AND7]](s64), [[SELECT]], [[C15]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C21]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[AND8]](s64), [[SELECT2]], [[C16]] - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C22]] - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[AND9]](s64), [[OR2]], [[SELECT6]] - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s64) = G_AND 
[[ICMP1]], [[C23]] - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[AND10]](s64), %hi1, [[SELECT9]] + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s64), [[SHL5]], [[C12]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[SELECT]], [[C9]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[SELECT2]], [[C10]] + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR2]], [[SELECT6]] + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %hi1, [[SELECT9]] ; CHECK-NEXT: $x10 = COPY [[SELECT7]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT8]](s64) ; CHECK-NEXT: $x12 = COPY [[SELECT10]](s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smax.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smax.mir index 43f4309dc5670c..7a69d1942fafe1 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smax.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smax.mir @@ -13,12 +13,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SELECT]], [[C3]](s64) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C3]](s64) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SELECT]], [[C2]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C2]](s64) ; CHECK-NEXT: $x10 = COPY [[ASHR]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 @@ -43,12 +41,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SELECT]], [[C3]](s64) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C3]](s64) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SELECT]], [[C2]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C2]](s64) ; CHECK-NEXT: $x10 = COPY [[ASHR]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 @@ -73,9 +69,7 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = 
G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SELECT]], 32 ; CHECK-NEXT: $x10 = COPY [[SEXT_INREG]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 @@ -97,9 +91,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smin.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smin.mir index 85fea46b4bc46f..0f86b8960d4d62 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smin.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-smin.mir @@ -13,12 +13,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SELECT]], [[C3]](s64) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C3]](s64) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SELECT]], [[C2]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C2]](s64) ; CHECK-NEXT: $x10 = COPY [[ASHR]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 @@ -43,12 +41,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SELECT]], [[C3]](s64) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C3]](s64) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SELECT]], [[C2]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C2]](s64) ; CHECK-NEXT: $x10 = COPY [[ASHR]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 @@ -73,9 +69,7 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]] - ; CHECK-NEXT: 
[[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SELECT]], 32 ; CHECK-NEXT: $x10 = COPY [[SEXT_INREG]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 @@ -97,9 +91,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-store.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-store.mir index fb4ead8f8d711a..39821784687330 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-store.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-store.mir @@ -261,11 +261,9 @@ body: | ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s8)) ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p0) :: (store (s8) into unknown-address + 1) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C5]](s32) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C7]](s64) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[C5]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 2) ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p0) :: (store (s8) into unknown-address + 3) ; CHECK-NEXT: PseudoRET diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-sub.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-sub.mir index 20bf8929c55529..c2504273c2af67 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-sub.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-sub.mir @@ -122,9 +122,7 @@ body: | ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB %x00, %y00 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %x00(s64), %y00 ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB %x01, %y01 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[AND]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s64) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -156,9 +154,7 @@ body: | ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB %lo1, %lo2 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo1(s64), %lo2 ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB %hi1, 
%hi2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[AND]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s64) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -190,21 +186,19 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo1(s64), %lo2 ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB %mid1, %mid2 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[SUB1]](s64), %mid1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[AND]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C1]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[ICMP]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP2]](s64) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP]](s64) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP1]](s64) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC2]], [[AND1]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC2]], [[AND]] ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s64) = G_SUB %hi1, %hi2 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB [[SUB3]], [[AND2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB [[SUB3]], [[AND1]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s64) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s64) ; CHECK-NEXT: $x12 = COPY [[SUB4]](s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umax.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umax.mir index d0310e3e21ec93..e07aa9978fb459 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umax.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umax.mir @@ -13,12 +13,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C3]] - ; CHECK-NEXT: $x10 = COPY [[AND3]](s64) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C2]] + ; CHECK-NEXT: $x10 = COPY [[AND2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ 
-42,12 +40,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C3]] - ; CHECK-NEXT: $x10 = COPY [[AND3]](s64) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C2]] + ; CHECK-NEXT: $x10 = COPY [[AND2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -71,12 +67,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C3]] - ; CHECK-NEXT: $x10 = COPY [[AND3]](s64) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C2]] + ; CHECK-NEXT: $x10 = COPY [[AND2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -96,9 +90,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umin.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umin.mir index a0eec3298a5869..23c6a26bec57a0 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umin.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-umin.mir @@ -13,12 +13,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C3]] - ; CHECK-NEXT: $x10 = COPY [[AND3]](s64) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), 
[[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C2]] + ; CHECK-NEXT: $x10 = COPY [[AND2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -42,12 +40,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C3]] - ; CHECK-NEXT: $x10 = COPY [[AND3]](s64) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C2]] + ; CHECK-NEXT: $x10 = COPY [[AND2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -71,12 +67,10 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C3]] - ; CHECK-NEXT: $x10 = COPY [[AND3]](s64) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C2]] + ; CHECK-NEXT: $x10 = COPY [[AND2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -96,9 +90,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll index f2228e9013ce9f..a962a72d703733 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll @@ -184,14 +184,14 @@ define i8 @udiv8_constant(i8 %a) nounwind { define i8 @udiv8_pow2(i8 %a) nounwind { ; RV64I-LABEL: udiv8_pow2: ; RV64I: # %bb.0: -; RV64I-NEXT: andi a0, a0, 248 -; RV64I-NEXT: srliw a0, a0, 3 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: srli a0, a0, 59 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: udiv8_pow2: ; RV64IM: # %bb.0: -; RV64IM-NEXT: andi a0, a0, 248 -; RV64IM-NEXT: srliw a0, a0, 3 +; RV64IM-NEXT: slli a0, a0, 56 +; RV64IM-NEXT: srli a0, a0, 59 ; RV64IM-NEXT: ret %1 = udiv 
i8 %a, 8 ret i8 %1 @@ -260,11 +260,10 @@ define i16 @udiv16_constant(i16 %a) nounwind { ; ; RV64IM-LABEL: udiv16_constant: ; RV64IM: # %bb.0: +; RV64IM-NEXT: lui a1, 52429 +; RV64IM-NEXT: slli a1, a1, 4 ; RV64IM-NEXT: slli a0, a0, 48 -; RV64IM-NEXT: srli a0, a0, 48 -; RV64IM-NEXT: lui a1, 13 -; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: mulhu a0, a0, a1 ; RV64IM-NEXT: srliw a0, a0, 18 ; RV64IM-NEXT: ret %1 = udiv i16 %a, 5 @@ -275,15 +274,13 @@ define i16 @udiv16_pow2(i16 %a) nounwind { ; RV64I-LABEL: udiv16_pow2: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 48 -; RV64I-NEXT: srli a0, a0, 48 -; RV64I-NEXT: srliw a0, a0, 3 +; RV64I-NEXT: srli a0, a0, 51 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: udiv16_pow2: ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a0, a0, 48 -; RV64IM-NEXT: srli a0, a0, 48 -; RV64IM-NEXT: srliw a0, a0, 3 +; RV64IM-NEXT: srli a0, a0, 51 ; RV64IM-NEXT: ret %1 = udiv i16 %a, 8 ret i16 %1 @@ -304,8 +301,9 @@ define i16 @udiv16_constant_lhs(i16 %a) nounwind { ; ; RV64IM-LABEL: udiv16_constant_lhs: ; RV64IM: # %bb.0: -; RV64IM-NEXT: slli a0, a0, 48 -; RV64IM-NEXT: srli a0, a0, 48 +; RV64IM-NEXT: lui a1, 16 +; RV64IM-NEXT: addi a1, a1, -1 +; RV64IM-NEXT: and a0, a0, a1 ; RV64IM-NEXT: li a1, 10 ; RV64IM-NEXT: divuw a0, a1, a0 ; RV64IM-NEXT: ret @@ -536,8 +534,8 @@ define i8 @sdiv8_constant(i8 %a) nounwind { ; RV64IM-NEXT: li a1, 103 ; RV64IM-NEXT: mul a0, a0, a1 ; RV64IM-NEXT: sraiw a1, a0, 9 -; RV64IM-NEXT: srliw a0, a0, 15 -; RV64IM-NEXT: andi a0, a0, 1 +; RV64IM-NEXT: slli a0, a0, 48 +; RV64IM-NEXT: srli a0, a0, 63 ; RV64IM-NEXT: addw a0, a1, a0 ; RV64IM-NEXT: ret %1 = sdiv i8 %a, 5 @@ -549,8 +547,8 @@ define i8 @sdiv8_pow2(i8 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 24 ; RV64I-NEXT: sraiw a1, a1, 24 -; RV64I-NEXT: srliw a1, a1, 12 -; RV64I-NEXT: andi a1, a1, 7 +; RV64I-NEXT: slli a1, a1, 49 +; RV64I-NEXT: srli a1, a1, 61 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: slli a0, a0, 24 ; RV64I-NEXT: sraiw a0, a0, 27 @@ -560,8 +558,8 @@ define i8 @sdiv8_pow2(i8 %a) nounwind { ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a1, a0, 24 ; RV64IM-NEXT: sraiw a1, a1, 24 -; RV64IM-NEXT: srliw a1, a1, 12 -; RV64IM-NEXT: andi a1, a1, 7 +; RV64IM-NEXT: slli a1, a1, 49 +; RV64IM-NEXT: srli a1, a1, 61 ; RV64IM-NEXT: add a0, a0, a1 ; RV64IM-NEXT: slli a0, a0, 24 ; RV64IM-NEXT: sraiw a0, a0, 27 @@ -653,8 +651,8 @@ define i16 @sdiv16_pow2(i16 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 16 ; RV64I-NEXT: sraiw a1, a1, 16 -; RV64I-NEXT: srliw a1, a1, 28 -; RV64I-NEXT: andi a1, a1, 7 +; RV64I-NEXT: slli a1, a1, 33 +; RV64I-NEXT: srli a1, a1, 61 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: slli a0, a0, 16 ; RV64I-NEXT: sraiw a0, a0, 19 @@ -664,8 +662,8 @@ define i16 @sdiv16_pow2(i16 %a) nounwind { ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a1, a0, 16 ; RV64IM-NEXT: sraiw a1, a1, 16 -; RV64IM-NEXT: srliw a1, a1, 28 -; RV64IM-NEXT: andi a1, a1, 7 +; RV64IM-NEXT: slli a1, a1, 33 +; RV64IM-NEXT: srli a1, a1, 61 ; RV64IM-NEXT: add a0, a0, a1 ; RV64IM-NEXT: slli a0, a0, 16 ; RV64IM-NEXT: sraiw a0, a0, 19 diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll index 11adbbdd245f1d..292c1d8087c536 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll @@ -330,8 +330,9 @@ define i16 @urem16_constant_lhs(i16 %a) nounwind { ; ; RV64IM-LABEL: urem16_constant_lhs: ; RV64IM: # %bb.0: -; RV64IM-NEXT: slli a0, a0, 48 -; RV64IM-NEXT: srli a0, a0, 48 +; RV64IM-NEXT: lui 
a1, 16 +; RV64IM-NEXT: addi a1, a1, -1 +; RV64IM-NEXT: and a0, a0, a1 ; RV64IM-NEXT: li a1, 10 ; RV64IM-NEXT: remuw a0, a1, a0 ; RV64IM-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll index bb2f2b73d4a0c7..ef505afc2d1d26 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll @@ -238,10 +238,10 @@ define signext i32 @findLastSet_i32(i32 signext %a) nounwind { ; RV64XTHEADBB: # %bb.0: ; RV64XTHEADBB-NEXT: th.extu a1, a0, 31, 0 ; RV64XTHEADBB-NEXT: th.ff1 a1, a1 -; RV64XTHEADBB-NEXT: addi a1, a1, -32 +; RV64XTHEADBB-NEXT: addiw a1, a1, -32 ; RV64XTHEADBB-NEXT: xori a1, a1, 31 ; RV64XTHEADBB-NEXT: snez a0, a0 -; RV64XTHEADBB-NEXT: addi a0, a0, -1 +; RV64XTHEADBB-NEXT: addiw a0, a0, -1 ; RV64XTHEADBB-NEXT: or a0, a0, a1 ; RV64XTHEADBB-NEXT: ret %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true) @@ -749,8 +749,9 @@ define i64 @no_sexth_i64(i64 %a) nounwind { define i32 @zexth_i32(i32 %a) nounwind { ; RV64I-LABEL: zexth_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 48 -; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBB-LABEL: zexth_i32: diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-zbkb.ll index e6e9829c16f22b..39a5b9b0f3676c 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-zbkb.ll @@ -71,16 +71,11 @@ define i64 @orn_i64(i64 %a, i64 %b) nounwind { } define signext i32 @xnor_i32(i32 signext %a, i32 signext %b) nounwind { -; RV64I-LABEL: xnor_i32: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: ret -; -; RV64ZBB-ZBKB-LABEL: xnor_i32: -; RV64ZBB-ZBKB: # %bb.0: -; RV64ZBB-ZBKB-NEXT: xnor a0, a0, a1 -; RV64ZBB-ZBKB-NEXT: ret +; CHECK-LABEL: xnor_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: not a0, a0 +; CHECK-NEXT: ret %neg = xor i32 %a, -1 %xor = xor i32 %neg, %b ret i32 %xor @@ -446,8 +441,8 @@ define i64 @not_shl_one_i64(i64 %x) { define i8 @srli_i8(i8 %a) nounwind { ; CHECK-LABEL: srli_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 192 -; CHECK-NEXT: srliw a0, a0, 6 +; CHECK-NEXT: slli a0, a0, 56 +; CHECK-NEXT: srli a0, a0, 62 ; CHECK-NEXT: ret %1 = lshr i8 %a, 6 ret i8 %1 @@ -480,25 +475,11 @@ define i8 @srai_i8(i8 %a) nounwind { ; We could use zext.h+srli, but slli+srli offers more opportunities for ; compressed instructions.
define i16 @srli_i16(i16 %a) nounwind { -; RV64I-LABEL: srli_i16: -; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 48 -; RV64I-NEXT: srli a0, a0, 48 -; RV64I-NEXT: srliw a0, a0, 6 -; RV64I-NEXT: ret -; -; RV64ZBB-LABEL: srli_i16: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: zext.h a0, a0 -; RV64ZBB-NEXT: srliw a0, a0, 6 -; RV64ZBB-NEXT: ret -; -; RV64ZBKB-LABEL: srli_i16: -; RV64ZBKB: # %bb.0: -; RV64ZBKB-NEXT: slli a0, a0, 48 -; RV64ZBKB-NEXT: srli a0, a0, 48 -; RV64ZBKB-NEXT: srliw a0, a0, 6 -; RV64ZBKB-NEXT: ret +; CHECK-LABEL: srli_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 48 +; CHECK-NEXT: srli a0, a0, 54 +; CHECK-NEXT: ret %1 = lshr i16 %a, 6 ret i16 %1 } diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll index acc175186b8586..695ddd6d308ecb 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll @@ -944,13 +944,16 @@ define i64 @abs_i64(i64 %x) { define i32 @zexth_i32(i32 %a) nounwind { ; RV64I-LABEL: zexth_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 48 -; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: zexth_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: lui a1, 16 +; RV64ZBB-NEXT: addiw a1, a1, -1 +; RV64ZBB-NEXT: and a0, a0, a1 ; RV64ZBB-NEXT: ret %and = and i32 %a, 65535 ret i32 %and diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbs.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbs.ll index c4680a5e15120f..0dfa56d4a00595 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbs.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbs.ll @@ -273,7 +273,8 @@ define signext i32 @bext_i32(i32 signext %a, i32 signext %b) nounwind { ; RV64ZBS-LABEL: bext_i32: ; RV64ZBS: # %bb.0: ; RV64ZBS-NEXT: andi a1, a1, 31 -; RV64ZBS-NEXT: bext a0, a0, a1 +; RV64ZBS-NEXT: srl a0, a0, a1 +; RV64ZBS-NEXT: andi a0, a0, 1 ; RV64ZBS-NEXT: ret %and = and i32 %b, 31 %shr = lshr i32 %a, %and @@ -290,7 +291,8 @@ define signext i32 @bext_i32_no_mask(i32 signext %a, i32 signext %b) nounwind { ; ; RV64ZBS-LABEL: bext_i32_no_mask: ; RV64ZBS: # %bb.0: -; RV64ZBS-NEXT: bext a0, a0, a1 +; RV64ZBS-NEXT: srl a0, a0, a1 +; RV64ZBS-NEXT: andi a0, a0, 1 ; RV64ZBS-NEXT: ret %shr = lshr i32 %a, %b %and1 = and i32 %shr, 1 @@ -367,11 +369,17 @@ define i64 @bext_i64_no_mask(i64 %a, i64 %b) nounwind { } define signext i32 @bexti_i32(i32 signext %a) nounwind { -; CHECK-LABEL: bexti_i32: -; CHECK: # %bb.0: -; CHECK-NEXT: srliw a0, a0, 5 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: ret +; RV64I-LABEL: bexti_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 58 +; RV64I-NEXT: srli a0, a0, 63 +; RV64I-NEXT: ret +; +; RV64ZBS-LABEL: bexti_i32: +; RV64ZBS: # %bb.0: +; RV64ZBS-NEXT: srliw a0, a0, 5 +; RV64ZBS-NEXT: andi a0, a0, 1 +; RV64ZBS-NEXT: ret %shr = lshr i32 %a, 5 %and = and i32 %shr, 1 ret i32 %and @@ -394,16 +402,11 @@ define i64 @bexti_i64(i64 %a) nounwind { } define signext i32 @bexti_i32_cmp(i32 signext %a) nounwind { -; RV64I-LABEL: bexti_i32_cmp: -; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 58 -; RV64I-NEXT: srli a0, a0, 63 -; RV64I-NEXT: ret -; -; RV64ZBS-LABEL: bexti_i32_cmp: -; RV64ZBS: # %bb.0: -; RV64ZBS-NEXT: bexti a0, a0, 5 -; RV64ZBS-NEXT: ret +; CHECK-LABEL: bexti_i32_cmp: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a0, a0, 32 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: ret %and = and i32 %a, 32 %cmp = icmp ne i32 %and, 0 %zext = zext i1 
%cmp to i32 @@ -469,11 +472,18 @@ define signext i32 @bclri_i32_30(i32 signext %a) nounwind { } define signext i32 @bclri_i32_31(i32 signext %a) nounwind { -; CHECK-LABEL: bclri_i32_31: -; CHECK: # %bb.0: -; CHECK-NEXT: slli a0, a0, 33 -; CHECK-NEXT: srli a0, a0, 33 -; CHECK-NEXT: ret +; RV64I-LABEL: bclri_i32_31: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBS-LABEL: bclri_i32_31: +; RV64ZBS: # %bb.0: +; RV64ZBS-NEXT: bclri a0, a0, 31 +; RV64ZBS-NEXT: sext.w a0, a0 +; RV64ZBS-NEXT: ret %and = and i32 %a, -2147483649 ret i32 %and } @@ -614,7 +624,7 @@ define signext i32 @bseti_i32_11(i32 signext %a) nounwind { ; RV64I-LABEL: bseti_i32_11: ; RV64I: # %bb.0: ; RV64I-NEXT: li a1, 1 -; RV64I-NEXT: slli a1, a1, 11 +; RV64I-NEXT: slliw a1, a1, 11 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; @@ -642,11 +652,17 @@ define signext i32 @bseti_i32_30(i32 signext %a) nounwind { } define signext i32 @bseti_i32_31(i32 signext %a) nounwind { -; CHECK-LABEL: bseti_i32_31: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: bseti_i32_31: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBS-LABEL: bseti_i32_31: +; RV64ZBS: # %bb.0: +; RV64ZBS-NEXT: bseti a0, a0, 31 +; RV64ZBS-NEXT: sext.w a0, a0 +; RV64ZBS-NEXT: ret %or = or i32 %a, 2147483648 ret i32 %or } @@ -752,7 +768,7 @@ define signext i32 @binvi_i32_11(i32 signext %a) nounwind { ; RV64I-LABEL: binvi_i32_11: ; RV64I: # %bb.0: ; RV64I-NEXT: li a1, 1 -; RV64I-NEXT: slli a1, a1, 11 +; RV64I-NEXT: slliw a1, a1, 11 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: ret ; @@ -780,11 +796,17 @@ define signext i32 @binvi_i32_30(i32 signext %a) nounwind { } define signext i32 @binvi_i32_31(i32 signext %a) nounwind { -; CHECK-LABEL: binvi_i32_31: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: binvi_i32_31: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBS-LABEL: binvi_i32_31: +; RV64ZBS: # %bb.0: +; RV64ZBS-NEXT: binvi a0, a0, 31 +; RV64ZBS-NEXT: sext.w a0, a0 +; RV64ZBS-NEXT: ret %xor = xor i32 %a, 2147483648 ret i32 %xor } diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll index 774d1398644b98..20eb4cc10f4054 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll @@ -758,7 +758,8 @@ define i1 @smulo.not.i32(i32 signext %v1, i32 signext %v2) { ; RV64-NEXT: srai a1, a0, 32 ; RV64-NEXT: sraiw a0, a0, 31 ; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: xori a0, a0, 1 ; RV64-NEXT: ret entry: %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) @@ -792,7 +793,8 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: srai a0, a0, 63 ; RV64-NEXT: xor a0, a2, a0 -; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: xori a0, a0, 1 ; RV64-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) @@ -827,7 +829,8 @@ define i1 @umulo.not.i32(i32 signext %v1, i32 signext %v2) { ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: mulhu a0, a0, a1 ; RV64-NEXT: srai a0, a0, 32 -; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: xori a0, a0, 1 ; RV64-NEXT: ret entry: %t = call {i32, i1} 
@llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) @@ -856,7 +859,8 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) { ; RV64-LABEL: umulo.not.i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: mulhu a0, a0, a1 -; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: xori a0, a0, 1 ; RV64-NEXT: ret entry: %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-cmp.mir index 32afcb2db4dadd..1184c075f42205 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-cmp.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-cmp.mir @@ -48,14 +48,16 @@ body: | liveins: $edi, $esi ; CHECK-LABEL: name: test_cmp_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[COPY1:%[0-9]+]]:_(s8) = COPY $sil - ; CHECK: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s8), [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $sil + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s8), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %0(s8) = COPY $dil %1(s8) = COPY $sil %2(s1) = G_ICMP intpred(ult), %0(s8), %1 @@ -79,14 +81,16 @@ body: | liveins: $edi, $esi ; CHECK-LABEL: name: test_cmp_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY $si - ; CHECK: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s16), [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $si + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s16), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %0(s16) = COPY $di %1(s16) = COPY $si %2(s1) = G_ICMP intpred(ult), %0(s16), %1 @@ -110,14 +114,16 @@ body: | liveins: $edi, $esi ; CHECK-LABEL: name: test_cmp_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] 
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %0(s32) = COPY $edi %1(s32) = COPY $esi %2(s1) = G_ICMP intpred(ult), %0(s32), %1 @@ -141,14 +147,16 @@ body: | liveins: $rdi, $rsi ; CHECK-LABEL: name: test_cmp_i64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi - ; CHECK: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK: liveins: $rdi, $rsi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $rdi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %0(s64) = COPY $rdi %1(s64) = COPY $rsi %2(s1) = G_ICMP intpred(ult), %0(s64), %1 @@ -172,14 +180,16 @@ body: | liveins: $rdi, $rsi ; CHECK-LABEL: name: test_cmp_p0 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $rsi - ; CHECK: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](p0), [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK: liveins: $rdi, $rsi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $rdi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $rsi + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](p0), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %0(p0) = COPY $rdi %1(p0) = COPY $rsi %2(s1) = G_ICMP intpred(ult), %0(p0), %1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ext-x86-64.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ext-x86-64.mir index 60c21e40a5ec85..1ea2da9757f1ad 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-ext-x86-64.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-ext-x86-64.mir @@ -76,14 +76,16 @@ body: | liveins: $edi ; CHECK-LABEL: name: test_sext_i1 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) - ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 63 - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C]](s8) - ; CHECK: [[COPY1:%[0-9]+]]:_(s8) = COPY [[C]](s8) - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[COPY1]](s8) - ; CHECK: $rax = COPY [[ASHR]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 63 + ; CHECK-NEXT: 
[[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C]](s8) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[C]](s8) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[COPY1]](s8) + ; CHECK-NEXT: $rax = COPY [[ASHR]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s8) = COPY $dil %1(s1) = G_TRUNC %0(s8) %2(s64) = G_SEXT %1(s1) @@ -104,10 +106,12 @@ body: | liveins: $edi ; CHECK-LABEL: name: test_sext_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s8) - ; CHECK: $rax = COPY [[SEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s8) + ; CHECK-NEXT: $rax = COPY [[SEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s8) = COPY $dil %1(s64) = G_SEXT %0(s8) $rax = COPY %1(s64) @@ -127,10 +131,12 @@ body: | liveins: $edi ; CHECK-LABEL: name: test_sext_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s16) - ; CHECK: $rax = COPY [[SEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s16) + ; CHECK-NEXT: $rax = COPY [[SEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s16) = COPY $di %1(s64) = G_SEXT %0(s16) $rax = COPY %1(s64) @@ -150,10 +156,12 @@ body: | liveins: $edi ; CHECK-LABEL: name: test_sext_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32) - ; CHECK: $rax = COPY [[SEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32) + ; CHECK-NEXT: $rax = COPY [[SEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s32) = COPY $edi %1(s64) = G_SEXT %0(s32) $rax = COPY %1(s64) @@ -174,12 +182,14 @@ body: | liveins: $edi ; CHECK-LABEL: name: test_zext_i1 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $rax = COPY [[AND]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $rax = COPY [[AND]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s8) = COPY $dil %1(s1) = G_TRUNC %0(s8) %2(s64) = G_ZEXT %1(s1) @@ -200,10 +210,12 @@ body: | liveins: $edi ; CHECK-LABEL: name: test_zext_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s8) - ; CHECK: $rax = COPY [[ZEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s8) + ; CHECK-NEXT: $rax = COPY [[ZEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s8) = COPY $dil %1(s64) = G_ZEXT %0(s8) $rax = COPY %1(s64) @@ -223,10 +235,12 @@ body: | liveins: $edi ; CHECK-LABEL: name: test_zext_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s16) - ; CHECK: $rax = COPY 
[[ZEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s16) + ; CHECK-NEXT: $rax = COPY [[ZEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s16) = COPY $di %1(s64) = G_ZEXT %0(s16) $rax = COPY %1(s64) @@ -246,10 +260,12 @@ body: | liveins: $edi ; CHECK-LABEL: name: test_zext_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK: $rax = COPY [[ZEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK-NEXT: $rax = COPY [[ZEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s32) = COPY $edi %1(s64) = G_ZEXT %0(s32) $rax = COPY %1(s64) @@ -270,10 +286,12 @@ body: | liveins: $edi ; CHECK-LABEL: name: test_anyext_i1 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) - ; CHECK: $rax = COPY [[ANYEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) + ; CHECK-NEXT: $rax = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s8) = COPY $dil %1(s1) = G_TRUNC %0(s8) %2(s64) = G_ANYEXT %1(s1) @@ -294,10 +312,12 @@ body: | liveins: $edi ; CHECK-LABEL: name: test_anyext_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) - ; CHECK: $rax = COPY [[ANYEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) + ; CHECK-NEXT: $rax = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s8) = COPY $dil %1(s64) = G_ANYEXT %0(s8) $rax = COPY %1(s64) @@ -317,10 +337,12 @@ body: | liveins: $edi ; CHECK-LABEL: name: test_anyext_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s16) - ; CHECK: $rax = COPY [[ANYEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s16) + ; CHECK-NEXT: $rax = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s16) = COPY $di %1(s64) = G_ANYEXT %0(s16) $rax = COPY %1(s64) @@ -340,10 +362,12 @@ body: | liveins: $edi ; CHECK-LABEL: name: test_anyext_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK: $rax = COPY [[ANYEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) + ; CHECK-NEXT: $rax = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s32) = COPY $edi %1(s64) = G_ANYEXT %0(s32) $rax = COPY %1(s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ext.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ext.mir index 58c9880023c880..bf625cf7eeb995 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-ext.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-ext.mir @@ -104,19 +104,24 @@ body: | liveins: $edi ; X32-LABEL: name: test_zext_i1toi8 - 
; X32: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X32: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; X32: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; X32: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] - ; X32: $al = COPY [[AND]](s8) - ; X32: RET 0, implicit $al + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X32-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; X32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; X32-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] + ; X32-NEXT: $al = COPY [[AND]](s8) + ; X32-NEXT: RET 0, implicit $al + ; ; X64-LABEL: name: test_zext_i1toi8 - ; X64: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X64: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; X64: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; X64: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] - ; X64: $al = COPY [[AND]](s8) - ; X64: RET 0, implicit $al + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; X64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; X64-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] + ; X64-NEXT: $al = COPY [[AND]](s8) + ; X64-NEXT: RET 0, implicit $al %1:_(s32) = COPY $edi %0:_(s1) = G_TRUNC %1(s32) %2:_(s8) = G_ZEXT %0(s1) @@ -137,19 +142,24 @@ body: | liveins: $edi ; X32-LABEL: name: test_zext_i1toi16 - ; X32: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X32: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; X32: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; X32: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; X32: $ax = COPY [[AND]](s16) - ; X32: RET 0, implicit $ax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X32-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; X32-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; X32-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; X32-NEXT: $ax = COPY [[AND]](s16) + ; X32-NEXT: RET 0, implicit $ax + ; ; X64-LABEL: name: test_zext_i1toi16 - ; X64: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X64: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; X64: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; X64: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; X64: $ax = COPY [[AND]](s16) - ; X64: RET 0, implicit $ax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; X64-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; X64-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; X64-NEXT: $ax = COPY [[AND]](s16) + ; X64-NEXT: RET 0, implicit $ax %1:_(s32) = COPY $edi %0:_(s1) = G_TRUNC %1(s32) %2:_(s16) = G_ZEXT %0(s1) @@ -171,19 +181,24 @@ body: | liveins: $edi ; X32-LABEL: name: test_zext_i1 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; X32: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) - ; X32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; X32: $eax = COPY [[AND]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) + ; X32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; X32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; X32-NEXT: $eax = COPY [[AND]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_zext_i1 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; X64: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) - ; X64: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; X64: $eax = COPY [[AND]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) + ; X64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; X64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; X64-NEXT: $eax = COPY [[AND]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s8) = COPY $dil %1(s1) = G_TRUNC %0(s8) %2(s32) = G_ZEXT %1(s1) @@ -204,15 +219,20 @@ body: | liveins: $edi ; X32-LABEL: name: test_zext_i8toi16 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[COPY]](s8) - ; X32: $ax = COPY [[ZEXT]](s16) - ; X32: RET 0, implicit $ax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[COPY]](s8) + ; X32-NEXT: $ax = COPY [[ZEXT]](s16) + ; X32-NEXT: RET 0, implicit $ax + ; ; X64-LABEL: name: test_zext_i8toi16 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[COPY]](s8) - ; X64: $ax = COPY [[ZEXT]](s16) - ; X64: RET 0, implicit $ax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[COPY]](s8) + ; X64-NEXT: $ax = COPY [[ZEXT]](s16) + ; X64-NEXT: RET 0, implicit $ax %0(s8) = COPY $dil %1(s16) = G_ZEXT %0(s8) $ax = COPY %1(s16) @@ -232,15 +252,20 @@ body: | liveins: $edi ; X32-LABEL: name: test_zext_i8 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s8) - ; X32: $eax = COPY [[ZEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s8) + ; X32-NEXT: $eax = COPY [[ZEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_zext_i8 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s8) - ; X64: $eax = COPY [[ZEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s8) + ; X64-NEXT: $eax = COPY [[ZEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s8) = COPY $dil %1(s32) = G_ZEXT %0(s8) $eax = COPY %1(s32) @@ -260,15 +285,20 @@ body: | liveins: $edi ; X32-LABEL: name: test_zext_i16 - ; X32: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; X32: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16) - ; X32: $eax = COPY [[ZEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; X32-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16) + ; X32-NEXT: $eax = COPY [[ZEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_zext_i16 - ; X64: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; X64: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16) - ; X64: $eax = COPY [[ZEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16) + ; X64-NEXT: $eax = COPY [[ZEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s16) = COPY $di %1(s32) = G_ZEXT %0(s16) $eax = COPY %1(s32) @@ -288,13 +318,18 @@ body: | 
liveins: $edi ; X32-LABEL: name: test_sext_i1toi8 - ; X32: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 - ; X32: $al = COPY [[C]](s8) - ; X32: RET 0, implicit $al + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; X32-NEXT: $al = COPY [[C]](s8) + ; X32-NEXT: RET 0, implicit $al + ; ; X64-LABEL: name: test_sext_i1toi8 - ; X64: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 - ; X64: $al = COPY [[C]](s8) - ; X64: RET 0, implicit $al + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; X64-NEXT: $al = COPY [[C]](s8) + ; X64-NEXT: RET 0, implicit $al %0(s1) = G_IMPLICIT_DEF %1(s8) = G_SEXT %0(s1) $al = COPY %1(s8) @@ -314,13 +349,18 @@ body: | liveins: $edi ; X32-LABEL: name: test_sext_i1toi16 - ; X32: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; X32: $ax = COPY [[C]](s16) - ; X32: RET 0, implicit $ax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; X32-NEXT: $ax = COPY [[C]](s16) + ; X32-NEXT: RET 0, implicit $ax + ; ; X64-LABEL: name: test_sext_i1toi16 - ; X64: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; X64: $ax = COPY [[C]](s16) - ; X64: RET 0, implicit $ax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; X64-NEXT: $ax = COPY [[C]](s16) + ; X64-NEXT: RET 0, implicit $ax %0(s1) = G_IMPLICIT_DEF %1(s16) = G_SEXT %0(s1) $ax = COPY %1(s16) @@ -341,13 +381,18 @@ body: | liveins: $edi ; X32-LABEL: name: test_sext_i1 - ; X32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; X32: $eax = COPY [[C]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; X32-NEXT: $eax = COPY [[C]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_sext_i1 - ; X64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; X64: $eax = COPY [[C]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; X64-NEXT: $eax = COPY [[C]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s1) = G_IMPLICIT_DEF %2(s32) = G_SEXT %0(s1) $eax = COPY %2(s32) @@ -367,15 +412,20 @@ body: | liveins: $edi ; X32-LABEL: name: test_sext_i8toi16 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[COPY]](s8) - ; X32: $ax = COPY [[SEXT]](s16) - ; X32: RET 0, implicit $ax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[COPY]](s8) + ; X32-NEXT: $ax = COPY [[SEXT]](s16) + ; X32-NEXT: RET 0, implicit $ax + ; ; X64-LABEL: name: test_sext_i8toi16 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[COPY]](s8) - ; X64: $ax = COPY [[SEXT]](s16) - ; X64: RET 0, implicit $ax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[COPY]](s8) + ; X64-NEXT: $ax = COPY [[SEXT]](s16) + ; X64-NEXT: RET 0, implicit $ax %0(s8) = COPY $dil %1(s16) = G_SEXT %0(s8) $ax = COPY %1(s16) @@ -395,15 +445,20 @@ body: | liveins: $edi ; X32-LABEL: name: test_sext_i8 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s8) - ; X32: $eax = COPY [[SEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s8) + ; X32-NEXT: $eax 
= COPY [[SEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_sext_i8 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s8) - ; X64: $eax = COPY [[SEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s8) + ; X64-NEXT: $eax = COPY [[SEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s8) = COPY $dil %1(s32) = G_SEXT %0(s8) $eax = COPY %1(s32) @@ -423,15 +478,20 @@ body: | liveins: $edi ; X32-LABEL: name: test_sext_i16 - ; X32: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; X32: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16) - ; X32: $eax = COPY [[SEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; X32-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16) + ; X32-NEXT: $eax = COPY [[SEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_sext_i16 - ; X64: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; X64: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16) - ; X64: $eax = COPY [[SEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; X64-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16) + ; X64-NEXT: $eax = COPY [[SEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s16) = COPY $di %1(s32) = G_SEXT %0(s16) $eax = COPY %1(s32) @@ -452,15 +512,20 @@ body: | liveins: $edi ; X32-LABEL: name: test_anyext_i1toi8 - ; X32: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X32: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; X32: $al = COPY [[TRUNC]](s8) - ; X32: RET 0, implicit $al + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X32-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; X32-NEXT: $al = COPY [[TRUNC]](s8) + ; X32-NEXT: RET 0, implicit $al + ; ; X64-LABEL: name: test_anyext_i1toi8 - ; X64: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X64: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; X64: $al = COPY [[TRUNC]](s8) - ; X64: RET 0, implicit $al + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; X64-NEXT: $al = COPY [[TRUNC]](s8) + ; X64-NEXT: RET 0, implicit $al %0(s32) = COPY $edi %1(s1) = G_TRUNC %0(s32) %2(s8) = G_ANYEXT %1(s1) @@ -482,15 +547,20 @@ body: | liveins: $edi ; X32-LABEL: name: test_anyext_i1toi16 - ; X32: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X32: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; X32: $ax = COPY [[TRUNC]](s16) - ; X32: RET 0, implicit $ax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X32-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; X32-NEXT: $ax = COPY [[TRUNC]](s16) + ; X32-NEXT: RET 0, implicit $ax + ; ; X64-LABEL: name: test_anyext_i1toi16 - ; X64: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X64: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; X64: $ax = COPY [[TRUNC]](s16) - ; X64: RET 0, implicit $ax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; X64-NEXT: $ax = COPY [[TRUNC]](s16) + ; X64-NEXT: RET 0, implicit $ax %0(s32) = COPY $edi %1(s1) = G_TRUNC %0(s32) %2(s16) = G_ANYEXT %1(s1) @@ -512,15 +582,20 @@ body: | liveins: $edi ; X32-LABEL: name: 
test_anyext_i1 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) - ; X32: $eax = COPY [[ANYEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) + ; X32-NEXT: $eax = COPY [[ANYEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_anyext_i1 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) - ; X64: $eax = COPY [[ANYEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) + ; X64-NEXT: $eax = COPY [[ANYEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s8) = COPY $dil %1(s1) = G_TRUNC %0(s8) %2(s32) = G_ANYEXT %1(s1) @@ -541,15 +616,20 @@ body: | liveins: $edi ; X32-LABEL: name: test_anyext_i8toi16 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) - ; X32: $ax = COPY [[ANYEXT]](s16) - ; X32: RET 0, implicit $ax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) + ; X32-NEXT: $ax = COPY [[ANYEXT]](s16) + ; X32-NEXT: RET 0, implicit $ax + ; ; X64-LABEL: name: test_anyext_i8toi16 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) - ; X64: $ax = COPY [[ANYEXT]](s16) - ; X64: RET 0, implicit $ax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) + ; X64-NEXT: $ax = COPY [[ANYEXT]](s16) + ; X64-NEXT: RET 0, implicit $ax %0(s8) = COPY $dil %1(s16) = G_ANYEXT %0(s8) $ax = COPY %1(s16) @@ -569,15 +649,20 @@ body: | liveins: $edi ; X32-LABEL: name: test_anyext_i8 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) - ; X32: $eax = COPY [[ANYEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) + ; X32-NEXT: $eax = COPY [[ANYEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_anyext_i8 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) - ; X64: $eax = COPY [[ANYEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) + ; X64-NEXT: $eax = COPY [[ANYEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s8) = COPY $dil %1(s32) = G_ANYEXT %0(s8) $eax = COPY %1(s32) @@ -597,15 +682,20 @@ body: | liveins: $edi ; X32-LABEL: name: test_anyext_i16 - ; X32: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; X32: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) - ; X32: $eax = COPY [[ANYEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; X32-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) + ; X32-NEXT: $eax = COPY [[ANYEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_anyext_i16 - ; X64: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; X64: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) - ; X64: $eax = 
COPY [[ANYEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) + ; X64-NEXT: $eax = COPY [[ANYEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s16) = COPY $di %1(s32) = G_ANYEXT %0(s16) $eax = COPY %1(s32) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir index 05947932307907..9807d13e3235a3 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir @@ -21,6 +21,7 @@ body: | ; X64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738367 ; X64-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[CTTZ_ZERO_UNDEF]], [[C1]] ; X64-NEXT: RET 0, implicit [[AND]](s64) + ; ; X86-LABEL: name: test_cttz35 ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -34,8 +35,8 @@ body: | ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDO]], [[CTTZ_ZERO_UNDEF1]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDE]], [[C]] @@ -88,6 +89,7 @@ body: | ; X64-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTTZ_ZERO_UNDEF [[DEF]](s64) ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTTZ_ZERO_UNDEF]](s64) ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; ; X86-LABEL: name: test_cttz64 ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) @@ -98,8 +100,8 @@ body: | ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C1]] ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) - ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDO]], [[CTTZ_ZERO_UNDEF1]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDE]], [[C]] diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir index 9459b2ff19dac2..e2d10423dbec51 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir @@ -21,6 +21,7 @@ body: | ; X64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738367 ; X64-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[CTTZ_ZERO_UNDEF]], [[C1]] ; X64-NEXT: RET 0, implicit [[AND]](s64) + ; ; X86-LABEL: name: test_cttz35 ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -34,8 +35,8 @@ body: | ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), 
[[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDO]], [[CTTZ_ZERO_UNDEF1]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDE]], [[C]] @@ -90,6 +91,7 @@ body: | ; X64-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ [[DEF]](s64) ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTTZ]](s64) ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; ; X86-LABEL: name: test_cttz64 ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) @@ -100,8 +102,8 @@ body: | ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ]], [[C1]] ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) - ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDO]], [[CTTZ_ZERO_UNDEF]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDE]], [[C]] diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trunc.mir index e0fcce270e2745..9a2414a4eb7558 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trunc.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trunc.mir @@ -18,28 +18,29 @@ body: | bb.1 (%ir-block.0): ; X32-LABEL: name: trunc_check ; X32: [[DEF:%[0-9]+]]:_(s32) = IMPLICIT_DEF - ; X32: [[DEF1:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; X32: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; X32: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) - ; X32: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] - ; X32: G_STORE [[AND]](s8), [[DEF1]](p0) :: (store (s1)) - ; X32: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) - ; X32: G_STORE [[TRUNC1]](s8), [[DEF1]](p0) :: (store (s8)) - ; X32: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; X32: G_STORE [[TRUNC2]](s16), [[DEF1]](p0) :: (store (s16)) - ; X32: RET 0 + ; X32-NEXT: [[DEF1:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF + ; X32-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) + ; X32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; X32-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] + ; X32-NEXT: G_STORE [[AND]](s8), [[DEF1]](p0) :: (store (s1)) + ; X32-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) + ; X32-NEXT: G_STORE [[TRUNC1]](s8), [[DEF1]](p0) :: (store (s8)) + ; X32-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; X32-NEXT: G_STORE [[TRUNC2]](s16), [[DEF1]](p0) :: (store (s16)) + ; X32-NEXT: RET 0 + ; ; X64-LABEL: name: trunc_check ; X64: [[DEF:%[0-9]+]]:_(s32) = IMPLICIT_DEF - ; X64: [[DEF1:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; X64: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; X64: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) - ; X64: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] - ; X64: G_STORE [[AND]](s8), [[DEF1]](p0) :: (store (s1)) 
- ; X64: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) - ; X64: G_STORE [[TRUNC1]](s8), [[DEF1]](p0) :: (store (s8)) - ; X64: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; X64: G_STORE [[TRUNC2]](s16), [[DEF1]](p0) :: (store (s16)) - ; X64: RET 0 + ; X64-NEXT: [[DEF1:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF + ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) + ; X64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; X64-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] + ; X64-NEXT: G_STORE [[AND]](s8), [[DEF1]](p0) :: (store (s1)) + ; X64-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) + ; X64-NEXT: G_STORE [[TRUNC1]](s8), [[DEF1]](p0) :: (store (s8)) + ; X64-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; X64-NEXT: G_STORE [[TRUNC2]](s16), [[DEF1]](p0) :: (store (s16)) + ; X64-NEXT: RET 0 %0(s32) = IMPLICIT_DEF %1(s1) = G_TRUNC %0(s32) %4:_(p0) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll index fe5c78cd0767f3..8a6215e07ebf6d 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll @@ -165,8 +165,7 @@ define i1 @test_lshr_i1_imm1(i32 %arg1) { ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: andb $1, %al -; X64-NEXT: movb $1, %cl -; X64-NEXT: shrb %cl, %al +; X64-NEXT: shrb %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-zext.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-zext.mir index b071c29e0d1970..e3074c3e01f547 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-zext.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-zext.mir @@ -68,12 +68,13 @@ body: | ; CHECK-LABEL: name: zext_i1_to_i8 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] - ; CHECK: $al = COPY [[AND]](s8) - ; CHECK: RET 0, implicit $al + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: $al = COPY [[AND]](s8) + ; CHECK-NEXT: RET 0, implicit $al %1:_(s32) = COPY $edi %0:_(s1) = G_TRUNC %1(s32) %2:_(s8) = G_ZEXT %0(s1) @@ -95,12 +96,13 @@ body: | ; CHECK-LABEL: name: zext_i1_to_i16 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: $ax = COPY [[AND]](s16) - ; CHECK: RET 0, implicit $ax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: $ax = COPY [[AND]](s16) + ; CHECK-NEXT: RET 0, implicit $ax %1:_(s32) = COPY $edi %0:_(s1) = G_TRUNC %1(s32) %2:_(s16) = G_ZEXT %0(s1) @@ -122,11 +124,12 @@ body: | ; CHECK-LABEL: name: zext_i1_to_i32 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[COPY]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %1:_(s32) = COPY $edi %0:_(s1) = G_TRUNC %1(s32) %2:_(s32) = G_ZEXT %0(s1) @@ -148,12 +151,13 @@ body: | ; CHECK-LABEL: name: zext_i1_to_i64 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $rax = COPY [[AND]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $rax = COPY [[AND]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %1:_(s32) = COPY $edi %0:_(s1) = G_TRUNC %1(s32) %2:_(s64) = G_ZEXT %0(s1) @@ -175,12 +179,13 @@ body: | ; CHECK-LABEL: name: zext_i8_to_i16 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: $ax = COPY [[AND]](s16) - ; CHECK: RET 0, implicit $ax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: $ax = COPY [[AND]](s16) + ; CHECK-NEXT: RET 0, implicit $ax %1:_(s32) = COPY $edi %0:_(s8) = G_TRUNC %1(s32) %2:_(s16) = G_ZEXT %0(s8) @@ -202,11 +207,12 @@ body: | ; CHECK-LABEL: name: zext_i8_to_i32 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %1:_(s32) = COPY $edi %0:_(s8) = G_TRUNC %1(s32) %2:_(s32) = G_ZEXT %0(s8) @@ -228,12 +234,13 @@ body: | ; CHECK-LABEL: name: zext_i8_to_i64 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $rax = COPY [[AND]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $rax = COPY [[AND]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %1:_(s32) = COPY $edi %0:_(s8) = G_TRUNC %1(s32) %2:_(s64) = G_ZEXT %0(s8) @@ -255,11 +262,12 @@ body: | ; CHECK-LABEL: name: zext_i16_to_i32 ; CHECK: 
liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %1:_(s32) = COPY $edi %0:_(s16) = G_TRUNC %1(s32) %2:_(s32) = G_ZEXT %0(s16) @@ -281,12 +289,13 @@ body: | ; CHECK-LABEL: name: zext_i16_to_i64 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $rax = COPY [[AND]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $rax = COPY [[AND]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %1:_(s32) = COPY $edi %0:_(s16) = G_TRUNC %1(s32) %2:_(s64) = G_ZEXT %0(s16) @@ -307,10 +316,11 @@ body: | ; CHECK-LABEL: name: zext_i32_to_i64 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK: $rax = COPY [[ZEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK-NEXT: $rax = COPY [[ZEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0:_(s32) = COPY $edi %1:_(s64) = G_ZEXT %0(s32) $rax = COPY %1(s64) diff --git a/llvm/test/Transforms/ExpandLargeDivRem/X86/lit.local.cfg b/llvm/test/Transforms/ExpandLargeDivRem/X86/lit.local.cfg new file mode 100644 index 00000000000000..42bf50dcc13c35 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/X86/lit.local.cfg @@ -0,0 +1,2 @@ +if not "X86" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/CodeGen/X86/expand-large-div-rem-sdiv129.ll b/llvm/test/Transforms/ExpandLargeDivRem/X86/sdiv129.ll similarity index 100% rename from llvm/test/CodeGen/X86/expand-large-div-rem-sdiv129.ll rename to llvm/test/Transforms/ExpandLargeDivRem/X86/sdiv129.ll diff --git a/llvm/test/CodeGen/X86/expand-large-div-rem-srem129.ll b/llvm/test/Transforms/ExpandLargeDivRem/X86/srem129.ll similarity index 100% rename from llvm/test/CodeGen/X86/expand-large-div-rem-srem129.ll rename to llvm/test/Transforms/ExpandLargeDivRem/X86/srem129.ll diff --git a/llvm/test/CodeGen/X86/expand-large-div-rem-udiv129.ll b/llvm/test/Transforms/ExpandLargeDivRem/X86/udiv129.ll similarity index 100% rename from llvm/test/CodeGen/X86/expand-large-div-rem-udiv129.ll rename to llvm/test/Transforms/ExpandLargeDivRem/X86/udiv129.ll diff --git a/llvm/test/CodeGen/X86/expand-large-div-rem-urem129.ll b/llvm/test/Transforms/ExpandLargeDivRem/X86/urem129.ll similarity index 100% rename from llvm/test/CodeGen/X86/expand-large-div-rem-urem129.ll rename to llvm/test/Transforms/ExpandLargeDivRem/X86/urem129.ll diff --git a/llvm/test/CodeGen/X86/expand-large-fp-convert-fptosi129.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll 
similarity index 100% rename from llvm/test/CodeGen/X86/expand-large-fp-convert-fptosi129.ll rename to llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll diff --git a/llvm/test/CodeGen/X86/expand-large-fp-convert-fptoui129.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll similarity index 100% rename from llvm/test/CodeGen/X86/expand-large-fp-convert-fptoui129.ll rename to llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll diff --git a/llvm/test/CodeGen/X86/expand-large-fp-convert-si129tofp.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll similarity index 100% rename from llvm/test/CodeGen/X86/expand-large-fp-convert-si129tofp.ll rename to llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll diff --git a/llvm/test/CodeGen/X86/expand-large-fp-convert-ui129tofp.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll similarity index 100% rename from llvm/test/CodeGen/X86/expand-large-fp-convert-ui129tofp.ll rename to llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/X86/lit.local.cfg b/llvm/test/Transforms/ExpandLargeFpConvert/X86/lit.local.cfg new file mode 100644 index 00000000000000..42bf50dcc13c35 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeFpConvert/X86/lit.local.cfg @@ -0,0 +1,2 @@ +if not "X86" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll index 280630468c2c09..857132a0d10fca 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll @@ -320,8 +320,7 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_fffffffffffffffe(ptr addrspace( define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(ptr addrspace(3) %src.ptr) { ; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff( -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 -1) -; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1 +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[SRC_PTR:%.*]], align 1 ; CHECK-NEXT: ret i8 [[LOAD]] ; %cast = addrspacecast ptr addrspace(3) %src.ptr to ptr @@ -333,7 +332,7 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(ptr addrspace( ; Make sure non-constant masks can also be handled. 
define i8 @ptrmask_cast_local_to_flat_load_range_mask(ptr addrspace(3) %src.ptr, ptr addrspace(1) %mask.ptr) { ; CHECK-LABEL: @ptrmask_cast_local_to_flat_load_range_mask( -; CHECK-NEXT: [[LOAD_MASK:%.*]] = load i64, ptr addrspace(1) [[MASK_PTR:%.*]], align 8, !range !0 +; CHECK-NEXT: [[LOAD_MASK:%.*]] = load i64, ptr addrspace(1) [[MASK_PTR:%.*]], align 8, !range [[RNG0:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[LOAD_MASK]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP2]], align 1 diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll index ec8e7c9348f174..facb5df08a82f4 100644 --- a/llvm/test/Transforms/InstCombine/align-addr.ll +++ b/llvm/test/Transforms/InstCombine/align-addr.ll @@ -135,7 +135,7 @@ define <16 x i8> @ptrmask_align_unknown_ptr_align1(ptr align 1 %ptr, i64 %mask) define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask) { ; CHECK-LABEL: @ptrmask_align_unknown_ptr_align8( -; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]]) +; CHECK-NEXT: [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]]) ; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 ; CHECK-NEXT: ret <16 x i8> [[LOAD]] ; @@ -147,7 +147,7 @@ define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask) ; Increase load align from 1 to 2 define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) { ; CHECK-LABEL: @ptrmask_align2_ptr_align1( -; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2) +; CHECK-NEXT: [[ALIGNED:%.*]] = call align 2 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2) ; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 ; CHECK-NEXT: ret <16 x i8> [[LOAD]] ; @@ -159,7 +159,7 @@ define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) { ; Increase load align from 1 to 4 define <16 x i8> @ptrmask_align4_ptr_align1(ptr align 1 %ptr) { ; CHECK-LABEL: @ptrmask_align4_ptr_align1( -; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNED:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) ; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 ; CHECK-NEXT: ret <16 x i8> [[LOAD]] ; @@ -171,7 +171,7 @@ define <16 x i8> @ptrmask_align4_ptr_align1(ptr align 1 %ptr) { ; Increase load align from 1 to 8 define <16 x i8> @ptrmask_align8_ptr_align1(ptr align 1 %ptr) { ; CHECK-LABEL: @ptrmask_align8_ptr_align1( -; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8) +; CHECK-NEXT: [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8) ; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 ; CHECK-NEXT: ret <16 x i8> [[LOAD]] ; @@ -181,11 +181,9 @@ define <16 x i8> @ptrmask_align8_ptr_align1(ptr align 1 %ptr) { } ; Underlying alignment already the same as forced alignment by ptrmask -; TODO: Should be able to drop the ptrmask define <16 x i8> @ptrmask_align8_ptr_align8(ptr align 8 %ptr) { ; CHECK-LABEL: @ptrmask_align8_ptr_align8( -; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8) -; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 +; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1 ; 
CHECK-NEXT: ret <16 x i8> [[LOAD]] ; %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8) @@ -194,11 +192,9 @@ define <16 x i8> @ptrmask_align8_ptr_align8(ptr align 8 %ptr) { } ; Underlying alignment greater than alignment forced by ptrmask -; TODO: Should be able to drop the ptrmask define <16 x i8> @ptrmask_align8_ptr_align16(ptr align 16 %ptr) { ; CHECK-LABEL: @ptrmask_align8_ptr_align16( -; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8) -; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 +; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1 ; CHECK-NEXT: ret <16 x i8> [[LOAD]] ; %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8) diff --git a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll index bfe9efb1e88dd2..22a7c0c072d963 100644 --- a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll +++ b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll @@ -4,7 +4,8 @@ target datalayout = "p1:64:64:64:32" declare ptr @llvm.ptrmask.p0.i64(ptr, i64) -declare ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1), i32) +declare ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) , i32) +declare <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) >, <2 x i32>) declare <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr>, <2 x i64>) declare void @use.ptr(ptr) @@ -57,3 +58,27 @@ define ptr addrspace(1) @fold_2x_smaller_index_type(ptr addrspace(1) %p, i32 %m0 %p1 = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p0, i32 %m1) ret ptr addrspace(1) %p1 } + +define <2 x ptr> @fold_2x_vec_i64(<2 x ptr> %p, <2 x i64> %m0) { +; CHECK-LABEL: define <2 x ptr> @fold_2x_vec_i64 +; CHECK-SAME: (<2 x ptr> [[P:%.*]], <2 x i64> [[M0:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[M0]], +; CHECK-NEXT: [[P1:%.*]] = call align 2 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> [[TMP1]]) +; CHECK-NEXT: ret <2 x ptr> [[P1]] +; + %p0 = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> %m0) + %p1 = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p0, <2 x i64> ) + ret <2 x ptr> %p1 +} + +define <2 x ptr addrspace(1) > @fold_2x_vec_i32_undef(<2 x ptr addrspace(1) > %p, <2 x i32> %m0) { +; CHECK-LABEL: define <2 x ptr addrspace(1)> @fold_2x_vec_i32_undef +; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]], <2 x i32> [[M0:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[M0]], +; CHECK-NEXT: [[P1:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> [[TMP1]]) +; CHECK-NEXT: ret <2 x ptr addrspace(1)> [[P1]] +; + %p0 = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> %m0) + %p1 = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p0, <2 x i32> ) + ret <2 x ptr addrspace(1) > %p1 +} diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll new file mode 100644 index 00000000000000..afeb5d5251d0f4 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/ptrmask.ll @@ -0,0 +1,424 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +target datalayout = "p1:64:64:64:32" + +declare ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1), i32) +declare ptr @llvm.ptrmask.p0.i64(ptr, i64) +declare <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr 
addrspace(1) >, <2 x i32>) +declare <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr>, <2 x i64>) + +define ptr @ptrmask_combine_consecutive_preserve_attrs(ptr %p0, i64 %m1) { +; CHECK-LABEL: define ptr @ptrmask_combine_consecutive_preserve_attrs +; CHECK-SAME: (ptr [[P0:%.*]], i64 [[M1:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[M1]], 224 +; CHECK-NEXT: [[R:%.*]] = call noalias align 32 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[TMP1]]) +; CHECK-NEXT: ret ptr [[R]] +; + %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 224) + %r = call noalias ptr @llvm.ptrmask.p0.i64(ptr %pm0, i64 %m1) + ret ptr %r +} + +define <2 x ptr> @ptrmask_combine_consecutive_preserve_attrs_vecs(<2 x ptr> %p0, <2 x i64> %m1) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_combine_consecutive_preserve_attrs_vecs +; CHECK-SAME: (<2 x ptr> [[P0:%.*]], <2 x i64> [[M1:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[M1]], <i64 224, i64 224> +; CHECK-NEXT: [[R:%.*]] = call align 128 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[TMP1]]) +; CHECK-NEXT: ret <2 x ptr> [[R]] +; + %pm0 = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p0, <2 x i64> <i64 224, i64 224>) + %r = call align 128 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %pm0, <2 x i64> %m1) + ret <2 x ptr> %r +} + +define ptr @ptrmask_combine_consecutive_preserve_attrs_fail(ptr %p0, i64 %m0) { +; CHECK-LABEL: define ptr @ptrmask_combine_consecutive_preserve_attrs_fail +; CHECK-SAME: (ptr [[P0:%.*]], i64 [[M0:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[M0]], 193 +; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[TMP1]]) +; CHECK-NEXT: ret ptr [[R]] +; + %pm0 = call noalias ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0) + %r = call ptr @llvm.ptrmask.p0.i64(ptr %pm0, i64 193) + ret ptr %r +} + +define ptr @ptrmask_combine_consecutive_preserve_attrs_todo0(ptr %p0) { +; CHECK-LABEL: define ptr @ptrmask_combine_consecutive_preserve_attrs_todo0 +; CHECK-SAME: (ptr [[P0:%.*]]) { +; CHECK-NEXT: [[PM0:%.*]] = call noalias align 32 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 224) +; CHECK-NEXT: ret ptr [[PM0]] +; + %pm0 = call noalias ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 224) + %r = call ptr @llvm.ptrmask.p0.i64(ptr %pm0, i64 224) + ret ptr %r +} + +define ptr @ptrmask_combine_consecutive_preserve_attrs_todo1(ptr %p0) { +; CHECK-LABEL: define ptr @ptrmask_combine_consecutive_preserve_attrs_todo1 +; CHECK-SAME: (ptr [[P0:%.*]]) { +; CHECK-NEXT: [[PM0:%.*]] = call align 32 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 224) +; CHECK-NEXT: ret ptr [[PM0]] +; + %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 224) + %r = call noalias ptr @llvm.ptrmask.p0.i64(ptr %pm0, i64 224) + ret ptr %r +} + +define ptr addrspace(1) @ptrmask_combine_consecutive_preserve_attrs_todo2(ptr addrspace(1) %p0) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_combine_consecutive_preserve_attrs_todo2 +; CHECK-SAME: (ptr addrspace(1) [[P0:%.*]]) { +; CHECK-NEXT: [[PM0:%.*]] = call noalias align 32 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P0]], i32 224) +; CHECK-NEXT: ret ptr addrspace(1) [[PM0]] +; + %pm0 = call noalias ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p0, i32 224) + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %pm0, i32 224) + ret ptr addrspace(1) %r +} + +define ptr @ptrmask_combine_add_nonnull(ptr %p) { +; CHECK-LABEL: define ptr @ptrmask_combine_add_nonnull +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[PM0:%.*]] = call align 64 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, ptr 
[[PM0]], i64 33 +; CHECK-NEXT: [[R:%.*]] = call nonnull align 32 ptr @llvm.ptrmask.p0.i64(ptr [[PGEP]], i64 -32) +; CHECK-NEXT: ret ptr [[R]] +; + %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64) + %pgep = getelementptr i8, ptr %pm0, i64 33 + %r = call ptr @llvm.ptrmask.p0.i64(ptr %pgep, i64 -16) + ret ptr %r +} + +define ptr @ptrmask_combine_add_alignment(ptr %p) { +; CHECK-LABEL: define ptr @ptrmask_combine_add_alignment +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call align 64 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64) +; CHECK-NEXT: ret ptr [[R]] +; + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64) + ret ptr %r +} + +define ptr addrspace(1) @ptrmask_combine_add_alignment2(ptr addrspace(1) align 32 %p) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_combine_add_alignment2 +; CHECK-SAME: (ptr addrspace(1) align 32 [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call align 64 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -64) +; CHECK-NEXT: ret ptr addrspace(1) [[R]] +; + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -64) + ret ptr addrspace(1) %r +} + +define <2 x ptr> @ptrmask_combine_add_alignment_vec(<2 x ptr> %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_combine_add_alignment_vec +; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> [[R]] +; + %r = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> ) + ret <2 x ptr> %r +} + +define ptr addrspace(1) @ptrmask_combine_improve_alignment(ptr addrspace(1) %p) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_combine_improve_alignment +; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call align 64 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -64) +; CHECK-NEXT: ret ptr addrspace(1) [[R]] +; + %r = call align 32 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -64) + ret ptr addrspace(1) %r +} + +define <2 x ptr addrspace(1) > @ptrmask_combine_improve_alignment_vec(<2 x ptr addrspace(1) > %p) { +; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_combine_improve_alignment_vec +; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call align 64 <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> ) +; CHECK-NEXT: ret <2 x ptr addrspace(1)> [[R]] +; + %r = call align 32 <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> ) + ret <2 x ptr addrspace(1) > %r +} + +define ptr addrspace(1) @ptrmask_combine_improve_alignment_fail(ptr addrspace(1) %p) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_combine_improve_alignment_fail +; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call align 128 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -64) +; CHECK-NEXT: ret ptr addrspace(1) [[R]] +; + %r = call align 128 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -64) + ret ptr addrspace(1) %r +} + +define i64 @ptrtoint_of_ptrmask(ptr %p, i64 %m) { +; CHECK-LABEL: define i64 @ptrtoint_of_ptrmask +; CHECK-SAME: (ptr [[P:%.*]], i64 [[M:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[R:%.*]] = and i64 [[TMP1]], [[M]] +; CHECK-NEXT: ret i64 [[R]] +; + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m) + %r = ptrtoint ptr %pm to i64 + ret i64 %r +} + +; This succeeds because (ptrtoint i32) gets folded to (trunc i32 
(ptrtoint i64)) +define i32 @ptrtoint_of_ptrmask2(ptr %p, i64 %m) { +; CHECK-LABEL: define i32 @ptrtoint_of_ptrmask2 +; CHECK-SAME: (ptr [[P:%.*]], i64 [[M:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], [[M]] +; CHECK-NEXT: [[R:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[R]] +; + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m) + %r = ptrtoint ptr %pm to i32 + ret i32 %r +} + +define <2 x i64> @ptrtoint_of_ptrmask_vec(<2 x ptr> %p, <2 x i64> %m) { +; CHECK-LABEL: define <2 x i64> @ptrtoint_of_ptrmask_vec +; CHECK-SAME: (<2 x ptr> [[P:%.*]], <2 x i64> [[M:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <2 x ptr> [[P]] to <2 x i64> +; CHECK-NEXT: [[R:%.*]] = and <2 x i64> [[TMP1]], [[M]] +; CHECK-NEXT: ret <2 x i64> [[R]] +; + %pm = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> %m) + %r = ptrtoint <2 x ptr> %pm to <2 x i64> + ret <2 x i64> %r +} + +define <2 x i32> @ptrtoint_of_ptrmask_vec2(<2 x ptr> %p, <2 x i64> %m) { +; CHECK-LABEL: define <2 x i32> @ptrtoint_of_ptrmask_vec2 +; CHECK-SAME: (<2 x ptr> [[P:%.*]], <2 x i64> [[M:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <2 x ptr> [[P]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP1]], [[M]] +; CHECK-NEXT: [[R:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[R]] +; + %pm = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> %m) + %r = ptrtoint <2 x ptr> %pm to <2 x i32> + ret <2 x i32> %r +} + +define i64 @ptrtoint_of_ptrmask_fail(ptr addrspace(1) %p, i32 %m) { +; CHECK-LABEL: define i64 @ptrtoint_of_ptrmask_fail +; CHECK-SAME: (ptr addrspace(1) [[P:%.*]], i32 [[M:%.*]]) { +; CHECK-NEXT: [[PM:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 [[M]]) +; CHECK-NEXT: [[R:%.*]] = ptrtoint ptr addrspace(1) [[PM]] to i64 +; CHECK-NEXT: ret i64 [[R]] +; + %pm = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 %m) + %r = ptrtoint ptr addrspace(1) %pm to i64 + ret i64 %r +} + +define <2 x i32> @ptrtoint_of_ptrmask_vec_fail(<2 x ptr addrspace(1) > %p, <2 x i32> %m) { +; CHECK-LABEL: define <2 x i32> @ptrtoint_of_ptrmask_vec_fail +; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]], <2 x i32> [[M:%.*]]) { +; CHECK-NEXT: [[PM:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> [[M]]) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <2 x ptr addrspace(1)> [[PM]] to <2 x i64> +; CHECK-NEXT: [[R:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[R]] +; + %pm = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> %m) + %r = ptrtoint <2 x ptr addrspace(1) > %pm to <2 x i32> + ret <2 x i32> %r +} + +define ptr addrspace(1) @ptrmask_is_null(ptr addrspace(1) align 32 %p) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_is_null +; CHECK-SAME: (ptr addrspace(1) align 32 [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call align 4294967296 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 0) +; CHECK-NEXT: ret ptr addrspace(1) [[R]] +; + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 31) + ret ptr addrspace(1) %r +} + +define <2 x ptr addrspace(1) > @ptrmask_is_null_vec(<2 x ptr addrspace(1) > align 64 %p) { +; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_is_null_vec +; CHECK-SAME: (<2 x ptr addrspace(1)> align 64 [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr 
addrspace(1)> [[P]], <2 x i32> ) +; CHECK-NEXT: ret <2 x ptr addrspace(1)> [[R]] +; + %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> ) + ret <2 x ptr addrspace(1) > %r +} + +define ptr addrspace(1) @ptrmask_is_null_fail(ptr addrspace(1) align 16 %p) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_is_null_fail +; CHECK-SAME: (ptr addrspace(1) align 16 [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call align 16 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 16) +; CHECK-NEXT: ret ptr addrspace(1) [[R]] +; + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 31) + ret ptr addrspace(1) %r +} + +define <2 x ptr addrspace(1) > @ptrmask_is_null_vec_fail(<2 x ptr addrspace(1) > align 32 %p) { +; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_is_null_vec_fail +; CHECK-SAME: (<2 x ptr addrspace(1)> align 32 [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> ) +; CHECK-NEXT: ret <2 x ptr addrspace(1)> [[R]] +; + %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> ) + ret <2 x ptr addrspace(1) > %r +} + +define ptr @ptrmask_maintain_provenance_i64(ptr %p0) { +; CHECK-LABEL: define ptr @ptrmask_maintain_provenance_i64 +; CHECK-SAME: (ptr [[P0:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call align 4294967296 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 0) +; CHECK-NEXT: ret ptr [[R]] +; + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 0) + ret ptr %r +} + +define ptr addrspace(1) @ptrmask_maintain_provenance_i32(ptr addrspace(1) %p0) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_maintain_provenance_i32 +; CHECK-SAME: (ptr addrspace(1) [[P0:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call align 4294967296 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P0]], i32 0) +; CHECK-NEXT: ret ptr addrspace(1) [[R]] +; + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p0, i32 0) + ret ptr addrspace(1) %r +} + +define ptr @ptrmask_is_useless0(i64 %i, i64 %m) { +; CHECK-LABEL: define ptr @ptrmask_is_useless0 +; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) { +; CHECK-NEXT: [[I0:%.*]] = and i64 [[I]], -4 +; CHECK-NEXT: [[P0:%.*]] = inttoptr i64 [[I0]] to ptr +; CHECK-NEXT: [[R:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M]]) +; CHECK-NEXT: ret ptr [[R]] +; + %m0 = and i64 %m, -4 + %i0 = and i64 %i, -4 + %p0 = inttoptr i64 %i0 to ptr + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0) + ret ptr %r +} + +define ptr @ptrmask_is_useless1(i64 %i, i64 %m) { +; CHECK-LABEL: define ptr @ptrmask_is_useless1 +; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) { +; CHECK-NEXT: [[I0:%.*]] = and i64 [[I]], -8 +; CHECK-NEXT: [[P0:%.*]] = inttoptr i64 [[I0]] to ptr +; CHECK-NEXT: [[R:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M]]) +; CHECK-NEXT: ret ptr [[R]] +; + %m0 = and i64 %m, -4 + %i0 = and i64 %i, -8 + %p0 = inttoptr i64 %i0 to ptr + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0) + ret ptr %r +} + +define ptr @ptrmask_is_useless2(i64 %i, i64 %m) { +; CHECK-LABEL: define ptr @ptrmask_is_useless2 +; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) { +; CHECK-NEXT: [[I0:%.*]] = and i64 [[I]], 31 +; CHECK-NEXT: [[P0:%.*]] = inttoptr i64 [[I0]] to ptr +; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M]]) +; CHECK-NEXT: ret ptr [[R]] +; + %m0 = and i64 %m, 127 + %i0 = and i64 %i, 31 + %p0 = inttoptr i64 %i0 to ptr + %r = call ptr 
@llvm.ptrmask.p0.i64(ptr %p0, i64 %m0) + ret ptr %r +} + +define ptr @ptrmask_is_useless3(i64 %i, i64 %m) { +; CHECK-LABEL: define ptr @ptrmask_is_useless3 +; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) { +; CHECK-NEXT: [[I0:%.*]] = and i64 [[I]], 127 +; CHECK-NEXT: [[P0:%.*]] = inttoptr i64 [[I0]] to ptr +; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M]]) +; CHECK-NEXT: ret ptr [[R]] +; + %m0 = and i64 %m, 127 + %i0 = and i64 %i, 127 + %p0 = inttoptr i64 %i0 to ptr + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0) + ret ptr %r +} + +define ptr @ptrmask_is_useless4(i64 %i, i64 %m) { +; CHECK-LABEL: define ptr @ptrmask_is_useless4 +; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) { +; CHECK-NEXT: [[I0:%.*]] = and i64 [[I]], -4 +; CHECK-NEXT: [[P0:%.*]] = inttoptr i64 [[I0]] to ptr +; CHECK-NEXT: ret ptr [[P0]] +; + %m0 = or i64 %m, -4 + %i0 = and i64 %i, -4 + %p0 = inttoptr i64 %i0 to ptr + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0) + ret ptr %r +} + +define <2 x ptr> @ptrmask_is_useless_vec(<2 x i64> %i, <2 x i64> %m) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_is_useless_vec +; CHECK-SAME: (<2 x i64> [[I:%.*]], <2 x i64> [[M:%.*]]) { +; CHECK-NEXT: [[I0:%.*]] = and <2 x i64> [[I]], <i64 -4, i64 -4> +; CHECK-NEXT: [[P0:%.*]] = inttoptr <2 x i64> [[I0]] to <2 x ptr> +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[M]]) +; CHECK-NEXT: ret <2 x ptr> [[R]] +; + %m0 = and <2 x i64> %m, <i64 -4, i64 -4> + %i0 = and <2 x i64> %i, <i64 -4, i64 -4> + %p0 = inttoptr <2 x i64> %i0 to <2 x ptr> + %r = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p0, <2 x i64> %m0) + ret <2 x ptr> %r +} + +define <2 x ptr> @ptrmask_is_useless_vec_todo(<2 x i64> %i, <2 x i64> %m) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_is_useless_vec_todo +; CHECK-SAME: (<2 x i64> [[I:%.*]], <2 x i64> [[M:%.*]]) { +; CHECK-NEXT: [[I0:%.*]] = and <2 x i64> [[I]], +; CHECK-NEXT: [[P0:%.*]] = inttoptr <2 x i64> [[I0]] to <2 x ptr> +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[M]]) +; CHECK-NEXT: ret <2 x ptr> [[R]] +; + %m0 = and <2 x i64> %m, + %i0 = and <2 x i64> %i, + %p0 = inttoptr <2 x i64> %i0 to <2 x ptr> + %r = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p0, <2 x i64> %m0) + ret <2 x ptr> %r +} + +define ptr @ptrmask_is_useless_fail0(i64 %i, i64 %m) { +; CHECK-LABEL: define ptr @ptrmask_is_useless_fail0 +; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) { +; CHECK-NEXT: [[M0:%.*]] = and i64 [[M]], -4 +; CHECK-NEXT: [[I0:%.*]] = or i64 [[I]], -4 +; CHECK-NEXT: [[P0:%.*]] = inttoptr i64 [[I0]] to ptr +; CHECK-NEXT: [[R:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr nonnull [[P0]], i64 [[M0]]) +; CHECK-NEXT: ret ptr [[R]] +; + %m0 = and i64 %m, -4 + %i0 = or i64 %i, -4 + %p0 = inttoptr i64 %i0 to ptr + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0) + ret ptr %r +} + +define ptr @ptrmask_is_useless_fail1(i64 %i, i64 %m) { +; CHECK-LABEL: define ptr @ptrmask_is_useless_fail1 +; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) { +; CHECK-NEXT: [[M0:%.*]] = and i64 [[M]], 127 +; CHECK-NEXT: [[I0:%.*]] = and i64 [[I]], 511 +; CHECK-NEXT: [[P0:%.*]] = inttoptr i64 [[I0]] to ptr +; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M0]]) +; CHECK-NEXT: ret ptr [[R]] +; + %m0 = and i64 %m, 127 + %i0 = and i64 %i, 511 + %p0 = inttoptr i64 %i0 to ptr + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0) + ret ptr %r +} diff --git a/llvm/test/Transforms/InstSimplify/ptrmask.ll b/llvm/test/Transforms/InstSimplify/ptrmask.ll 
new file mode 100644 index 00000000000000..dd83abfdeee464 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/ptrmask.ll @@ -0,0 +1,343 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -passes=instsimplify -S | FileCheck %s + +target datalayout = "p1:64:64:64:32" + +declare ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) , i32) +declare ptr @llvm.ptrmask.p0.i64(ptr, i64) + +declare <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) >, <2 x i32>) +declare <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr>, <2 x i64>) + +define ptr @ptrmask_simplify_poison_mask(ptr %p) { +; CHECK-LABEL: define ptr @ptrmask_simplify_poison_mask +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: ret ptr poison +; + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 poison) + ret ptr %r +} + +define <2 x ptr addrspace(1) > @ptrmask_simplify_poison_mask_vec(<2 x ptr addrspace(1) > %p) { +; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_simplify_poison_mask_vec +; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]]) { +; CHECK-NEXT: ret <2 x ptr addrspace(1)> poison +; + %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> poison) + ret <2 x ptr addrspace(1) > %r +} + +define <2 x ptr addrspace(1) > @ptrmask_simplify_poison_and_zero_i32_vec_fail(<2 x ptr addrspace(1) > %p) { +; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_simplify_poison_and_zero_i32_vec_fail +; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> <i32 poison, i32 0>) +; CHECK-NEXT: ret <2 x ptr addrspace(1)> [[R]] +; + %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> <i32 poison, i32 0>) + ret <2 x ptr addrspace(1) > %r +} + +define <2 x ptr> @ptrmask_simplify_undef_and_ones_vec(<2 x ptr> %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_undef_and_ones_vec +; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { +; CHECK-NEXT: ret <2 x ptr> [[P]] +; + %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 undef, i64 -1>) + ret <2 x ptr> %r +} + +define <2 x ptr> @ptrmask_simplify_poison_and_ones_vec(<2 x ptr> %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_poison_and_ones_vec +; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { +; CHECK-NEXT: ret <2 x ptr> [[P]] +; + %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 poison, i64 -1>) + ret <2 x ptr> %r +} + +define <2 x ptr> @ptrmask_simplify_ones_vec(<2 x ptr> %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_ones_vec +; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { +; CHECK-NEXT: ret <2 x ptr> [[P]] +; + %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -1, i64 -1>) + ret <2 x ptr> %r +} + +define <2 x ptr addrspace(1) > @ptrmask_simplify_ones_i32_vec(<2 x ptr addrspace(1) > %p) { +; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_simplify_ones_i32_vec +; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]]) { +; CHECK-NEXT: ret <2 x ptr addrspace(1)> [[P]] +; + %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> <i32 -1, i32 -1>) + ret <2 x ptr addrspace(1) > %r +} + +define ptr addrspace(1) @ptrmask_simplify_undef_mask(ptr addrspace(1) %p) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_undef_mask +; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: ret ptr addrspace(1) [[P]] +; + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 undef) + ret ptr addrspace(1) %r +} + +define 
ptr @ptrmask_simplify_0_mask(ptr %p) { +; CHECK-LABEL: define ptr @ptrmask_simplify_0_mask +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 0) +; CHECK-NEXT: ret ptr [[R]] +; + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 0) + ret ptr %r +} + +define ptr @ptrmask_simplify_1s_mask(ptr %p) { +; CHECK-LABEL: define ptr @ptrmask_simplify_1s_mask +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: ret ptr [[P]] +; + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -1) + ret ptr %r +} + +define ptr addrspace(1) @ptrmask_simplify_1s_mask_i32(ptr addrspace(1) %p) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_1s_mask_i32 +; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: ret ptr addrspace(1) [[P]] +; + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -1) + ret ptr addrspace(1) %r +} + +define ptr @ptrmask_simplify_poison_ptr(i64 %m) { +; CHECK-LABEL: define ptr @ptrmask_simplify_poison_ptr +; CHECK-SAME: (i64 [[M:%.*]]) { +; CHECK-NEXT: ret ptr poison +; + %r = call ptr @llvm.ptrmask.p0.i64(ptr poison, i64 %m) + ret ptr %r +} + +define ptr addrspace(1) @ptrmask_simplify_undef_ptr(i32 %m) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_undef_ptr +; CHECK-SAME: (i32 [[M:%.*]]) { +; CHECK-NEXT: ret ptr addrspace(1) null +; + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) undef, i32 %m) + ret ptr addrspace(1) %r +} + +define ptr @ptrmask_simplify_null_ptr(i64 %m) { +; CHECK-LABEL: define ptr @ptrmask_simplify_null_ptr +; CHECK-SAME: (i64 [[M:%.*]]) { +; CHECK-NEXT: ret ptr null +; + %r = call ptr @llvm.ptrmask.p0.i64(ptr null, i64 %m) + ret ptr %r +} + +define ptr @ptrmask_simplify_ptrmask(ptr %p) { +; CHECK-LABEL: define ptr @ptrmask_simplify_ptrmask +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: ret ptr [[P]] +; + %m = ptrtoint ptr %p to i64 + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m) + ret ptr %r +} + +define ptr addrspace(1) @ptrmask_simplify_ptrmask_i32(ptr addrspace(1) %p) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_ptrmask_i32 +; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: ret ptr addrspace(1) [[P]] +; + %m = ptrtoint ptr addrspace(1) %p to i32 + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 %m) + ret ptr addrspace(1) %r +} + +define ptr @ptrmask_simplify_aligned_unused(ptr align 64 %p) { +; CHECK-LABEL: define ptr @ptrmask_simplify_aligned_unused +; CHECK-SAME: (ptr align 64 [[P:%.*]]) { +; CHECK-NEXT: ret ptr [[P]] +; + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64) + ret ptr %r +} + +define <2 x ptr> @ptrmask_simplify_aligned_unused_vec(<2 x ptr> align 128 %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_aligned_unused_vec +; CHECK-SAME: (<2 x ptr> align 128 [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> [[R]] +; + %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) + ret <2 x ptr> %r +} + +define <2 x ptr> @ptrmask_simplify_aligned_unused_vec_todo(<2 x ptr> align 128 %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_aligned_unused_vec_todo +; CHECK-SAME: (<2 x ptr> align 128 [[P:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> [[R]] +; + %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) + ret <2 x ptr> %r +} + +define ptr addrspace(1) 
@ptrmask_simplify_aligned_unused_i32(ptr addrspace(1) align 64 %p) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_aligned_unused_i32 +; CHECK-SAME: (ptr addrspace(1) align 64 [[P:%.*]]) { +; CHECK-NEXT: ret ptr addrspace(1) [[P]] +; + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -64) + ret ptr addrspace(1) %r +} + +define ptr @ptrmask_simplify_known_unused(ptr %p) { +; CHECK-LABEL: define ptr @ptrmask_simplify_known_unused +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[PM0:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, ptr [[PM0]], i64 32 +; CHECK-NEXT: ret ptr [[PGEP]] +; + %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64) + %pgep = getelementptr i8, ptr %pm0, i64 32 + %r = call ptr @llvm.ptrmask.p0.i64(ptr %pgep, i64 -32) + ret ptr %r +} + +define <2 x ptr> @ptrmask_simplify_known_unused_vec(<2 x ptr> %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec +; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { +; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: ret <2 x ptr> [[PGEP]] +; + %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) + %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> + %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> ) + ret <2 x ptr> %r +} + +define <2 x ptr> @ptrmask_simplify_known_unused_vec2(<2 x ptr> %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec2 +; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { +; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: ret <2 x ptr> [[PGEP]] +; + %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) + %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> + %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> ) + ret <2 x ptr> %r +} + +define <2 x ptr> @ptrmask_simplify_known_unused_vec3(<2 x ptr> %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec3 +; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { +; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: ret <2 x ptr> [[PGEP]] +; + %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) + %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> + %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> ) + ret <2 x ptr> %r +} + +define <2 x ptr> @ptrmask_simplify_known_unused_vec4(<2 x ptr> %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec4 +; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { +; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: ret <2 x ptr> [[PGEP]] +; + %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) + %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> + %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> ) + ret <2 x ptr> %r +} + +define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail(<2 x ptr> %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail +; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { +; CHECK-NEXT: 
[[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> [[R]] +; + %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) + %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> + %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> ) + ret <2 x ptr> %r +} + +define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail2(<2 x ptr> %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail2 +; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { +; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> [[R]] +; + %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) + %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> + %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> ) + ret <2 x ptr> %r +} + +define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail3(<2 x ptr> %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail3 +; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { +; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> [[R]] +; + %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) + %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> + %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> ) + ret <2 x ptr> %r +} + +define ptr @ptrmask_maintain_provenance_i64(ptr %p0) { +; CHECK-LABEL: define ptr @ptrmask_maintain_provenance_i64 +; CHECK-SAME: (ptr [[P0:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 0) +; CHECK-NEXT: ret ptr [[R]] +; + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 0) + ret ptr %r +} + +define ptr addrspace(1) @ptrmask_maintain_provenance_i32(ptr addrspace(1) %p0) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_maintain_provenance_i32 +; CHECK-SAME: (ptr addrspace(1) [[P0:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P0]], i32 0) +; CHECK-NEXT: ret ptr addrspace(1) [[R]] +; + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p0, i32 0) + ret ptr addrspace(1) %r +} + +define <2 x ptr> @ptrmask_maintain_provenance_v2i64(<2 x ptr> %p0) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_maintain_provenance_v2i64 +; CHECK-SAME: (<2 x ptr> [[P0:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> zeroinitializer) +; CHECK-NEXT: ret <2 x ptr> [[R]] +; + %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p0, <2 x i64> zeroinitializer) + ret <2 x ptr> %r +} + +define <2 x ptr addrspace(1) > @ptrmask_maintain_provenance_v2i32(<2 x ptr addrspace(1) > %p0) { +; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_maintain_provenance_v2i32 +; CHECK-SAME: (<2 x ptr addrspace(1)> [[P0:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> 
[[P0]], <2 x i32> zeroinitializer) +; CHECK-NEXT: ret <2 x ptr addrspace(1)> [[R]] +; + %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p0, <2 x i32> zeroinitializer) + ret <2 x ptr addrspace(1) > %r +} diff --git a/llvm/test/Transforms/LoopFlatten/widen-iv.ll b/llvm/test/Transforms/LoopFlatten/widen-iv.ll index ef5f9d78cd0493..4692fa829bac5c 100644 --- a/llvm/test/Transforms/LoopFlatten/widen-iv.ll +++ b/llvm/test/Transforms/LoopFlatten/widen-iv.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -passes='loop-simplify,loop(loop-flatten),verify' -loop-flatten-widen-iv=true \ +; RUN: opt < %s -S -passes='loop-simplify,loop(loop-flatten),dce,verify' -loop-flatten-widen-iv=true \ ; RUN: -verify-loop-info -verify-dom-info -verify-scev \ ; RUN: -loop-flatten-cost-threshold=6 | \ ; RUN: FileCheck %s --check-prefix=CHECK -; RUN: opt < %s -S -passes='loop-simplify,loop(loop-flatten),verify' -loop-flatten-widen-iv=false \ +; RUN: opt < %s -S -passes='loop-simplify,loop(loop-flatten),dce,verify' -loop-flatten-widen-iv=false \ ; RUN: -verify-loop-info -verify-dom-info -verify-scev | \ ; RUN: FileCheck %s --check-prefix=DONTWIDEN @@ -30,19 +30,12 @@ define void @foo(ptr %A, i32 %N, i32 %M) { ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.us: ; CHECK-NEXT: [[INDVAR1:%.*]] = phi i64 [ [[INDVAR_NEXT2:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR1]] to i32 -; CHECK-NEXT: [[MUL_US:%.*]] = mul nsw i32 [[TMP2]], [[M]] ; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR1]] to i32 ; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] ; CHECK: for.body4.us: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i32 -; CHECK-NEXT: [[ADD_US:%.*]] = add nsw i32 [[TMP3]], [[MUL_US]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[FLATTEN_TRUNCIV]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @f(ptr [[ARRAYIDX_US]]) -; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP0]] ; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: ; CHECK-NEXT: [[INDVAR_NEXT2]] = add i64 [[INDVAR1]], 1 @@ -140,32 +133,17 @@ define void @foo2_sext(i32* nocapture readonly %A, i32 %N, i32 %M) { ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; CHECK: for.cond1.preheader.us.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[M]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[M]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 -; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]] ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.us: ; CHECK-NEXT: [[INDVAR2:%.*]] = phi i64 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] ; CHECK-NEXT: [[I_018_US:%.*]] = phi i32 [ [[INC6_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP3:%.*]] = mul nsw i64 [[INDVAR2]], [[TMP1]] -; CHECK-NEXT: [[MUL_US:%.*]] = 
mul nsw i32 [[I_018_US]], [[M]] -; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[MUL_US]] to i64 -; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR2]] to i32 ; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] ; CHECK: for.body4.us: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ] -; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[INDVAR]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[J_016_US]] to i64 -; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[ADD_US:%.*]] = add nsw i32 [[J_016_US]], [[MUL_US]] -; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[FLATTEN_TRUNCIV]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVAR2]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4 -; CHECK-NEXT: tail call void @g(i32 [[TMP8]]) -; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[INC_US:%.*]] = add nuw nsw i32 [[J_016_US]], 1 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4 +; CHECK-NEXT: tail call void @g(i32 [[TMP2]]) ; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: ; CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1 @@ -300,20 +278,13 @@ define void @foo2_zext(i32* nocapture readonly %A, i32 %N, i32 %M) { ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; CHECK: for.cond1.preheader.us: ; CHECK-NEXT: [[INDVAR1:%.*]] = phi i64 [ [[INDVAR_NEXT2:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR1]] to i32 -; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[TMP2]], [[M]] ; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR1]] to i32 ; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] ; CHECK: for.body4.us: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i32 -; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP3]], [[MUL_US]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[FLATTEN_TRUNCIV]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4 -; CHECK-NEXT: tail call void @g(i32 [[TMP4]]) -; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4 +; CHECK-NEXT: tail call void @g(i32 [[TMP2]]) ; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: ; CHECK-NEXT: [[INDVAR_NEXT2]] = add i64 [[INDVAR1]], 1 @@ -435,21 +406,14 @@ define void @zext(i32 %N, ptr nocapture %A, i16 %val) { ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.us: ; CHECK-NEXT: [[INDVAR1:%.*]] = phi i64 [ [[INDVAR_NEXT2:%.*]], [[FOR_COND1_FOR_INC7_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR1]] to i32 -; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[TMP2]], [[N]] ; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR1]] to i32 ; CHECK-NEXT: br label [[FOR_BODY3_US:%.*]] ; CHECK: for.body3.us: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ] -; CHECK-NEXT: [[TMP3:%.*]] = 
trunc i64 [[INDVAR]] to i32 -; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP3]], [[MUL_US]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[FLATTEN_TRUNCIV]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i16, ptr [[A:%.*]], i64 [[IDXPROM_US]] -; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_US]], align 2 -; CHECK-NEXT: [[ADD5_US:%.*]] = add i16 [[TMP4]], [[VAL:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX_US]], align 2 +; CHECK-NEXT: [[ADD5_US:%.*]] = add i16 [[TMP2]], [[VAL:%.*]] ; CHECK-NEXT: store i16 [[ADD5_US]], ptr [[ARRAYIDX_US]], align 2 -; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]] ; CHECK-NEXT: br label [[FOR_COND1_FOR_INC7_CRIT_EDGE_US]] ; CHECK: for.cond1.for.inc7_crit_edge.us: ; CHECK-NEXT: [[INDVAR_NEXT2]] = add i64 [[INDVAR1]], 1 @@ -553,18 +517,11 @@ define void @test(i8 %n, i8 %m) { ; CHECK-NEXT: br label [[FOR_COND3_PREHEADER_US:%.*]] ; CHECK: for.cond3.preheader.us: ; CHECK-NEXT: [[INDVAR2:%.*]] = phi i64 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND3_PREHEADER_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR2]] to i8 -; CHECK-NEXT: [[MUL_US:%.*]] = mul i8 [[TMP2]], [[M]] ; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR2]] to i8 ; CHECK-NEXT: br label [[FOR_BODY9_US:%.*]] ; CHECK: for.body9.us: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND3_PREHEADER_US]] ] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i8 -; CHECK-NEXT: [[ADD_US:%.*]] = add i8 [[TMP3]], [[MUL_US]] ; CHECK-NEXT: [[CONV14_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i32 ; CHECK-NEXT: [[CALL_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]]) -; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[CMP6_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]] ; CHECK-NEXT: br label [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]] ; CHECK: for.cond3.for.cond.cleanup8_crit_edge.us: ; CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1 @@ -695,14 +652,9 @@ define void @test3(i8 %n, i8 %m) { ; CHECK-NEXT: br label [[FOR_COND3_PREHEADER_US:%.*]] ; CHECK: for.cond3.preheader.us: ; CHECK-NEXT: [[INDVAR2:%.*]] = phi i64 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND3_PREHEADER_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR2]] to i8 -; CHECK-NEXT: [[MUL_US:%.*]] = mul i8 [[TMP2]], [[M]] ; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR2]] to i8 ; CHECK-NEXT: br label [[FOR_BODY9_US:%.*]] ; CHECK: for.body9.us: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND3_PREHEADER_US]] ] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i8 -; CHECK-NEXT: [[ADD_US:%.*]] = add i8 [[TMP3]], [[MUL_US]] ; CHECK-NEXT: [[CONV14_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i32 ; CHECK-NEXT: [[CALL_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]]) ; CHECK-NEXT: [[CONV15_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i16 @@ -711,8 +663,6 @@ define void @test3(i8 %n, i8 %m) { ; CHECK-NEXT: [[CALL20_US:%.*]] = tail call i32 @use_16(i16 [[CONV15_US]]) ; CHECK-NEXT: [[CONV21_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i64 ; CHECK-NEXT: [[CALL22_US:%.*]] = tail call i32 @use_64(i64 [[CONV21_US]]) -; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[CMP6_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]] ; CHECK-NEXT: br label [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]] ; CHECK: for.cond3.for.cond.cleanup8_crit_edge.us: ; 
CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1 @@ -855,14 +805,9 @@ define void @test4(i16 %n, i16 %m) { ; CHECK-NEXT: br label [[FOR_COND3_PREHEADER_US:%.*]] ; CHECK: for.cond3.preheader.us: ; CHECK-NEXT: [[INDVAR2:%.*]] = phi i64 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND3_PREHEADER_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR2]] to i16 -; CHECK-NEXT: [[MUL_US:%.*]] = mul i16 [[TMP2]], [[M]] ; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR2]] to i16 ; CHECK-NEXT: br label [[FOR_BODY9_US:%.*]] ; CHECK: for.body9.us: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND3_PREHEADER_US]] ] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i16 -; CHECK-NEXT: [[ADD_US:%.*]] = add i16 [[TMP3]], [[MUL_US]] ; CHECK-NEXT: [[CONV14_US:%.*]] = sext i16 [[FLATTEN_TRUNCIV]] to i32 ; CHECK-NEXT: [[CALL_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]]) ; CHECK-NEXT: [[CALL15_US:%.*]] = tail call i32 @use_16(i16 [[FLATTEN_TRUNCIV]]) @@ -870,8 +815,6 @@ define void @test4(i16 %n, i16 %m) { ; CHECK-NEXT: [[CALL18_US:%.*]] = tail call i32 @use_16(i16 [[FLATTEN_TRUNCIV]]) ; CHECK-NEXT: [[CONV19_US:%.*]] = sext i16 [[FLATTEN_TRUNCIV]] to i64 ; CHECK-NEXT: [[CALL20_US:%.*]] = tail call i32 @use_64(i64 [[CONV19_US]]) -; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[CMP6_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP0]] ; CHECK-NEXT: br label [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]] ; CHECK: for.cond3.for.cond.cleanup8_crit_edge.us: ; CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1 @@ -991,10 +934,7 @@ define i32 @constTripCount() { ; CHECK-NEXT: [[INDVAR1:%.*]] = phi i64 [ [[INDVAR_NEXT2:%.*]], [[J_LOOPDONE:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[J_LOOP:%.*]] ; CHECK: j.loop: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[I_LOOP]] ] ; CHECK-NEXT: call void @payload() -; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[J_ATEND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 20 ; CHECK-NEXT: br label [[J_LOOPDONE]] ; CHECK: j.loopdone: ; CHECK-NEXT: [[INDVAR_NEXT2]] = add i64 [[INDVAR1]], 1 @@ -1063,19 +1003,12 @@ define void @foo_M_sext(ptr %A, i32 %N, i16 %M) { ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.us: ; CHECK-NEXT: [[INDVAR1:%.*]] = phi i64 [ [[INDVAR_NEXT2:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR1]] to i32 -; CHECK-NEXT: [[MUL_US:%.*]] = mul nsw i32 [[TMP2]], [[M2]] ; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR1]] to i32 ; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] ; CHECK: for.body4.us: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i32 -; CHECK-NEXT: [[ADD_US:%.*]] = add nsw i32 [[TMP3]], [[MUL_US]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[FLATTEN_TRUNCIV]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @f(ptr [[ARRAYIDX_US]]) -; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP0]] ; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: ; CHECK-NEXT: [[INDVAR_NEXT2]] = add i64 [[INDVAR1]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll 
b/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll new file mode 100644 index 00000000000000..438321e0fb0ccb --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll @@ -0,0 +1,133 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; REQUIRES: asserts +; RUN: opt < %s -hoist-runtime-checks -p 'loop-vectorize' -force-vector-interleave=1 -S \ +; RUN: -force-vector-width=4 -debug-only=loop-accesses,loop-vectorize,loop-utils 2> %t | FileCheck %s +; RUN: cat %t | FileCheck %s --check-prefix=DEBUG + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-none-unknown-eabi" + +; Equivalent example in C: +; void diff_checks(int32_t *dst, int32_t *src, int m, int n) { +; for (int i = 0; i < m; i++) { +; for (int j = 0; j < n; j++) { +; dst[(i * (n + 1)) + j] = src[(i * n) + j]; +; } +; } +; } +; NOTE: The strides of the starting address values in the inner loop differ, i.e. +; '(i * (n + 1))' vs '(i * n)'. + +; DEBUG-LABEL: LAA: Found a loop in diff_checks: +; DEBUG: LAA: Not creating diff runtime check, since these cannot be hoisted out of the outer loop +; DEBUG: LAA: Adding RT check for range: +; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting +; DEBUG-NEXT: LAA: ... but need to check stride is positive: (4 + (4 * %n)) +; DEBUG-NEXT: Start: %dst End: ((4 * %n) + ((4 + (4 * %n)) * (-1 + %m)) + %dst) +; DEBUG-NEXT: LAA: Adding RT check for range: +; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting +; DEBUG-NEXT: LAA: ... but need to check stride is positive: (4 * %n) +; DEBUG-NEXT: Start: %src End: ((4 * %m * %n) + %src) + +define void @diff_checks(ptr nocapture noundef writeonly %dst, ptr nocapture noundef readonly %src, i32 noundef %m, i32 noundef %n) #0 { +; CHECK-LABEL: define void @diff_checks +; CHECK-SAME: (ptr nocapture noundef writeonly [[DST:%.*]], ptr nocapture noundef readonly [[SRC:%.*]], i32 noundef [[M:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[N]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[N]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[N]], [[M]] +; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], 2 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP6]] +; CHECK-NEXT: br label [[OUTER_LOOP:%.*]] +; CHECK: outer.loop: +; CHECK-NEXT: [[I_023_US:%.*]] = phi i32 [ [[INC10_US:%.*]], [[INNER_LOOP_EXIT:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[MUL_US:%.*]] = mul nsw i32 [[I_023_US]], [[N]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[SRC]], i32 [[MUL_US]] +; CHECK-NEXT: [[MUL6_US:%.*]] = mul nsw i32 [[I_023_US]], [[ADD5]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[DST]], i32 [[MUL6_US]] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[STRIDE_CHECK:%.*]] = icmp slt i32 
[[TMP2]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[FOUND_CONFLICT]], [[STRIDE_CHECK]] +; CHECK-NEXT: [[STRIDE_CHECK2:%.*]] = icmp slt i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[STRIDE_CHECK2]] +; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP11]], i32 [[N]]) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP7]], i32 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP13]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison), !alias.scope !0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP8]], i32 [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[WIDE_MASKED_LOAD]], ptr [[TMP15]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope !3, !noalias !0 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[INNER_LOOP:%.*]] +; CHECK: inner.loop: +; CHECK-NEXT: [[J_021_US:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_US:%.*]], [[INNER_LOOP]] ] +; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr i32, ptr [[TMP7]], i32 [[J_021_US]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4 +; CHECK-NEXT: [[ARRAYIDX8_US:%.*]] = getelementptr i32, ptr [[TMP8]], i32 [[J_021_US]] +; CHECK-NEXT: store i32 [[TMP17]], ptr [[ARRAYIDX8_US]], align 4 +; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[J_021_US]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_US]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[INNER_LOOP_EXIT]], label [[INNER_LOOP]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: inner.loop.exit: +; CHECK-NEXT: [[INC10_US]] = add nuw nsw i32 [[I_023_US]], 1 +; CHECK-NEXT: [[EXITCOND26_NOT:%.*]] = icmp eq i32 [[INC10_US]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND26_NOT]], label [[EXIT:%.*]], label [[OUTER_LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %add5 = add nsw i32 %n, 1 + br label %outer.loop + +outer.loop: + %i.023.us = phi i32 [ %inc10.us, %inner.loop.exit ], [ 0, %entry ] + %mul.us = mul nsw i32 %i.023.us, %n + %0 = getelementptr i32, ptr %src, i32 %mul.us + %mul6.us = mul nsw i32 %i.023.us, %add5 + %1 = getelementptr i32, ptr %dst, i32 %mul6.us + br label %inner.loop + +inner.loop: + %j.021.us = phi i32 [ 0, %outer.loop ], [ %inc.us, %inner.loop ] + %arrayidx.us = getelementptr i32, ptr %0, i32 %j.021.us + %2 = load i32, ptr %arrayidx.us, align 4 + %arrayidx8.us = getelementptr i32, ptr %1, i32 %j.021.us 
+ store i32 %2, ptr %arrayidx8.us, align 4 + %inc.us = add nuw nsw i32 %j.021.us, 1 + %exitcond.not = icmp eq i32 %inc.us, %n + br i1 %exitcond.not, label %inner.loop.exit, label %inner.loop + +inner.loop.exit: + %inc10.us = add nuw nsw i32 %i.023.us, 1 + %exitcond26.not = icmp eq i32 %inc10.us, %m + br i1 %exitcond26.not, label %exit, label %outer.loop + +exit: + ret void +} + +attributes #0 = { "target-cpu"="cortex-m55" } diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp index b829df42dc401d..401d04954a669a 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp @@ -212,8 +212,8 @@ TEST_F(AArch64GISelMITest, UnorderedArtifactCombiningManyCopiesTest) { CHECK-NEXT: [[OFFSET_1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 CHECK-NEXT: [[VPTR_1:%[0-9]+]]:_(p0) = G_PTR_ADD %vptr:_, [[OFFSET_1]]:_(s64) CHECK-NEXT: [[LOAD_1:%[0-9]+]]:_(s16) = G_LOAD [[VPTR_1]]:_(p0) :: (load (s8) from unknown-address + 1) - CHECK-NEXT: [[FF_MASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 CHECK-NEXT: [[V0_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD_0]]:_(s16) + CHECK-NEXT: [[FF_MASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 CHECK-NEXT: %v0_zext:_(s32) = G_AND [[V0_EXT]]:_, [[FF_MASK]]:_ CHECK-NEXT: [[V1_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD_1]]:_(s16) CHECK-NEXT: [[SHAMNT:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp index ad81a2fcd16441..e5ee93ffa3c607 100644 --- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp +++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -21,7 +21,6 @@ #include "llvm/DebugInfo/GSYM/StringTable.h" #include "llvm/ObjectYAML/DWARFEmitter.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Endian.h" #include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" diff --git a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp index a94ad0859ebba5..20eed05b14ae8e 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp @@ -9,7 +9,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/JITLink/JITLink.h" #include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Memory.h" #include "llvm/Testing/Support/Error.h" diff --git a/llvm/unittests/ExecutionEngine/JITLink/StubsTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/StubsTests.cpp index e33aa63b5e4c81..8fbede0378b9f4 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/StubsTests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/StubsTests.cpp @@ -13,7 +13,6 @@ #include "llvm/ExecutionEngine/JITLink/loongarch.h" #include "llvm/ExecutionEngine/JITLink/x86_64.h" #include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Memory.h" #include "llvm/Testing/Support/Error.h" diff --git a/llvm/unittests/TargetParser/TripleTest.cpp b/llvm/unittests/TargetParser/TripleTest.cpp index 0a43b1a8925d1e..575e2ca381df36 100644 --- a/llvm/unittests/TargetParser/TripleTest.cpp +++ b/llvm/unittests/TargetParser/TripleTest.cpp @@ -1003,6 +1003,18 @@ TEST(TripleTest, ParsedIDs) { EXPECT_EQ(Triple::LiteOS, T.getOS()); EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment()); + T = Triple("x86_64-pc-serenity"); + EXPECT_EQ(Triple::x86_64, T.getArch()); + EXPECT_EQ(Triple::PC, T.getVendor()); 
+  EXPECT_EQ(Triple::Serenity, T.getOS());
+  EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
+  T = Triple("aarch64-pc-serenity");
+  EXPECT_EQ(Triple::aarch64, T.getArch());
+  EXPECT_EQ(Triple::PC, T.getVendor());
+  EXPECT_EQ(Triple::Serenity, T.getOS());
+  EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
   T = Triple("huh");
   EXPECT_EQ(Triple::UnknownArch, T.getArch());
 }
diff --git a/mlir/docs/Dialects/ArmSME.md b/mlir/docs/Dialects/ArmSME.md
index ab7c9ffe7aa92f..505b52938eacc0 100644
--- a/mlir/docs/Dialects/ArmSME.md
+++ b/mlir/docs/Dialects/ArmSME.md
@@ -1,5 +1,7 @@
 # 'ArmSME' Dialect
 
+[TOC]
+
 Basic dialect to target Arm SME architectures
 
 This dialect contains the definitions necessary to target Arm SME scalable matrix operations.
diff --git a/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h b/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h
index ef8215d332c463..e97675f37e10c2 100644
--- a/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h
+++ b/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h
@@ -120,8 +120,6 @@ class MemRefDescriptor : public StructBuilder {
   static unsigned getNumUnpackedValues(MemRefType type);
 
 private:
-  bool useOpaquePointers();
-
   // Cached index type.
   Type indexType;
 };
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index d7253212e5fe3f..00e342f8f30eb3 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -391,6 +391,10 @@ def ConvertFuncToLLVMPass : Pass<"convert-func-to-llvm", "ModuleOp"> {
     1 value is returned, packed into an LLVM IR struct type. Function calls
     and returns are updated accordingly. Block argument types are updated to
     use LLVM IR types.
+
+    Note that until https://github.com/llvm/llvm-project/issues/70982 is resolved,
+    this pass includes patterns that lower `arith` and `cf` to LLVM. This is legacy
+    code from when they were all converted in the same pass.
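+
+    As a minimal sketch (hypothetical input, not taken from the test suite),
+    a function such as the following, which mixes `func`, `arith` and `cf`
+    ops, is therefore still lowered in full by this single pass; once the
+    issue is resolved, the dedicated `-convert-arith-to-llvm` and
+    `-convert-cf-to-llvm` passes will likely be needed as well:
+
+    ```mlir
+    func.func @add_then_branch(%a: i32, %b: i32) -> i32 {
+      %sum = arith.addi %a, %b : i32
+      cf.br ^bb1(%sum : i32)
+    ^bb1(%r: i32):
+      return %r : i32
+    }
+    ```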
 }];
 let dependentDialects = ["LLVM::LLVMDialect"];
 let options = [
@@ -742,10 +746,7 @@ def FinalizeMemRefToLLVMConversionPass :
            "bool", /*default=*/"false",
            "Use generic allocation and deallocation functions instead of the "
-           "classic 'malloc', 'aligned_alloc' and 'free' functions">,
-    Option<"useOpaquePointers", "use-opaque-pointers", "bool",
-           /*default=*/"true", "Generate LLVM IR using opaque pointers "
-           "instead of typed pointers">
+           "classic 'malloc', 'aligned_alloc' and 'free' functions">
   ];
 }
@@ -805,11 +806,6 @@ def ConvertNVGPUToNVVMPass : Pass<"convert-nvgpu-to-nvvm"> {
   let dependentDialects = [
     "NVVM::NVVMDialect",
   ];
-  let options = [
-    Option<"useOpaquePointers", "use-opaque-pointers", "bool",
-           /*default=*/"true", "Generate LLVM IR using opaque pointers "
-           "instead of typed pointers">
-  ];
 }
@@ -887,12 +883,6 @@ def ConvertSCFToOpenMPPass : Pass<"convert-scf-to-openmp", "ModuleOp"> {
   let summary = "Convert SCF parallel loop to OpenMP parallel + workshare "
                 "constructs.";
-  let options = [
-    Option<"useOpaquePointers", "use-opaque-pointers", "bool",
-           /*default=*/"true", "Generate LLVM IR using opaque pointers "
-           "instead of typed pointers">
-  ];
-
   let dependentDialects = ["omp::OpenMPDialect", "LLVM::LLVMDialect",
                            "memref::MemRefDialect"];
 }
diff --git a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSME.td b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSME.td
index 18b9bd7a107feb..ffafb2569310ed 100644
--- a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSME.td
+++ b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSME.td
@@ -42,11 +42,45 @@ def ArmSME_Dialect : Dialect {
 // ArmSME type definitions
 //===----------------------------------------------------------------------===//
 
+// FIXME: This allows types that are not SVE vectors, e.g. vector<[16]xi128>.
 def SVEVector : ScalableVectorOfRankAndLengthAndType<
-  [1], [16, 8, 4, 2, 1], [I8, I16, I32, I64, I128, F16, BF16, F32, F64]>;
+  [1], [16, 8, 4, 2, 1], [I8, I16, I32, I64, I128, F16, BF16, F32, F64]>
+{
+  let summary = "a vector type that matches the size of an SVE vector";
+  let description = [{
+    Possible vector types:
+
+    Integer elements:
+
+    * `vector<[16]xi8>`
+    * `vector<[8]xi16>`
+    * `vector<[4]xi32>`
+    * `vector<[2]xi64>`
+    * `vector<[1]xi128>`
+
+    Floating point elements:
+
+    * `vector<[8]xf16>`
+    * `vector<[8]xbf16>`
+    * `vector<[4]xf32>`
+    * `vector<[2]xf64>`
+  }];
+}
 
 def SVEPredicate : ScalableVectorOfRankAndLengthAndType<
-  [1], [16, 8, 4, 2, 1], [I1]>;
+  [1], [16, 8, 4, 2, 1], [I1]>
+{
+  let summary = "a vector type that matches the size of an SVE predicate";
+  let description = [{
+    Possible vector types:
+
+    * `vector<[16]xi1>`
+    * `vector<[8]xi1>`
+    * `vector<[4]xi1>`
+    * `vector<[2]xi1>`
+    * `vector<[1]xi1>`
+  }];
+}
 
 #endif // ARMSME
diff --git a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td
index 8a34472d3b3a22..ba33a2826e6ca4 100644
--- a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td
+++ b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td
@@ -43,7 +43,47 @@ def nxnxv4f32 : SMETileType">;
 def nxnxv2f64 : SMETileType">;
 
 def SMETile : AnyTypeOf<[nxnxv16i8, nxnxv8i16, nxnxv4i32, nxnxv2i64, nxnxv1i128,
-                         nxnxv8f16, nxnxv8bf16, nxnxv4f32, nxnxv2f64]>;
+                         nxnxv8f16, nxnxv8bf16, nxnxv4f32, nxnxv2f64],
+                        "a vector type that fits into an SME tile">
+{
+  let description = [{
+    Possible vector types:
+
+    Integer elements:
+
+    * `vector<[16]x[16]xi8>`
+    * `vector<[8]x[8]xi16>`
+    * `vector<[4]x[4]xi32>`
+    * `vector<[2]x[2]xi64>`
+    * `vector<[1]x[1]xi128>`
+
+    Floating point elements:
+
+    * `vector<[8]x[8]xf16>`
+    * `vector<[8]x[8]xbf16>`
+    * `vector<[4]x[4]xf32>`
+    * `vector<[2]x[2]xf64>`
+  }];
+}
+
+def TileID : AnyTypeOf<[I8, I16, I32, I64, I128],
+                       "an identifier of a virtual tile (of a given size) within the ZA storage">
+{
+  let description = [{
+    The tile ID is an 8, 16, 32, 64, or 128-bit signless integer. The value of
+    the integer indicates the tile to use, and the bit size indicates the size
+    of the tile. The number of tiles available, and the element types of those
+    tiles, depend on this size. This is summarised below:
+
+    | Tile ID Type | Possible Tile IDs   | Tile Vector Types                                                        |
+    |--------------|---------------------|--------------------------------------------------------------------------|
+    | `i8`         | 0                   | `vector<[16]x[16]xi8>`                                                   |
+    | `i16`        | 0 and 1             | `vector<[8]x[8]xi16>`, `vector<[8]x[8]xf16>`, or `vector<[8]x[8]xbf16>`  |
+    | `i32`        | 0 to 3 (inclusive)  | `vector<[4]x[4]xi32>` or `vector<[4]x[4]xf32>`                           |
+    | `i64`        | 0 to 7 (inclusive)  | `vector<[2]x[2]xi64>` or `vector<[2]x[2]xf64>`                           |
+    | `i128`       | 0 to 15 (inclusive) | `vector<[1]x[1]xi128>`                                                   |
+  }];
+}
 
 // A type constraint that verifies the bitwidth of the scalar integer returned
 // from 'arm_sme.get_tile_id' matches the element bitwidth of a "virtual tile".
@@ -66,6 +106,15 @@ class HasMatchingMaskTypeConstraint :
     vector, mask,
     "::llvm::cast($_self).cloneWith({}, IntegerType::get($_ctxt, 1))">;
 
+class TileSliceMaskConstraint :
+  TypesMatchWith<
+    "`" # mask # "` has i1 element type and the shape is a slice of `" # tile # "`",
+    tile, mask,
+    "VectorType("
+      "VectorType::Builder("
+        "::llvm::cast($_self)"
+      ").dropDim(0).setElementType(IntegerType::get($_self.getContext(), 1)))">;
+
 //===----------------------------------------------------------------------===//
 // ArmSME attr definitions
 //===----------------------------------------------------------------------===//
@@ -151,7 +200,7 @@ def CastTileToVector : ArmSME_Op<"cast_tile_to_vector", [Pure, TileElementWidthM
     Canonicalization will look through `arm_sme.cast_tile_to_vector` and fold
     the cast away if it comes from an `arm_sme.cast_vector_to_tile`.
   }];
-  let arguments = (ins AnyTypeOf<[I8, I16, I32, I64, I128]>:$tile_id);
+  let arguments = (ins TileID:$tile_id);
   let results = (outs SMETile:$vector);
 
   let assemblyFormat = "$tile_id attr-dict `:` type($tile_id) `to` type($vector)";
@@ -187,7 +236,7 @@ def CastVectorToTile : ArmSME_Op<"cast_vector_to_tile", [Pure, TileElementWidthM
    the cast away if it comes from an `arm_sme.cast_tile_to_vector`.
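+
+    For illustration (the tile type here is chosen arbitrarily), the cast has
+    the form:
+
+    ```mlir
+    %tile_id = arm_sme.cast_vector_to_tile %tile : vector<[4]x[4]xi32> to i32
+    ```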
}]; let arguments = (ins SMETile:$vector); - let results = (outs AnyTypeOf<[I8, I16, I32, I64, I128]>:$tile_id); + let results = (outs TileID:$tile_id); let assemblyFormat = "$vector attr-dict `:` type($vector) `to` type($tile_id)"; let hasCanonicalizeMethod = 1; @@ -223,7 +272,7 @@ def GetTileID : ArmSME_Op<"get_tile_id"> { ``` }]; - let results = (outs AnyTypeOf<[I8, I16, I32, I64, I128]>:$tile_id); + let results = (outs TileID:$tile_id); let assemblyFormat = "attr-dict `:` type($tile_id)"; } @@ -408,15 +457,7 @@ def TileStoreOp : ArmSME_Op<"tile_store", [ } def LoadTileSliceOp : ArmSME_Op<"load_tile_slice", [ - AllTypesMatch<["tile", "result"]>, - TypesMatchWith< - "mask has i1 element type and is a slice of the result", - "result", "mask", - "VectorType(" - "VectorType::Builder(" - "::llvm::cast($_self)" - ").dropDim(0).setElementType(IntegerType::get($_self.getContext(), 1))" - ")">, + AllTypesMatch<["tile", "result"]>, TileSliceMaskConstraint<"result", "mask"> ]> { let summary = "Tile slice load and update operation"; let description = [{ @@ -432,9 +473,8 @@ def LoadTileSliceOp : ArmSME_Op<"load_tile_slice", [ dimensions since the operation is scalable, and the element type must be a scalar that matches the element type of the result. - An SSA value `mask` specifies to mask out elements read from the MemRef. - The `mask` type is an `i1` vector with a shape that matches how elements - are read from the MemRef. + The provided `mask` is used to specify which elements of the tile slice + will be loaded. Example 1: Load a vector<[16]xi8> tile slice from memory into tile horizontally (default) at given index. ```mlir @@ -474,7 +514,9 @@ def LoadTileSliceOp : ArmSME_Op<"load_tile_slice", [ }]; } -def StoreTileSliceOp : ArmSME_Op<"store_tile_slice"> { +def StoreTileSliceOp : ArmSME_Op<"store_tile_slice", [ + TileSliceMaskConstraint<"tile", "mask"> +]> { let summary = "Tile slice store operation"; let description = [{ Stores a 1D tile slice from a 2D SME "virtual tile" into memory. The tile @@ -489,22 +531,26 @@ def StoreTileSliceOp : ArmSME_Op<"store_tile_slice"> { dimensions since the operation is scalable, and the element type must be a scalar that matches the element type of the input tile. + The provided `mask` is used to specify which elements of the tile slice + will be stored. + Example 1: Store vector<[16]xi8> horizontal (default) tile slice from tile at given index to memory. ```mlir - arm_sme.store_tile_slice %tile, %tile_slice_index, %base[%c0] : vector<[16]x[16]xi8>, memref + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %base[%c0] : vector<[16]x[16]xi8>, vector<[16]xi1>, memref ``` Example 2: Store vector<[4]xf32> vertical tile slice from tile at given index to memory. ```mlir - arm_sme.store_tile_slice %tile, %tile_slice_index, %base[%c0] layout : vector<[4]x[4]xf32>, memref + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %base[%c0] layout : vector<[4]x[4]xf32>, vector<[4]xi1>, memref ``` Example 3: Store a vector<[1]xi128> vertical tile slice from tile at given index to memory. 
```mlir - arm_sme.store_tile_slice %tile, %tile_slice_index, %base[%c0] layout : vector<[1]x[1]xi128>, memref + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %base[%c0] layout : vector<[1]x[1]xi128>, vector<[1]xi1>, memref ``` }]; - let arguments = (ins SMETile:$tile, Index:$tile_slice_index, + let arguments = (ins + SMETile:$tile, Index:$tile_slice_index, SVEPredicate:$mask, Arg:$base, Variadic:$indices, ArmSME_TileSliceLayoutAttr:$layout ); @@ -518,8 +564,8 @@ def StoreTileSliceOp : ArmSME_Op<"store_tile_slice"> { }]; let assemblyFormat = [{ - $tile `,` $tile_slice_index `,` $base `[` $indices `]` (`layout` `` $layout^)? - attr-dict `:` type($base) `,` type($tile) + $tile `,` $tile_slice_index `,` $mask `,` $base `[` $indices `]` (`layout` `` $layout^)? + attr-dict `:` type($base) `,` type($mask) `,` type($tile) }]; } diff --git a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h index 05320c0c718690..7289c3ac6ff7e1 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h +++ b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h @@ -61,7 +61,7 @@ LLVM::LLVMFuncOp lookupOrCreateGenericAlignedAllocFn(ModuleOp moduleOp, Type indexType, bool opaquePointers); LLVM::LLVMFuncOp lookupOrCreateGenericFreeFn(ModuleOp moduleOp, - bool opaquePointers); + bool opaquePointers = true); LLVM::LLVMFuncOp lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType, Type unrankedDescriptorType); diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td index 3093604af63e33..cca50e21d5ce0b 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -69,6 +69,13 @@ def LinalgLowerToAffineLoops : Pass<"convert-linalg-to-affine-loops"> { def LinalgLowerToLoops : Pass<"convert-linalg-to-loops"> { let summary = "Lower the operations from the linalg dialect into loops"; + let description = [{ + Lowers the `linalg` ops to loop nests using `scf.for`. + + Pre-condition: the operands used by the `linalg` ops have buffer semantics, + i.e., tensor operands and results must be converted to memrefs via + bufferization. + }]; let constructor = "mlir::createConvertLinalgToLoopsPass()"; let dependentDialects = [ "linalg::LinalgDialect", diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 122f7356285210..abd996bdbaf852 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -12,6 +12,7 @@ #include #include "mlir/Conversion/VectorToSCF/VectorToSCF.h" +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/Utils/Utils.h" @@ -28,6 +29,7 @@ namespace mlir { namespace bufferization { +class AllocTensorOp; class OneShotAnalysisState; } // namespace bufferization @@ -110,6 +112,18 @@ Value bufferizeToAllocation(RewriterBase &rewriter, vector::MaskOp maskOp, Attribute memorySpace = {}, Operation *insertionPoint = nullptr); +/// Materialize a buffer allocation for the given bufferization.alloc_tensor op +/// and lower the op to memref.alloc + memref.tensor_store. +/// +/// In addition to rewriting the IR, this function returns the newly allocated +/// buffer. The `insertionPoint` parameter can be used to specify a custom +/// insertion point for the buffer allocation. 
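+/// As a rough sketch (shapes and the dynamic size `%sz` are illustrative,
+/// and an op without a `copy` operand needs no store), the rewrite amounts
+/// to:
+///
+///   %t = bufferization.alloc_tensor(%sz) : tensor<?xf32>
+///   // -->
+///   %m = memref.alloc(%sz) : memref<?xf32>
+///
+/// with uses of %t replaced by a tensor materialization of %m.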
+Value bufferizeToAllocation(RewriterBase &rewriter, + const BufferizeToAllocationOptions &options, + bufferization::AllocTensorOp allocTensorOp, + Attribute memorySpace = {}, + Operation *insertionPoint = nullptr); + /// Bufferize the given op with tensor semantics and materialize the result in /// a newly allocated buffer. /// diff --git a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td index d7ee492b9e990e..651ee05ae1f3cf 100644 --- a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td @@ -196,6 +196,14 @@ def ExpandStridedMetadata : Pass<"expand-strided-metadata"> { In particular, this pass transforms operations into explicit sequence of operations that model the effect of this operation on the different metadata. This pass uses affine constructs to materialize these effects. + + Supported ops include: + + - `memref.collapse_shape` + - `memref.expand_shape` + - `memref.extract_aligned_pointer_as_index` + - `memref.extract_strided_metadata` + - `memref.subview` }]; let constructor = "mlir::memref::createExpandStridedMetadataPass()"; let dependentDialects = [ diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h index bc4680656d4cf6..4dc94782c1c9b5 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h @@ -48,12 +48,16 @@ mlir::acc::CacheOp #define ACC_COMPUTE_CONSTRUCT_OPS \ mlir::acc::ParallelOp, mlir::acc::KernelsOp, mlir::acc::SerialOp +#define ACC_COMPUTE_CONSTRUCT_AND_LOOP_OPS \ + ACC_COMPUTE_CONSTRUCT_OPS, mlir::acc::LoopOp #define ACC_DATA_CONSTRUCT_OPS \ mlir::acc::DataOp, mlir::acc::EnterDataOp, mlir::acc::ExitDataOp, \ mlir::acc::UpdateOp, mlir::acc::HostDataOp, mlir::acc::DeclareEnterOp, \ mlir::acc::DeclareExitOp, mlir::acc::DeclareOp #define ACC_COMPUTE_AND_DATA_CONSTRUCT_OPS \ ACC_COMPUTE_CONSTRUCT_OPS, ACC_DATA_CONSTRUCT_OPS +#define ACC_COMPUTE_LOOP_AND_DATA_CONSTRUCT_OPS \ + ACC_COMPUTE_CONSTRUCT_AND_LOOP_OPS, ACC_DATA_CONSTRUCT_OPS namespace mlir { namespace acc { diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 3c5173fdda7f66..17cb9d6f1c2cb6 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -1196,11 +1196,12 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", UnitAttr:$hasWorker, UnitAttr:$hasVector, Variadic:$tileOperands, + Variadic:$cacheOperands, Variadic:$privateOperands, OptionalAttr:$privatizations, Variadic:$reductionOperands, - OptionalAttr:$reductionRecipes, - Variadic:$cacheOperands); + OptionalAttr:$reductionRecipes + ); let results = (outs Variadic:$results); @@ -1211,6 +1212,12 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", static StringRef getGangNumKeyword() { return "num"; } static StringRef getGangDimKeyword() { return "dim"; } static StringRef getGangStaticKeyword() { return "static"; } + + /// The number of private and reduction operands. + unsigned getNumDataOperands(); + + /// The i-th data operand passed. 
+ Value getDataOperand(unsigned i); }]; let hasCustomAssemblyFormat = 1; diff --git a/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp b/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp index 50cc818f1ffc09..80da6ffda1ed2e 100644 --- a/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp +++ b/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp @@ -190,6 +190,12 @@ struct TileStoreOpConversion : public OpRewritePattern { rewriter.setInsertionPointToStart(forOp.getBody()); + // Create an 'all true' predicate for the tile slice. + auto predicateType = + VectorType::get(tileType.getDimSize(1), rewriter.getI1Type(), true); + auto allTruePredicate = rewriter.create( + loc, DenseElementsAttr::get(predicateType, true)); + SmallVector memrefIndices; auto tileSliceIndex = forOp.getInductionVar(); getMemrefIndices(tileStoreOp.getIndices(), @@ -197,7 +203,8 @@ struct TileStoreOpConversion : public OpRewritePattern { numTileSlices, memrefIndices, loc, rewriter); rewriter.replaceOpWithNewOp( tileStoreOp, tileStoreOp.getValueToStore(), tileSliceIndex, - tileStoreOp.getBase(), memrefIndices, tileStoreOp.getLayout()); + allTruePredicate, tileStoreOp.getBase(), memrefIndices, + tileStoreOp.getLayout()); return success(); } diff --git a/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.cpp b/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.cpp index 6787a5ccd3a4a8..5dba79016120b3 100644 --- a/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.cpp +++ b/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.cpp @@ -18,26 +18,48 @@ #include "mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h" #include "mlir/Dialect/SPIRV/Utils/LayoutUtils.h" #include "mlir/IR/AffineMap.h" +#include "mlir/IR/PatternMatch.h" #include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FormatVariadic.h" #define DEBUG_TYPE "cf-to-spirv-pattern" using namespace mlir; +/// Checks that the target block arguments are legal. +static LogicalResult checkBlockArguments(Block &block, Operation *op, + PatternRewriter &rewriter, + const TypeConverter &converter) { + for (BlockArgument arg : block.getArguments()) { + if (!converter.isLegal(arg.getType())) { + return rewriter.notifyMatchFailure( + op, + llvm::formatv( + "failed to match, destination argument not legalized (found {0})", + arg)); + } + } + return success(); +} + //===----------------------------------------------------------------------===// // Operation conversion //===----------------------------------------------------------------------===// namespace { - /// Converts cf.br to spirv.Branch. -struct BranchOpPattern final : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; +struct BranchOpPattern final : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(cf::BranchOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { + if (failed(checkBlockArguments(*op.getDest(), op, rewriter, + *getTypeConverter()))) + return failure(); + rewriter.replaceOpWithNewOp(op, op.getDest(), adaptor.getDestOperands()); return success(); @@ -45,16 +67,24 @@ struct BranchOpPattern final : public OpConversionPattern { }; /// Converts cf.cond_br to spirv.BranchConditional. 
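+/// As with the unconditional branch above, the pattern now bails out when a
+/// destination block argument has a type the SPIR-V type converter cannot
+/// legalize, e.g. (illustrative sketch):
+///
+///   cf.cond_br %cond, ^bb1(%m : memref<?xf32>), ^bb2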
-struct CondBranchOpPattern final - : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; +struct CondBranchOpPattern final : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(cf::CondBranchOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { + if (failed(checkBlockArguments(*op.getTrueDest(), op, rewriter, + *getTypeConverter()))) + return failure(); + + if (failed(checkBlockArguments(*op.getFalseDest(), op, rewriter, + *getTypeConverter()))) + return failure(); + rewriter.replaceOpWithNewOp( - op, op.getCondition(), op.getTrueDest(), adaptor.getTrueDestOperands(), - op.getFalseDest(), adaptor.getFalseDestOperands()); + op, adaptor.getCondition(), op.getTrueDest(), + adaptor.getTrueDestOperands(), op.getFalseDest(), + adaptor.getFalseDestOperands()); return success(); } }; diff --git a/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp b/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp index d8aecae257b461..a752b82eac7c34 100644 --- a/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp +++ b/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp @@ -25,7 +25,7 @@ using namespace mlir; namespace { /// A pass converting MLIR ControlFlow operations into the SPIR-V dialect. -class ConvertControlFlowToSPIRVPass +class ConvertControlFlowToSPIRVPass final : public impl::ConvertControlFlowToSPIRVBase< ConvertControlFlowToSPIRVPass> { void runOnOperation() override; @@ -44,6 +44,8 @@ void ConvertControlFlowToSPIRVPass::runOnOperation() { options.emulateLT32BitScalarTypes = this->emulateLT32BitScalarTypes; SPIRVTypeConverter typeConverter(targetAttr, options); + // TODO: We should also take care of block argument type conversion. + RewritePatternSet patterns(context); cf::populateControlFlowToSPIRVPatterns(typeConverter, patterns); diff --git a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp index 3126d1dee32cbc..84e145c98e971e 100644 --- a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp +++ b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp @@ -804,7 +804,8 @@ struct ConvertFuncToLLVMPass RewritePatternSet patterns(&getContext()); populateFuncToLLVMConversionPatterns(typeConverter, patterns, symbolTable); - // TODO: Remove these in favor of their dedicated conversion passes. + // TODO(https://github.com/llvm/llvm-project/issues/70982): Remove these in + // favor of their dedicated conversion passes. arith::populateArithToLLVMConversionPatterns(typeConverter, patterns); cf::populateControlFlowToLLVMConversionPatterns(typeConverter, patterns); diff --git a/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp b/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp index 0a3c9a57eec95d..023fd6244ce1af 100644 --- a/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp +++ b/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp @@ -53,8 +53,7 @@ MemRefDescriptor MemRefDescriptor::fromStaticShape( // Extract all strides and offsets and verify they are static. 
   auto [strides, offset] = getStridesAndOffset(type);
-  assert(!ShapedType::isDynamic(offset) &&
-         "expected static offset");
+  assert(!ShapedType::isDynamic(offset) && "expected static offset");
   assert(!llvm::any_of(strides, ShapedType::isDynamic) &&
          "expected static strides");
@@ -134,27 +133,19 @@ Value MemRefDescriptor::size(OpBuilder &builder, Location loc, Value pos,
                              int64_t rank) {
   auto arrayTy = LLVM::LLVMArrayType::get(indexType, rank);
 
-  LLVM::LLVMPointerType indexPtrTy;
-  LLVM::LLVMPointerType arrayPtrTy;
-
-  if (useOpaquePointers()) {
-    arrayPtrTy = indexPtrTy = LLVM::LLVMPointerType::get(builder.getContext());
-  } else {
-    indexPtrTy = LLVM::LLVMPointerType::get(indexType);
-    arrayPtrTy = LLVM::LLVMPointerType::get(arrayTy);
-  }
+  auto ptrTy = LLVM::LLVMPointerType::get(builder.getContext());
 
   // Copy size values to stack-allocated memory.
   auto one = createIndexAttrConstant(builder, loc, indexType, 1);
   auto sizes = builder.create(
      loc, value, llvm::ArrayRef({kSizePosInMemRefDescriptor}));
-  auto sizesPtr = builder.create(loc, arrayPtrTy, arrayTy, one,
+  auto sizesPtr = builder.create(loc, ptrTy, arrayTy, one,
                                  /*alignment=*/0);
   builder.create(loc, sizes, sizesPtr);
 
   // Load and return the size value of interest.
-  auto resultPtr = builder.create(
-      loc, indexPtrTy, arrayTy, sizesPtr, ArrayRef{0, pos});
+  auto resultPtr = builder.create(loc, ptrTy, arrayTy, sizesPtr,
+                                  ArrayRef{0, pos});
   return builder.create(loc, indexType, resultPtr);
 }
@@ -273,10 +264,6 @@ unsigned MemRefDescriptor::getNumUnpackedValues(MemRefType type) {
   return 3 + 2 * type.getRank();
 }
 
-bool MemRefDescriptor::useOpaquePointers() {
-  return getElementPtrType().isOpaque();
-}
-
 //===----------------------------------------------------------------------===//
 // MemRefDescriptorView implementation.
//===----------------------------------------------------------------------===// @@ -413,44 +400,20 @@ void UnrankedMemRefDescriptor::computeSizes( Value UnrankedMemRefDescriptor::allocatedPtr( OpBuilder &builder, Location loc, Value memRefDescPtr, LLVM::LLVMPointerType elemPtrType) { - - Value elementPtrPtr; - if (elemPtrType.isOpaque()) - elementPtrPtr = memRefDescPtr; - else - elementPtrPtr = builder.create( - loc, LLVM::LLVMPointerType::get(elemPtrType), memRefDescPtr); - - return builder.create(loc, elemPtrType, elementPtrPtr); + return builder.create(loc, elemPtrType, memRefDescPtr); } void UnrankedMemRefDescriptor::setAllocatedPtr( OpBuilder &builder, Location loc, Value memRefDescPtr, LLVM::LLVMPointerType elemPtrType, Value allocatedPtr) { - Value elementPtrPtr; - if (elemPtrType.isOpaque()) - elementPtrPtr = memRefDescPtr; - else - elementPtrPtr = builder.create( - loc, LLVM::LLVMPointerType::get(elemPtrType), memRefDescPtr); - - builder.create(loc, allocatedPtr, elementPtrPtr); + builder.create(loc, allocatedPtr, memRefDescPtr); } static std::pair castToElemPtrPtr(OpBuilder &builder, Location loc, Value memRefDescPtr, LLVM::LLVMPointerType elemPtrType) { - Value elementPtrPtr; - Type elemPtrPtrType; - if (elemPtrType.isOpaque()) { - elementPtrPtr = memRefDescPtr; - elemPtrPtrType = LLVM::LLVMPointerType::get(builder.getContext()); - } else { - elemPtrPtrType = LLVM::LLVMPointerType::get(elemPtrType); - elementPtrPtr = - builder.create(loc, elemPtrPtrType, memRefDescPtr); - } - return {elementPtrPtr, elemPtrPtrType}; + auto elemPtrPtrType = LLVM::LLVMPointerType::get(builder.getContext()); + return {memRefDescPtr, elemPtrPtrType}; } Value UnrankedMemRefDescriptor::alignedPtr( @@ -483,16 +446,8 @@ Value UnrankedMemRefDescriptor::offsetBasePtr( auto [elementPtrPtr, elemPtrPtrType] = castToElemPtrPtr(builder, loc, memRefDescPtr, elemPtrType); - Value offsetGep = - builder.create(loc, elemPtrPtrType, elemPtrType, - elementPtrPtr, ArrayRef{2}); - - if (!elemPtrType.isOpaque()) { - offsetGep = builder.create( - loc, LLVM::LLVMPointerType::get(typeConverter.getIndexType()), - offsetGep); - } - return offsetGep; + return builder.create(loc, elemPtrPtrType, elemPtrType, + elementPtrPtr, ArrayRef{2}); } Value UnrankedMemRefDescriptor::offset(OpBuilder &builder, Location loc, @@ -521,19 +476,8 @@ Value UnrankedMemRefDescriptor::sizeBasePtr( Type indexTy = typeConverter.getIndexType(); Type structTy = LLVM::LLVMStructType::getLiteral( indexTy.getContext(), {elemPtrType, elemPtrType, indexTy, indexTy}); - Value structPtr; - if (elemPtrType.isOpaque()) { - structPtr = memRefDescPtr; - } else { - Type structPtrTy = LLVM::LLVMPointerType::get(structTy); - structPtr = - builder.create(loc, structPtrTy, memRefDescPtr); - } - - auto resultType = elemPtrType.isOpaque() - ? 
LLVM::LLVMPointerType::get(indexTy.getContext()) - : LLVM::LLVMPointerType::get(indexTy); - return builder.create(loc, resultType, structTy, structPtr, + auto resultType = LLVM::LLVMPointerType::get(builder.getContext()); + return builder.create(loc, resultType, structTy, memRefDescPtr, ArrayRef{0, 3}); } diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp index e9efcedec0e14e..91b1210efec23e 100644 --- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp +++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp @@ -46,10 +46,9 @@ LLVM::LLVMFuncOp getFreeFn(const LLVMTypeConverter *typeConverter, bool useGenericFn = typeConverter->getOptions().useGenericFunctions; if (useGenericFn) - return LLVM::lookupOrCreateGenericFreeFn( - module, typeConverter->useOpaquePointers()); + return LLVM::lookupOrCreateGenericFreeFn(module); - return LLVM::lookupOrCreateFreeFn(module, typeConverter->useOpaquePointers()); + return LLVM::lookupOrCreateFreeFn(module); } struct AllocOpLowering : public AllocLikeOpLLVMLowering { @@ -108,7 +107,7 @@ struct AllocaOpLowering : public AllocLikeOpLLVMLowering { unsigned addrSpace = *getTypeConverter()->getMemRefAddressSpace(allocaOp.getType()); auto elementPtrType = - getTypeConverter()->getPointerType(elementType, addrSpace); + LLVM::LLVMPointerType::get(rewriter.getContext(), addrSpace); auto allocatedElementPtr = rewriter.create(loc, elementPtrType, elementType, size, @@ -232,10 +231,8 @@ struct DeallocOpLowering : public ConvertOpToLLVMPattern { Value allocatedPtr; if (auto unrankedTy = llvm::dyn_cast(op.getMemref().getType())) { - Type elementType = unrankedTy.getElementType(); - Type llvmElementTy = getTypeConverter()->convertType(elementType); - LLVM::LLVMPointerType elementPtrTy = getTypeConverter()->getPointerType( - llvmElementTy, unrankedTy.getMemorySpaceAsInt()); + auto elementPtrTy = LLVM::LLVMPointerType::get( + rewriter.getContext(), unrankedTy.getMemorySpaceAsInt()); allocatedPtr = UnrankedMemRefDescriptor::allocatedPtr( rewriter, op.getLoc(), UnrankedMemRefDescriptor(adaptor.getMemref()) @@ -245,10 +242,6 @@ struct DeallocOpLowering : public ConvertOpToLLVMPattern { allocatedPtr = MemRefDescriptor(adaptor.getMemref()) .allocatedPtr(rewriter, op.getLoc()); } - if (!getTypeConverter()->useOpaquePointers()) - allocatedPtr = rewriter.create( - op.getLoc(), getVoidPtrType(), allocatedPtr); - rewriter.replaceOpWithNewOp(op, freeFunc, allocatedPtr); return success(); } @@ -306,19 +299,12 @@ struct DimOpLowering : public ConvertOpToLLVMPattern { Value underlyingRankedDesc = unrankedDesc.memRefDescPtr(rewriter, loc); Type elementType = typeConverter->convertType(scalarMemRefType); - Value scalarMemRefDescPtr; - if (getTypeConverter()->useOpaquePointers()) - scalarMemRefDescPtr = underlyingRankedDesc; - else - scalarMemRefDescPtr = rewriter.create( - loc, LLVM::LLVMPointerType::get(elementType, addressSpace), - underlyingRankedDesc); // Get pointer to offset field of memref descriptor. 
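+    // With opaque pointers this is a plain GEP into the descriptor struct,
+    // e.g. (sketch, descriptor field types abbreviated):
+    //   %offset_ptr = llvm.getelementptr %desc[0, 2]
+    //     : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, ptr, i64)>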
-    Type indexPtrTy = getTypeConverter()->getPointerType(
-        getTypeConverter()->getIndexType(), addressSpace);
+    auto indexPtrTy =
+        LLVM::LLVMPointerType::get(rewriter.getContext(), addressSpace);
     Value offsetPtr = rewriter.create(
-        loc, indexPtrTy, elementType, scalarMemRefDescPtr,
+        loc, indexPtrTy, elementType, underlyingRankedDesc,
         ArrayRef{0, 2});
 
     // The size value that we have to extract can be obtained using GEPop with
@@ -569,18 +555,14 @@ struct GetGlobalMemrefOpLowering : public AllocLikeOpLLVMLowering {
     unsigned memSpace = *maybeAddressSpace;
 
     Type arrayTy = convertGlobalMemrefTypeToLLVM(type, *getTypeConverter());
-    Type resTy = getTypeConverter()->getPointerType(arrayTy, memSpace);
+    auto ptrTy = LLVM::LLVMPointerType::get(rewriter.getContext(), memSpace);
     auto addressOf =
-        rewriter.create(loc, resTy, getGlobalOp.getName());
+        rewriter.create(loc, ptrTy, getGlobalOp.getName());
 
     // Get the address of the first element in the array by creating a GEP with
     // the address of the GV as the base, and (rank + 1) number of 0 indices.
-    Type elementType = typeConverter->convertType(type.getElementType());
-    Type elementPtrType =
-        getTypeConverter()->getPointerType(elementType, memSpace);
-
     auto gep = rewriter.create(
-        loc, elementPtrType, arrayTy, addressOf,
+        loc, ptrTy, arrayTy, addressOf,
         SmallVector(type.getRank() + 1, 0));
 
     // We do not expect the memref obtained using `memref.get_global` to be
@@ -590,7 +572,7 @@ struct GetGlobalMemrefOpLowering : public AllocLikeOpLLVMLowering {
     Value deadBeefConst =
        createIndexAttrConstant(rewriter, op->getLoc(), intPtrType, 0xdeadbeef);
     auto deadBeefPtr =
-        rewriter.create(loc, elementPtrType, deadBeefConst);
+        rewriter.create(loc, ptrTy, deadBeefConst);
 
     // Both allocated and aligned pointers are the same. We could potentially stash
     // a nullptr for the allocated pointer since we do not expect any dealloc.
@@ -734,13 +716,6 @@ struct MemRefCastOpLowering : public ConvertOpToLLVMPattern { auto ptr = getTypeConverter()->promoteOneMemRefDescriptor( loc, adaptor.getSource(), rewriter); - // voidptr = BitCastOp srcType* to void* - Value voidPtr; - if (getTypeConverter()->useOpaquePointers()) - voidPtr = ptr; - else - voidPtr = rewriter.create(loc, getVoidPtrType(), ptr); - // rank = ConstantOp srcRank auto rankVal = rewriter.create( loc, getIndexType(), rewriter.getIndexAttr(rank)); @@ -749,8 +724,8 @@ struct MemRefCastOpLowering : public ConvertOpToLLVMPattern { UnrankedMemRefDescriptor::undef(rewriter, loc, targetStructType); // d1 = InsertValueOp undef, rank, 0 memRefDesc.setRank(rewriter, loc, rankVal); - // d2 = InsertValueOp d1, voidptr, 1 - memRefDesc.setMemRefDescPtr(rewriter, loc, voidPtr); + // d2 = InsertValueOp d1, ptr, 1 + memRefDesc.setMemRefDescPtr(rewriter, loc, ptr); rewriter.replaceOp(memRefCastOp, (Value)memRefDesc); } else if (isa(srcType) && isa(dstType)) { @@ -760,17 +735,9 @@ struct MemRefCastOpLowering : public ConvertOpToLLVMPattern { UnrankedMemRefDescriptor memRefDesc(adaptor.getSource()); // ptr = ExtractValueOp src, 1 auto ptr = memRefDesc.memRefDescPtr(rewriter, loc); - // castPtr = BitCastOp i8* to structTy* - Value castPtr; - if (getTypeConverter()->useOpaquePointers()) - castPtr = ptr; - else - castPtr = rewriter.create( - loc, LLVM::LLVMPointerType::get(targetStructType), ptr); - // struct = LoadOp castPtr - auto loadOp = - rewriter.create(loc, targetStructType, castPtr); + // struct = LoadOp ptr + auto loadOp = rewriter.create(loc, targetStructType, ptr); rewriter.replaceOp(memRefCastOp, loadOp.getResult()); } else { llvm_unreachable("Unsupported unranked memref to unranked memref cast"); @@ -841,17 +808,10 @@ struct MemRefCopyOpLowering : public ConvertOpToLLVMPattern { auto ptr = typeConverter->promoteOneMemRefDescriptor(loc, ranked, rewriter); - Value voidPtr; - if (getTypeConverter()->useOpaquePointers()) - voidPtr = ptr; - else - voidPtr = rewriter.create(loc, getVoidPtrType(), ptr); - auto unrankedType = UnrankedMemRefType::get(type.getElementType(), type.getMemorySpace()); - return UnrankedMemRefDescriptor::pack(rewriter, loc, *typeConverter, - unrankedType, - ValueRange{rank, voidPtr}); + return UnrankedMemRefDescriptor::pack( + rewriter, loc, *typeConverter, unrankedType, ValueRange{rank, ptr}); }; // Save stack position before promoting descriptors @@ -871,7 +831,7 @@ struct MemRefCopyOpLowering : public ConvertOpToLLVMPattern { auto one = rewriter.create(loc, getIndexType(), rewriter.getIndexAttr(1)); auto promote = [&](Value desc) { - Type ptrType = getTypeConverter()->getPointerType(desc.getType()); + auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext()); auto allocated = rewriter.create(loc, ptrType, desc.getType(), one); rewriter.create(loc, desc, allocated); @@ -983,12 +943,10 @@ struct MemorySpaceCastOpLowering result.setMemRefDescPtr(rewriter, loc, resultUnderlyingDesc); // Copy pointers, performing address space casts. 
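+    // With opaque pointers, only the address space changes here, e.g.
+    // (sketch): %q = llvm.addrspacecast %p : !llvm.ptr<1> to !llvm.ptr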
- Type llvmElementType = - typeConverter->convertType(sourceType.getElementType()); - LLVM::LLVMPointerType sourceElemPtrType = - getTypeConverter()->getPointerType(llvmElementType, sourceAddrSpace); + auto sourceElemPtrType = + LLVM::LLVMPointerType::get(rewriter.getContext(), sourceAddrSpace); auto resultElemPtrType = - getTypeConverter()->getPointerType(llvmElementType, resultAddrSpace); + LLVM::LLVMPointerType::get(rewriter.getContext(), resultAddrSpace); Value allocatedPtr = sourceDesc.allocatedPtr( rewriter, loc, sourceUnderlyingDesc, sourceElemPtrType); @@ -1053,10 +1011,8 @@ static void extractPointersAndOffset(Location loc, // These will all cause assert()s on unconvertible types. unsigned memorySpace = *typeConverter.getMemRefAddressSpace( cast(operandType)); - Type elementType = cast(operandType).getElementType(); - Type llvmElementType = typeConverter.convertType(elementType); - LLVM::LLVMPointerType elementPtrType = - typeConverter.getPointerType(llvmElementType, memorySpace); + auto elementPtrType = + LLVM::LLVMPointerType::get(rewriter.getContext(), memorySpace); // Extract pointer to the underlying ranked memref descriptor and cast it to // ElemType**. @@ -1254,7 +1210,6 @@ struct MemRefReshapeOpLowering auto targetType = cast(reshapeOp.getResult().getType()); unsigned addressSpace = *getTypeConverter()->getMemRefAddressSpace(targetType); - Type elementType = targetType.getElementType(); // Create the unranked memref descriptor that holds the ranked one. The // inner descriptor is allocated on stack. @@ -1276,9 +1231,8 @@ struct MemRefReshapeOpLowering &allocatedPtr, &alignedPtr, &offset); // Set pointers and offset. - Type llvmElementType = typeConverter->convertType(elementType); - LLVM::LLVMPointerType elementPtrType = - getTypeConverter()->getPointerType(llvmElementType, addressSpace); + auto elementPtrType = + LLVM::LLVMPointerType::get(rewriter.getContext(), addressSpace); UnrankedMemRefDescriptor::setAllocatedPtr(rewriter, loc, underlyingDescPtr, elementPtrType, allocatedPtr); @@ -1328,7 +1282,7 @@ struct MemRefReshapeOpLowering rewriter.setInsertionPointToStart(bodyBlock); // Copy size from shape to descriptor. - Type llvmIndexPtrType = getTypeConverter()->getPointerType(indexType); + auto llvmIndexPtrType = LLVM::LLVMPointerType::get(rewriter.getContext()); Value sizeLoadGep = rewriter.create( loc, llvmIndexPtrType, typeConverter->convertType(shapeMemRefType.getElementType()), @@ -1430,9 +1384,9 @@ class TransposeOpLowering : public ConvertOpToLLVMPattern { targetMemRef.setOffset(rewriter, loc, viewMemRef.offset(rewriter, loc)); // Iterate over the dimensions and apply size/stride permutation: - // When enumerating the results of the permutation map, the enumeration index - // is the index into the target dimensions and the DimExpr points to the - // dimension of the source memref. + // When enumerating the results of the permutation map, the enumeration + // index is the index into the target dimensions and the DimExpr points to + // the dimension of the source memref. for (const auto &en : llvm::enumerate(transposeOp.getPermutation().getResults())) { int targetPos = en.index(); @@ -1523,17 +1477,7 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { // Field 1: Copy the allocated pointer, used for malloc/free. 
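The memory-space cast lowering above now derives both element-pointer types purely from their address spaces. A sketch of one pointer copy, with value names assumed for illustration:

    // Both pointers are opaque, so only the address space differs; a single
    // llvm.addrspacecast moves the allocated pointer between the two spaces.
    auto sourceElemPtrType =
        LLVM::LLVMPointerType::get(rewriter.getContext(), sourceAddrSpace);
    auto resultElemPtrType =
        LLVM::LLVMPointerType::get(rewriter.getContext(), resultAddrSpace);
    Value casted = rewriter.create<LLVM::AddrSpaceCastOp>(
        loc, resultElemPtrType, allocatedPtr);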
Value allocatedPtr = sourceMemRef.allocatedPtr(rewriter, loc); auto srcMemRefType = cast(viewOp.getSource().getType()); - unsigned sourceMemorySpace = - *getTypeConverter()->getMemRefAddressSpace(srcMemRefType); - Value bitcastPtr; - if (getTypeConverter()->useOpaquePointers()) - bitcastPtr = allocatedPtr; - else - bitcastPtr = rewriter.create( - loc, LLVM::LLVMPointerType::get(targetElementTy, sourceMemorySpace), - allocatedPtr); - - targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr); + targetMemRef.setAllocatedPtr(rewriter, loc, allocatedPtr); // Field 2: Copy the actual aligned pointer to payload. Value alignedPtr = sourceMemRef.alignedPtr(rewriter, loc); @@ -1542,15 +1486,7 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { typeConverter->convertType(srcMemRefType.getElementType()), alignedPtr, adaptor.getByteShift()); - if (getTypeConverter()->useOpaquePointers()) { - bitcastPtr = alignedPtr; - } else { - bitcastPtr = rewriter.create( - loc, LLVM::LLVMPointerType::get(targetElementTy, sourceMemorySpace), - alignedPtr); - } - - targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr); + targetMemRef.setAlignedPtr(rewriter, loc, alignedPtr); Type indexType = getIndexType(); // Field 3: The offset in the resulting type must be 0. This is @@ -1766,7 +1702,6 @@ struct FinalizeMemRefToLLVMConversionPass : LowerToLLVMOptions::AllocLowering::Malloc); options.useGenericFunctions = useGenericFunctions; - options.useOpaquePointers = useOpaquePointers; if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout) options.overrideIndexBitwidth(indexBitwidth); diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp index efcde2ba58bd68..1977a571130ed1 100644 --- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp +++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp @@ -402,7 +402,6 @@ struct ConvertNVGPUToNVVMPass void runOnOperation() override { LowerToLLVMOptions options(&getContext()); - options.useOpaquePointers = useOpaquePointers; RewritePatternSet patterns(&getContext()); LLVMTypeConverter converter(&getContext(), options); IRRewriter rewriter(&getContext()); @@ -451,7 +450,7 @@ struct ConvertNVGPUToNVVMPass nvgpu::getMBarrierMemrefType(rewriter.getContext(), type)); }); converter.addConversion([&](nvgpu::TensorMapDescriptorType type) -> Type { - return converter.getPointerType(type.getTensor().getElementType()); + return LLVM::LLVMPointerType::get(type.getContext()); }); populateNVGPUToNVVMConversionPatterns(converter, patterns); LLVMConversionTarget target(getContext()); @@ -651,16 +650,11 @@ struct NVGPUAsyncCopyLowering Value dstPtr = getStridedElementPtr(b.getLoc(), dstMemrefType, adaptor.getDst(), adaptor.getDstIndices(), rewriter); - auto i8Ty = IntegerType::get(op.getContext(), 8); FailureOr dstAddressSpace = getTypeConverter()->getMemRefAddressSpace(dstMemrefType); if (failed(dstAddressSpace)) return rewriter.notifyMatchFailure( loc, "destination memref address space not convertible to integer"); - auto dstPointerType = - getTypeConverter()->getPointerType(i8Ty, *dstAddressSpace); - if (!getTypeConverter()->useOpaquePointers()) - dstPtr = b.create(dstPointerType, dstPtr); auto srcMemrefType = cast(op.getSrc().getType()); FailureOr srcAddressSpace = @@ -671,13 +665,9 @@ struct NVGPUAsyncCopyLowering Value scrPtr = getStridedElementPtr(loc, srcMemrefType, adaptor.getSrc(), adaptor.getSrcIndices(), rewriter); - auto srcPointerType = - getTypeConverter()->getPointerType(i8Ty, *srcAddressSpace); - if 
(!getTypeConverter()->useOpaquePointers()) - scrPtr = b.create(srcPointerType, scrPtr); // Intrinsics takes a global pointer so we need an address space cast. - auto srcPointerGlobalType = getTypeConverter()->getPointerType( - i8Ty, NVVM::NVVMMemorySpace::kGlobalMemorySpace); + auto srcPointerGlobalType = LLVM::LLVMPointerType::get( + op->getContext(), NVVM::NVVMMemorySpace::kGlobalMemorySpace); scrPtr = b.create(srcPointerGlobalType, scrPtr); int64_t dstElements = adaptor.getDstElements().getZExtValue(); int64_t sizeInBytes = @@ -1128,8 +1118,7 @@ struct NVGPUTmaCreateDescriptorOpLowering matchAndRewrite(nvgpu::TmaCreateDescriptorOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { ImplicitLocOpBuilder b(op->getLoc(), rewriter); - LLVM::LLVMPointerType llvmPointerType = getTypeConverter()->getPointerType( - IntegerType::get(op->getContext(), 8)); + auto llvmPointerType = LLVM::LLVMPointerType::get(op->getContext()); Type llvmInt64Type = IntegerType::get(op->getContext(), 64); Value tensorElementType = diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 50087751053ba6..ee3ee02cf535e1 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -211,25 +211,14 @@ static omp::ReductionDeclareOp createDecl(PatternRewriter &builder, return decl; } -/// Returns an LLVM pointer type with the given element type, or an opaque -/// pointer if 'useOpaquePointers' is true. -static LLVM::LLVMPointerType getPointerType(Type elementType, - bool useOpaquePointers) { - if (useOpaquePointers) - return LLVM::LLVMPointerType::get(elementType.getContext()); - return LLVM::LLVMPointerType::get(elementType); -} - /// Adds an atomic reduction combiner to the given OpenMP reduction declaration /// using llvm.atomicrmw of the given kind. static omp::ReductionDeclareOp addAtomicRMW(OpBuilder &builder, LLVM::AtomicBinOp atomicKind, omp::ReductionDeclareOp decl, - scf::ReduceOp reduce, - bool useOpaquePointers) { + scf::ReduceOp reduce) { OpBuilder::InsertionGuard guard(builder); - Type type = reduce.getOperand().getType(); - Type ptrType = getPointerType(type, useOpaquePointers); + auto ptrType = LLVM::LLVMPointerType::get(builder.getContext()); Location reduceOperandLoc = reduce.getOperand().getLoc(); builder.createBlock(&decl.getAtomicReductionRegion(), decl.getAtomicReductionRegion().end(), {ptrType, ptrType}, @@ -250,8 +239,7 @@ static omp::ReductionDeclareOp addAtomicRMW(OpBuilder &builder, /// the neutral value, necessary for the OpenMP declaration. If the reduction /// cannot be recognized, returns null. 
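With the flag gone, addAtomicRMW above builds its combiner region on opaque pointers unconditionally. A sketch of the region setup from that hunk, assuming decl and reduce are in scope:

    // The atomic combiner receives accumulator and operand by pointer; both
    // block arguments are plain !llvm.ptr, independent of the element type.
    auto ptrType = LLVM::LLVMPointerType::get(builder.getContext());
    Location reduceOperandLoc = reduce.getOperand().getLoc();
    builder.createBlock(&decl.getAtomicReductionRegion(),
                        decl.getAtomicReductionRegion().end(),
                        {ptrType, ptrType}, {reduceOperandLoc, reduceOperandLoc});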
static omp::ReductionDeclareOp declareReduction(PatternRewriter &builder, - scf::ReduceOp reduce, - bool useOpaquePointers) { + scf::ReduceOp reduce) { Operation *container = SymbolTable::getNearestSymbolTable(reduce); SymbolTable symbolTable(container); @@ -272,34 +260,29 @@ static omp::ReductionDeclareOp declareReduction(PatternRewriter &builder, if (matchSimpleReduction(reduction)) { omp::ReductionDeclareOp decl = createDecl(builder, symbolTable, reduce, builder.getFloatAttr(type, 0.0)); - return addAtomicRMW(builder, LLVM::AtomicBinOp::fadd, decl, reduce, - useOpaquePointers); + return addAtomicRMW(builder, LLVM::AtomicBinOp::fadd, decl, reduce); } if (matchSimpleReduction(reduction)) { omp::ReductionDeclareOp decl = createDecl(builder, symbolTable, reduce, builder.getIntegerAttr(type, 0)); - return addAtomicRMW(builder, LLVM::AtomicBinOp::add, decl, reduce, - useOpaquePointers); + return addAtomicRMW(builder, LLVM::AtomicBinOp::add, decl, reduce); } if (matchSimpleReduction(reduction)) { omp::ReductionDeclareOp decl = createDecl(builder, symbolTable, reduce, builder.getIntegerAttr(type, 0)); - return addAtomicRMW(builder, LLVM::AtomicBinOp::_or, decl, reduce, - useOpaquePointers); + return addAtomicRMW(builder, LLVM::AtomicBinOp::_or, decl, reduce); } if (matchSimpleReduction(reduction)) { omp::ReductionDeclareOp decl = createDecl(builder, symbolTable, reduce, builder.getIntegerAttr(type, 0)); - return addAtomicRMW(builder, LLVM::AtomicBinOp::_xor, decl, reduce, - useOpaquePointers); + return addAtomicRMW(builder, LLVM::AtomicBinOp::_xor, decl, reduce); } if (matchSimpleReduction(reduction)) { omp::ReductionDeclareOp decl = createDecl( builder, symbolTable, reduce, builder.getIntegerAttr( type, llvm::APInt::getAllOnes(type.getIntOrFloatBitWidth()))); - return addAtomicRMW(builder, LLVM::AtomicBinOp::_and, decl, reduce, - useOpaquePointers); + return addAtomicRMW(builder, LLVM::AtomicBinOp::_and, decl, reduce); } // Match simple binary reductions that cannot be expressed with atomicrmw. @@ -335,7 +318,7 @@ static omp::ReductionDeclareOp declareReduction(PatternRewriter &builder, builder, symbolTable, reduce, minMaxValueForSignedInt(type, !isMin)); return addAtomicRMW(builder, isMin ? LLVM::AtomicBinOp::min : LLVM::AtomicBinOp::max, - decl, reduce, useOpaquePointers); + decl, reduce); } if (matchSelectReduction( reduction, {arith::CmpIPredicate::ult, arith::CmpIPredicate::ule}, @@ -347,7 +330,7 @@ static omp::ReductionDeclareOp declareReduction(PatternRewriter &builder, builder, symbolTable, reduce, minMaxValueForUnsignedInt(type, !isMin)); return addAtomicRMW( builder, isMin ? LLVM::AtomicBinOp::umin : LLVM::AtomicBinOp::umax, - decl, reduce, useOpaquePointers); + decl, reduce); } return nullptr; @@ -357,11 +340,8 @@ namespace { struct ParallelOpLowering : public OpRewritePattern { - bool useOpaquePointers; - - ParallelOpLowering(MLIRContext *context, bool useOpaquePointers) - : OpRewritePattern(context), - useOpaquePointers(useOpaquePointers) {} + ParallelOpLowering(MLIRContext *context) + : OpRewritePattern(context) {} LogicalResult matchAndRewrite(scf::ParallelOp parallelOp, PatternRewriter &rewriter) const override { @@ -370,8 +350,7 @@ struct ParallelOpLowering : public OpRewritePattern { // declaration and use it instead of redeclaring. 
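Each recognized reduction then calls the trimmed addAtomicRMW directly. One representative arm as a sketch; the matcher's template arguments are an assumption here (the float-add pairing):

    // Floating-point add: neutral element 0.0, atomic combiner llvm.fadd.
    if (matchSimpleReduction<arith::AddFOp, LLVM::FAddOp>(reduction)) {
      omp::ReductionDeclareOp decl = createDecl(
          builder, symbolTable, reduce, builder.getFloatAttr(type, 0.0));
      return addAtomicRMW(builder, LLVM::AtomicBinOp::fadd, decl, reduce);
    }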
SmallVector reductionDeclSymbols; for (auto reduce : parallelOp.getOps()) { - omp::ReductionDeclareOp decl = - declareReduction(rewriter, reduce, useOpaquePointers); + omp::ReductionDeclareOp decl = declareReduction(rewriter, reduce); if (!decl) return failure(); reductionDeclSymbols.push_back( @@ -385,14 +364,14 @@ struct ParallelOpLowering : public OpRewritePattern { loc, rewriter.getIntegerType(64), rewriter.getI64IntegerAttr(1)); SmallVector reductionVariables; reductionVariables.reserve(parallelOp.getNumReductions()); + auto ptrType = LLVM::LLVMPointerType::get(parallelOp.getContext()); for (Value init : parallelOp.getInitVals()) { assert((LLVM::isCompatibleType(init.getType()) || isa(init.getType())) && "cannot create a reduction variable if the type is not an LLVM " "pointer element"); - Value storage = rewriter.create( - loc, getPointerType(init.getType(), useOpaquePointers), - init.getType(), one, 0); + Value storage = + rewriter.create(loc, ptrType, init.getType(), one, 0); rewriter.create(loc, init, storage); reductionVariables.push_back(storage); } @@ -464,14 +443,14 @@ struct ParallelOpLowering : public OpRewritePattern { }; /// Applies the conversion patterns in the given function. -static LogicalResult applyPatterns(ModuleOp module, bool useOpaquePointers) { +static LogicalResult applyPatterns(ModuleOp module) { ConversionTarget target(*module.getContext()); target.addIllegalOp(); target.addLegalDialect(); RewritePatternSet patterns(module.getContext()); - patterns.add(module.getContext(), useOpaquePointers); + patterns.add(module.getContext()); FrozenRewritePatternSet frozen(std::move(patterns)); return applyPartialConversion(module, target, frozen); } @@ -484,7 +463,7 @@ struct SCFToOpenMPPass /// Pass entry point. void runOnOperation() override { - if (failed(applyPatterns(getOperation(), useOpaquePointers))) + if (failed(applyPatterns(getOperation()))) signalPassFailure(); } }; diff --git a/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp b/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp index 005dd546bf1632..5491f7dd30629a 100644 --- a/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp +++ b/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp @@ -60,15 +60,30 @@ getSMETileAndCastToVector(PatternRewriter &rewriter, Location loc, namespace { -/// Conversion pattern for vector.transfer_read op with transpose permutation -/// map to vertical arm_sme.tile_load (in-flight transpose). +/// Conversion pattern for vector.transfer_read. +/// +/// --- +/// +/// Example 1: op with identity permutation map to horizontal +/// arm_sme.tile_load: +/// +/// vector.transfer_read ... permutation_map: (d0, d1) -> (d0, d1) +/// +/// is converted to: +/// +/// arm_sme.tile_load ... +/// +/// --- +/// +/// Example 2: op with transpose permutation map to vertical arm_sme.tile_load +/// (in-flight transpose): /// /// vector.transfer_read ... permutation_map: (d0, d1) -> (d1, d0) /// /// is converted to: /// /// arm_sme.tile_load ... layout -struct TransferReadPermutationToArmSMELowering +struct TransferReadToArmSMELowering : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -79,15 +94,6 @@ struct TransferReadPermutationToArmSMELowering return rewriter.notifyMatchFailure(transferReadOp, "not a 2 result permutation map"); - AffineMap map = transferReadOp.getPermutationMap(); - - // Permutation map doesn't perform permutation, can be lowered to - // vector.load by TransferReadToVectorLoadLowering and then - // arm_sme.tile_load by VectorLoadToArmSMELowering. 
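In the scf.parallel lowering above, every reduction gets stack storage typed as a single opaque pointer, with the pointee type carried by llvm.alloca itself. The allocation step as a sketch (init and one as in the hunk):

    // One alloca per reduction variable; because the element type rides on the
    // op, the same ptrType serves reductions of any element type.
    auto ptrType = LLVM::LLVMPointerType::get(parallelOp.getContext());
    Value storage =
        rewriter.create<LLVM::AllocaOp>(loc, ptrType, init.getType(), one, 0);
    rewriter.create<LLVM::StoreOp>(loc, init, storage);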
- if (map.isIdentity()) - return rewriter.notifyMatchFailure( - transferReadOp, "map is an identity, apply another pattern"); - auto vectorType = transferReadOp.getVectorType(); if (!arm_sme::isValidSMETileVectorType(vectorType)) return rewriter.notifyMatchFailure(transferReadOp, @@ -96,26 +102,33 @@ struct TransferReadPermutationToArmSMELowering if (!llvm::isa(transferReadOp.getSource().getType())) return rewriter.notifyMatchFailure(transferReadOp, "not a memref source"); - if (transferReadOp.getMask()) - // TODO: support masking. - return rewriter.notifyMatchFailure(transferReadOp, - "masking not yet supported"); - // Out-of-bounds dims are not supported. if (transferReadOp.hasOutOfBoundsDim()) return rewriter.notifyMatchFailure(transferReadOp, "not inbounds transfer read"); + arm_sme::TileSliceLayout layout; + AffineExpr d0, d1; bindDims(transferReadOp.getContext(), d0, d1); - if (map != AffineMap::get(map.getNumDims(), 0, {d1, d0}, - transferReadOp.getContext())) + AffineMap map = transferReadOp.getPermutationMap(); + if (map.isIdentity()) + layout = arm_sme::TileSliceLayout::Horizontal; + else if (map == AffineMap::get(map.getNumDims(), 0, {d1, d0}, + transferReadOp.getContext())) + layout = arm_sme::TileSliceLayout::Vertical; + else return rewriter.notifyMatchFailure(transferReadOp, - "not true 2-D matrix transpose"); + "unsupported permutation map"); + // Padding isn't optional for transfer_read, but is only used in the case + // of out-of-bounds accesses (not supported here) and/or masking. The mask is + // optional; if it's not present, don't pass the padding. + auto mask = transferReadOp.getMask(); + auto padding = mask ? transferReadOp.getPadding() : nullptr; rewriter.replaceOpWithNewOp( transferReadOp, vectorType, transferReadOp.getSource(), - transferReadOp.getIndices(), arm_sme::TileSliceLayout::Vertical); + transferReadOp.getIndices(), padding, mask, layout); return success(); } @@ -531,7 +544,7 @@ struct VectorOuterProductToArmSMELowering void mlir::populateVectorToArmSMEPatterns(RewritePatternSet &patterns, MLIRContext &ctx) { patterns.add(&ctx); diff --git a/mlir/lib/Dialect/ArmSME/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/ArmSME/Transforms/LegalizeForLLVMExport.cpp index 7dd04e25075c8d..d1a54658a595bf 100644 --- a/mlir/lib/Dialect/ArmSME/Transforms/LegalizeForLLVMExport.cpp +++ b/mlir/lib/Dialect/ArmSME/Transforms/LegalizeForLLVMExport.cpp @@ -278,13 +278,7 @@ struct StoreTileSliceToArmSMELowering auto tileSliceI32 = rewriter.create( loc, rewriter.getI32Type(), tileSlice); - // Create all active predicate mask.
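Returning to the merged transfer_read pattern above: once the layout is chosen, the rewrite emits a single tile load, forwarding padding and mask only when a mask is present. A sketch, with the op name assumed to be arm_sme::TileLoadOp:

    // Identity maps produce horizontal tile loads and the 2-D transpose map
    // vertical ones; padding is only meaningful together with a mask.
    rewriter.replaceOpWithNewOp<arm_sme::TileLoadOp>(
        transferReadOp, vectorType, transferReadOp.getSource(),
        transferReadOp.getIndices(), padding, mask, layout);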
- auto one = rewriter.create( - loc, rewriter.getI1Type(), - rewriter.getIntegerAttr(rewriter.getI1Type(), 1)); - auto predTy = VectorType::get(tileType.getShape()[0], rewriter.getI1Type(), - /*scalableDims=*/{true}); - auto allActiveMask = rewriter.create(loc, predTy, one); + auto maskOp = storeTileSliceOp.getMask(); Value tileI32 = castTileIDToI32(tile, loc, rewriter); arm_sme::TileSliceLayout layout = storeTileSliceOp.getLayout(); @@ -295,23 +289,23 @@ struct StoreTileSliceToArmSMELowering llvm_unreachable("unexpected element type!"); case 8: rewriter.replaceOpWithNewOp( - storeTileSliceOp, allActiveMask, ptr, tileI32, tileSliceI32); + storeTileSliceOp, maskOp, ptr, tileI32, tileSliceI32); break; case 16: rewriter.replaceOpWithNewOp( - storeTileSliceOp, allActiveMask, ptr, tileI32, tileSliceI32); + storeTileSliceOp, maskOp, ptr, tileI32, tileSliceI32); break; case 32: rewriter.replaceOpWithNewOp( - storeTileSliceOp, allActiveMask, ptr, tileI32, tileSliceI32); + storeTileSliceOp, maskOp, ptr, tileI32, tileSliceI32); break; case 64: rewriter.replaceOpWithNewOp( - storeTileSliceOp, allActiveMask, ptr, tileI32, tileSliceI32); + storeTileSliceOp, maskOp, ptr, tileI32, tileSliceI32); break; case 128: rewriter.replaceOpWithNewOp( - storeTileSliceOp, allActiveMask, ptr, tileI32, tileSliceI32); + storeTileSliceOp, maskOp, ptr, tileI32, tileSliceI32); break; } } else { @@ -320,23 +314,23 @@ struct StoreTileSliceToArmSMELowering llvm_unreachable("unexpected element type!"); case 8: rewriter.replaceOpWithNewOp( - storeTileSliceOp, allActiveMask, ptr, tileI32, tileSliceI32); + storeTileSliceOp, maskOp, ptr, tileI32, tileSliceI32); break; case 16: rewriter.replaceOpWithNewOp( - storeTileSliceOp, allActiveMask, ptr, tileI32, tileSliceI32); + storeTileSliceOp, maskOp, ptr, tileI32, tileSliceI32); break; case 32: rewriter.replaceOpWithNewOp( - storeTileSliceOp, allActiveMask, ptr, tileI32, tileSliceI32); + storeTileSliceOp, maskOp, ptr, tileI32, tileSliceI32); break; case 64: rewriter.replaceOpWithNewOp( - storeTileSliceOp, allActiveMask, ptr, tileI32, tileSliceI32); + storeTileSliceOp, maskOp, ptr, tileI32, tileSliceI32); break; case 128: rewriter.replaceOpWithNewOp( - storeTileSliceOp, allActiveMask, ptr, tileI32, tileSliceI32); + storeTileSliceOp, maskOp, ptr, tileI32, tileSliceI32); break; } } diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp index f7340844f7e197..311540fde512b9 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp @@ -317,6 +317,27 @@ Value linalg::bufferizeToAllocation( return alloc; } +Value linalg::bufferizeToAllocation( + RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options, + bufferization::AllocTensorOp allocTensorOp, Attribute memorySpace, + Operation *insertionPoint) { + Location loc = allocTensorOp.getLoc(); + OpBuilder::InsertionGuard g(rewriter); + rewriter.setInsertionPoint(insertionPoint ? insertionPoint : allocTensorOp); + bufferization::BufferizationOptions bufferizationOptions; + + // Create buffer allocation. + Value alloc = createAllocationForTensor( + rewriter, loc, allocTensorOp.getResult(), options, memorySpace); + + // Create bufferization.to_tensor with "restrict" and "writable". The returned + // tensor is a new buffer allocation, so it does not alias with any buffer. 
+ Value toTensorOp = rewriter.create( + loc, alloc, /*restrict=*/true, /*writable=*/true); + rewriter.replaceOp(allocTensorOp, toTensorOp); + return alloc; +} + /// Lower tensor.from_elements to a sequence of chained tensor.insert. FailureOr mlir::linalg::rewriteInDestinationPassingStyle( RewriterBase &rewriter, tensor::FromElementsOp fromElementsOp) { @@ -454,6 +475,8 @@ Value linalg::bufferizeToAllocation( return bufferizeToAllocation(rewriter, options, padOp, memorySpace); if (auto maskOp = dyn_cast(op)) return bufferizeToAllocation(rewriter, options, maskOp, memorySpace); + if (auto allocTensorOp = dyn_cast(op)) + return bufferizeToAllocation(rewriter, options, allocTensorOp, memorySpace); // Only bufferizable ops are supported. auto bufferizableOp = dyn_cast(op); diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index d3747d33104091..df64d561f46cb3 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -878,6 +878,21 @@ LogicalResult acc::LoopOp::verify() { return success(); } +unsigned LoopOp::getNumDataOperands() { + return getReductionOperands().size() + getPrivateOperands().size(); +} + +Value LoopOp::getDataOperand(unsigned i) { + unsigned numOptional = getGangNum() ? 1 : 0; + numOptional += getGangDim() ? 1 : 0; + numOptional += getGangStatic() ? 1 : 0; + numOptional += getVectorLength() ? 1 : 0; + numOptional += getWorkerNum() ? 1 : 0; + numOptional += getTileOperands().size(); + numOptional += getCacheOperands().size(); + return getOperand(numOptional + i); +} + //===----------------------------------------------------------------------===// // DataOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 2bf9355ed62676..3b792a26d1823f 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -21,6 +21,7 @@ #include "mlir/Interfaces/FoldInterfaces.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" @@ -668,10 +669,7 @@ static LogicalResult verifySynchronizationHint(Operation *op, uint64_t hint) { // Helper function to get bitwise AND of `value` and 'flag' uint64_t mapTypeToBitFlag(uint64_t value, llvm::omp::OpenMPOffloadMappingFlags flag) { - return value & - static_cast< - std::underlying_type_t>( - flag); + return value & llvm::to_underlying(flag); } /// Parses a map_entries map type from a string format back into its numeric @@ -720,8 +718,7 @@ static ParseResult parseMapClause(OpAsmParser &parser, IntegerAttr &mapType) { mapType = parser.getBuilder().getIntegerAttr( parser.getBuilder().getIntegerType(64, /*isSigned=*/false), - static_cast>( - mapTypeBits)); + llvm::to_underlying(mapTypeBits)); return success(); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp index a822effbb2ab78..307a609fd1b774 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp @@ -6,7 +6,10 @@ // //===----------------------------------------------------------------------===// +#include "CodegenUtils.h" + #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include 
"mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" @@ -18,10 +21,8 @@ using namespace mlir; using namespace mlir::sparse_tensor; -namespace { - //===----------------------------------------------------------------------===// -// Helper methods. +// File Local Helper methods. //===----------------------------------------------------------------------===// // Translates a "simple" map according to an identity lvl-map. @@ -51,6 +52,27 @@ static Value genRemap(OpBuilder &builder, SparseTensorEncodingAttr enc, return builder.create(val.getLoc(), enc, val); } +static SmallVector remapValueRange(OpBuilder &rewriter, TypeRange types, + ValueRange outs) { + SmallVector ret(outs); + assert(outs.size() == types.size()); + for (auto [r, t] : llvm::zip(ret, types)) + if (r.getType() != t) + r = rewriter.create(r.getLoc(), t, r); + return ret; +} + +/// Whether the operation has any sparse tensor with non-identity dim2lvl maps. +static bool hasNonIdentityOperandsOrResults(Operation *op) { + auto hasNonIdentityMap = [](Value v) { + auto stt = tryGetSparseTensorType(v); + return stt && !stt->isIdentity(); + }; + + return llvm::any_of(op->getOperands(), hasNonIdentityMap) || + llvm::any_of(op->getResults(), hasNonIdentityMap); +} + // Generates a clone of the given linalg generic operation, but with // remapped arguments, index maps, and iteration types. // @@ -86,6 +108,8 @@ static linalg::GenericOp genGenericLinalg(PatternRewriter &rewriter, return newOp; } +namespace { + //===----------------------------------------------------------------------===// // Rewriting rules for linalg generic ops. //===----------------------------------------------------------------------===// @@ -142,21 +166,17 @@ struct GenericOpReinterpretMap : public OpRewritePattern { }; //===----------------------------------------------------------------------===// -// Rewriting rules for operations other than linalg generic ops. +// Reinterpret Map Rewriters for operations other than linalg.generics //===----------------------------------------------------------------------===// -// CRTP to help implementing a rewriter that demaps all its inputs and remaps -// all its outputs. +// CRTP to help implementing a rewriter that demaps all its inputs. template -struct DemapInsRemapOutsRewriter : public OpRewritePattern { +struct DemapInsRewriter : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; using OpAdaptor = typename SourceOp::Adaptor; LogicalResult matchAndRewrite(SourceOp op, PatternRewriter &rewriter) const override { - if (!static_cast(this)->matchOp(op)) - return failure(); - Location loc = op.getLoc(); // Demaps non-trivial inputs. SmallVector deMappedIns(op->getOperands()); @@ -166,61 +186,169 @@ struct DemapInsRemapOutsRewriter : public OpRewritePattern { // CRTP call. OpAdaptor adaptor(deMappedIns); - ValueRange outs = - static_cast(this)->rewriteOp(op, adaptor, rewriter); - assert(outs.size() == op->getResults().size()); - - // Remap outputs. 
- SmallVector reMappedOuts(outs); - for (auto [r, a] : llvm::zip(reMappedOuts, op->getResults())) - if (r.getType() != a.getType()) - r = rewriter.create(loc, a.getType(), r); - - rewriter.replaceOp(op, reMappedOuts); - return success(); + return static_cast(this)->rewriteOp(op, adaptor, + rewriter); } }; -struct CrdTranslateRewriter : public OpRewritePattern { +struct TensorAllocDemapper + : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(CrdTranslateOp op, + LogicalResult matchAndRewrite(bufferization::AllocTensorOp op, PatternRewriter &rewriter) const override { - AffineMap map = op.getDirection() == CrdTransDirectionKind::dim2lvl - ? op.getEncoder().getDimToLvl() - : op.getEncoder().getLvlToDim(); - - SmallVector outCrds; - for (AffineExpr result : map.getResults()) { - // TODO: we should probably expand the affine map to IR using our own - // rules, since affine.apply assume signed value, while the cooridinates - // we provided must always be signless. - Value trans = rewriter.create( - op.getLoc(), AffineMap::get(map.getNumDims(), 0, result), - op.getInCrds()); - outCrds.push_back(trans); + if (!hasNonIdentityOperandsOrResults(op)) + return failure(); + + Location loc = op.getLoc(); + auto stt = getSparseTensorType(op.getResult()); + + SmallVector maxDimCrds; + maxDimCrds.reserve(stt.getDimRank()); + ValueRange dynSz = op.getDynamicSizes(); + for (int64_t dimSz : stt.getDimShape()) { + if (ShapedType::isDynamic(dimSz)) { + Value maxCrd = rewriter.create( + loc, dynSz.front(), constantIndex(rewriter, loc, 1)); + maxDimCrds.push_back(maxCrd); + dynSz = dynSz.drop_front(); + } else { + maxDimCrds.push_back(constantIndex(rewriter, loc, dimSz - 1)); + } + } + + ValueRange maxLvlCrds = stt.translateCrds(rewriter, loc, maxDimCrds, + CrdTransDirectionKind::dim2lvl); + auto lvlShape = stt.getLvlShape(); + SmallVector dynLvlSzs; + for (unsigned i = 0, e = lvlShape.size(); i < e; i++) { + if (ShapedType::isDynamic(lvlShape[i])) { + Value sz = rewriter.create( + loc, maxLvlCrds[i], constantIndex(rewriter, loc, 1)); + dynLvlSzs.push_back(sz); + } } - rewriter.replaceOp(op, outCrds); + + assert(dynSz.empty()); // should have consumed all. 
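TensorAllocDemapper above recovers dynamic level sizes by translating maximal per-dimension coordinates through dim2lvl. The arithmetic, condensed into a sketch; the arith op names are assumptions:

    // Per dynamic dimension: maxDimCrd = dimSize - 1. After translateCrds with
    // CrdTransDirectionKind::dim2lvl, each dynamic level size is maxLvlCrd + 1.
    Value maxDimCrd = rewriter.create<arith::SubIOp>(
        loc, dimSize, constantIndex(rewriter, loc, 1));
    Value lvlSize = rewriter.create<arith::AddIOp>(
        loc, maxLvlCrd, constantIndex(rewriter, loc, 1));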
+ rewriter.startRootUpdate(op); + op->setOperands(dynLvlSzs); + op.getResult().setType(stt.getDemappedType()); + rewriter.finalizeRootUpdate(op); + rewriter.setInsertionPointAfter(op); + + Value t = genRemap(rewriter, stt.getEncoding(), op.getResult()); + rewriter.replaceAllUsesExcept(op.getResult(), t, t.getDefiningOp()); return success(); } }; -struct TensorInsertRewriter - : public DemapInsRemapOutsRewriter { - using DemapInsRemapOutsRewriter::DemapInsRemapOutsRewriter; - - bool matchOp(tensor::InsertOp op) const { - return op.getResult().getType().getEncoding() != nullptr; - } +struct TensorInsertDemapper + : public DemapInsRewriter { + using DemapInsRewriter::DemapInsRewriter; + LogicalResult rewriteOp(tensor::InsertOp op, OpAdaptor adaptor, + PatternRewriter &rewriter) const { + if (!hasAnySparseResult(op)) + return failure(); - ValueRange rewriteOp(tensor::InsertOp op, OpAdaptor adaptor, - PatternRewriter &rewriter) const { Location loc = op.getLoc(); auto stt = getSparseTensorType(op.getResult()); ValueRange lvlCrd = stt.translateCrds(rewriter, loc, op.getIndices(), CrdTransDirectionKind::dim2lvl); - Operation *insertOp = rewriter.create( + auto insertOp = rewriter.create( loc, op.getScalar(), adaptor.getDest(), lvlCrd); - return insertOp->getResults(); + + Value out = genRemap(rewriter, stt.getEncoding(), insertOp.getResult()); + rewriter.replaceOp(op, out); + return success(); + } +}; + +struct ForeachOpDemapper + : public DemapInsRewriter { + using DemapInsRewriter::DemapInsRewriter; + LogicalResult rewriteOp(ForeachOp op, OpAdaptor adaptor, + PatternRewriter &rewriter) const { + // Only handle operations with sparse input/output with non-identity dim2lvl + // maps. + if (!hasNonIdentityOperandsOrResults(op)) + return failure(); + + // TODO: demap constant as well. + if (auto constOp = op.getTensor().getDefiningOp()) + if (auto attr = dyn_cast(constOp.getValue())) + return failure(); + + Location loc = op.getLoc(); + // Cache the type information since we update the foreach op in-place. + auto srcStt = getSparseTensorType(op.getTensor()); + SmallVector prevRetTps(op.getResultTypes()); + + rewriter.startRootUpdate(op); + op.getTensorMutable().assign(adaptor.getTensor()); + op.getInitArgsMutable().assign(adaptor.getInitArgs()); + // Update results' types. + for (auto r : op.getResults()) + if (auto stt = tryGetSparseTensorType(r); stt && !stt->isIdentity()) + r.setType(stt->getDemappedType()); + + Level lvlRank = getSparseTensorType(adaptor.getTensor()).getLvlRank(); + // Update the foreach body. 
+ SmallVector blockArgTps(lvlRank, rewriter.getIndexType()); + blockArgTps.push_back(srcStt.getElementType()); + blockArgTps.append(adaptor.getInitArgs().getTypes().begin(), + adaptor.getInitArgs().getTypes().end()); + Block *body = op.getBody(); + // Block Args: [dimCrd, val, initArgs] + unsigned preArgNum = body->getNumArguments(); + for (Type t : blockArgTps) + body->addArgument(t, loc); + + // Block Args: [dimCrd, val, initArgs, lvlCrds, val, DemappedArgs] + rewriter.setInsertionPointToStart(body); + ValueRange lvlCrds = body->getArguments().slice(preArgNum, lvlRank); + + ValueRange dimCrds = srcStt.translateCrds(rewriter, loc, lvlCrds, + CrdTransDirectionKind::lvl2dim); + rewriter.replaceAllUsesWith( + body->getArguments().take_front(srcStt.getDimRank()), dimCrds); + body->eraseArguments(0, srcStt.getDimRank()); + // Block Args: [val, initArgs, lvlCrds, val, DemappedArgs] + unsigned numInitArgs = op.getInitArgs().size(); + rewriter.replaceAllUsesWith(body->getArgument(0), + body->getArgument(lvlRank + numInitArgs + 1)); + body->eraseArgument(0); + // Block Args: [initArgs, lvlCrds, val, DemappedArgs] + ValueRange srcArgs = body->getArguments().take_front(numInitArgs); + ValueRange dstArgs = body->getArguments().take_back(numInitArgs); + // Remap back before replacement. + SmallVector reMappedArgs = + remapValueRange(rewriter, srcArgs.getTypes(), dstArgs); + rewriter.replaceAllUsesWith(srcArgs, reMappedArgs); + body->eraseArguments(0, numInitArgs); + // Block Args: [lvlCrds, DemappedArgs] and we are done. + + // Update yield operations. + if (numInitArgs != 0) { + rewriter.setInsertionPointToEnd(body); + auto yield = llvm::cast(body->getTerminator()); + if (auto stt = tryGetSparseTensorType(yield.getResult()); + stt && !stt->isIdentity()) { + Value y = genDemap(rewriter, stt->getEncoding(), yield.getResult()); + rewriter.create(loc, y); + rewriter.eraseOp(yield); + } + } + rewriter.finalizeRootUpdate(op); + + rewriter.setInsertionPointAfter(op); + SmallVector outs = + remapValueRange(rewriter, prevRetTps, op.getResults()); + + // Replace all the uses of the foreach results, except the use in the + // reinterpret_map used to remap the output. + for (auto [from, to] : llvm::zip(op.getResults(), outs)) + rewriter.replaceAllUsesExcept(from, to, to.getDefiningOp()); + + return success(); + } }; @@ -234,7 +362,7 @@ void mlir::populateSparseReinterpretMap(RewritePatternSet &patterns, } if (scope == ReinterpretMapScope::kAll || scope == ReinterpretMapScope::kExceptGeneric) { - patterns.add( + patterns.add( patterns.getContext()); } } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index 02796bc9a7e7df..c00f19916e49fb 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -1063,6 +1063,29 @@ struct DirectConvertRewriter : public OpRewritePattern { } }; +struct CrdTranslateRewriter : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(CrdTranslateOp op, + PatternRewriter &rewriter) const override { + AffineMap map = op.getDirection() == CrdTransDirectionKind::dim2lvl + ? 
op.getEncoder().getDimToLvl() + : op.getEncoder().getLvlToDim(); + + SmallVector outCrds; + for (AffineExpr result : map.getResults()) { + // TODO: we should probably expand the affine map to IR using our own + // rules, since affine.apply assumes signed values, while the coordinates + // we provide must always be signless. + Value trans = rewriter.create( + op.getLoc(), AffineMap::get(map.getNumDims(), 0, result), + op.getInCrds()); + outCrds.push_back(trans); + } + rewriter.replaceOp(op, outCrds); + return success(); + } +}; + /// Sparse rewriting rule for the foreach operator. struct ForeachRewriter : public OpRewritePattern { public: @@ -1284,5 +1307,7 @@ void mlir::populateLowerSparseOpsToForeachPatterns(RewritePatternSet &patterns, } void mlir::populateLowerForeachToSCFPatterns(RewritePatternSet &patterns) { - patterns.add(patterns.getContext()); + // Run CrdTranslateRewriter later in the pipeline so that the operation can + // be folded before lowering to affine.apply. + patterns.add(patterns.getContext()); } diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp index 3d65123373109b..6aea0343bfc932 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp @@ -34,6 +34,71 @@ using namespace mlir; namespace { +//===----------------------------------------------------------------------===// +// ConvertVectorStore +//===----------------------------------------------------------------------===// + +struct ConvertVectorStore final : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(vector::StoreOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + + auto loc = op.getLoc(); + auto convertedType = cast(adaptor.getBase().getType()); + Type oldElementType = op.getValueToStore().getType().getElementType(); + Type newElementType = convertedType.getElementType(); + int srcBits = oldElementType.getIntOrFloatBitWidth(); + int dstBits = newElementType.getIntOrFloatBitWidth(); + + if (dstBits % srcBits != 0) { + return rewriter.notifyMatchFailure( + op, "only dstBits % srcBits == 0 supported"); + } + int scale = dstBits / srcBits; + + // Adjust the number of elements to store when emulating narrow types. + // Here only the 1-D vector store is considered, and the N-D memref types + // should be linearized.
+ // For example, to emulate i4 to i8, the following op: + // + // vector.store %arg1, %0[%arg2, %arg3] : memref<4x8xi4>, vector<8xi4> + // + // can be replaced with + // + // %bitcast = vector.bitcast %arg1 : vector<8xi4> to vector<4xi8> + // vector.store %bitcast, %alloc[%linear_index] : memref<16xi8>, + // vector<4xi8> + + auto origElements = op.getValueToStore().getType().getNumElements(); + if (origElements % scale != 0) + return failure(); + + auto stridedMetadata = + rewriter.create(loc, op.getBase()); + + OpFoldResult linearizedIndices; + std::tie(std::ignore, linearizedIndices) = + memref::getLinearizedMemRefOffsetAndSize( + rewriter, loc, srcBits, dstBits, + stridedMetadata.getConstifiedMixedOffset(), + stridedMetadata.getConstifiedMixedSizes(), + stridedMetadata.getConstifiedMixedStrides(), + getAsOpFoldResult(adaptor.getIndices())); + + auto numElements = origElements / scale; + auto bitCast = rewriter.create( + loc, VectorType::get(numElements, newElementType), + op.getValueToStore()); + + rewriter.replaceOpWithNewOp( + op, bitCast.getResult(), adaptor.getBase(), + getValueOrCreateConstantIndexOp(rewriter, loc, linearizedIndices)); + return success(); + } +}; + //===----------------------------------------------------------------------===// // ConvertVectorLoad //===----------------------------------------------------------------------===// @@ -755,7 +820,7 @@ void vector::populateVectorNarrowTypeEmulationPatterns( RewritePatternSet &patterns) { // Populate `vector.*` conversion patterns. - patterns.add(typeConverter, patterns.getContext()); } diff --git a/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir b/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir index 3fb320c0d219e6..d61f588941b408 100644 --- a/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir +++ b/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir @@ -48,8 +48,9 @@ func.func @arm_sme_tile_load_ver(%src : memref) { // CHECK-DAG: %[[VSCALE:.*]] = vector.vscale // CHECK: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[C4]], %[[VSCALE]] : index // CHECK: scf.for %[[TILE_SLICE_INDEX:.*]] = %[[C0]] to %[[NUM_TILE_SLICES]] step %[[C1]] { +// CHECK: %[[PTRUE_S:.*]] = arith.constant dense : vector<[4]xi1> // CHECK: %[[OFFSET:.*]] = arith.addi %[[C0]], %[[TILE_SLICE_INDEX]] : index -// CHECK: arm_sme.store_tile_slice %[[TILE]], %[[TILE_SLICE_INDEX]], %[[DEST]]{{\[}}%[[OFFSET]], %[[C0]]] : memref, vector<[4]x[4]xi32> +// CHECK: arm_sme.store_tile_slice %[[TILE]], %[[TILE_SLICE_INDEX]], %[[PTRUE_S]], %[[DEST]]{{\[}}%[[OFFSET]], %[[C0]]] : memref, vector<[4]xi1>, vector<[4]x[4]xi32> func.func @arm_sme_tile_store_hor(%tile : vector<[4]x[4]xi32>, %dest : memref) { %c0 = arith.constant 0 : index arm_sme.tile_store %tile, %dest[%c0, %c0] : memref, vector<[4]x[4]xi32> diff --git a/mlir/test/Conversion/ControlFlowToSPIRV/cf-ops-to-spirv.mlir b/mlir/test/Conversion/ControlFlowToSPIRV/cf-ops-to-spirv.mlir index 98ee6891453496..a10a1468e58249 100644 --- a/mlir/test/Conversion/ControlFlowToSPIRV/cf-ops-to-spirv.mlir +++ b/mlir/test/Conversion/ControlFlowToSPIRV/cf-ops-to-spirv.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file -convert-cf-to-spirv -verify-diagnostics %s | FileCheck %s +// RUN: mlir-opt --split-input-file --convert-cf-to-spirv --verify-diagnostics %s | FileCheck %s //===----------------------------------------------------------------------===// // cf.br, cf.cond_br @@ -36,3 +36,20 @@ func.func @simple_loop(%begin: i32, %end: i32, %step: i32) { } } + +// ----- + +// TODO: We should handle blocks whose arguments 
require type conversion. + +// CHECK-LABEL: func.func @main_graph +func.func @main_graph(%arg0: index) { + %c3 = arith.constant 1 : index + cf.br ^bb1(%arg0 : index) +^bb1(%0: index): // 2 preds: ^bb0, ^bb2 + %1 = arith.cmpi slt, %0, %c3 : index + cf.cond_br %1, ^bb2, ^bb3 +^bb2: // pred: ^bb1 + cf.br ^bb1(%c3 : index) +^bb3: // pred: ^bb1 + return +} diff --git a/mlir/test/Conversion/MemRefToLLVM/convert-alloca-scope.mlir b/mlir/test/Conversion/MemRefToLLVM/convert-alloca-scope.mlir index 5cc9b33d21f174..5fb3059626cdc1 100644 --- a/mlir/test/Conversion/MemRefToLLVM/convert-alloca-scope.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/convert-alloca-scope.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -finalize-memref-to-llvm='use-opaque-pointers=1' %s | FileCheck %s +// RUN: mlir-opt -finalize-memref-to-llvm %s | FileCheck %s // CHECK-LABEL: @empty func.func @empty() { diff --git a/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir b/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir index ea6a235857e636..66dc30127ae741 100644 --- a/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt -split-input-file -finalize-memref-to-llvm='use-opaque-pointers=1' %s | FileCheck %s -// RUN: mlir-opt -split-input-file -finalize-memref-to-llvm='use-aligned-alloc=1 use-opaque-pointers=1' %s | FileCheck %s --check-prefix=ALIGNED-ALLOC -// RUN: mlir-opt -split-input-file -finalize-memref-to-llvm='index-bitwidth=32 use-opaque-pointers=1' %s | FileCheck --check-prefix=CHECK32 %s +// RUN: mlir-opt -split-input-file -finalize-memref-to-llvm %s | FileCheck %s +// RUN: mlir-opt -split-input-file -finalize-memref-to-llvm='use-aligned-alloc=1' %s | FileCheck %s --check-prefix=ALIGNED-ALLOC +// RUN: mlir-opt -split-input-file -finalize-memref-to-llvm='index-bitwidth=32' %s | FileCheck --check-prefix=CHECK32 %s // CHECK-LABEL: func @mixed_alloc( // CHECK: %[[Marg:.*]]: index, %[[Narg:.*]]: index) diff --git a/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir index 35a6358d8f58b0..f1600d43e7bfb3 100644 --- a/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -finalize-memref-to-llvm='use-opaque-pointers=1' -split-input-file %s | FileCheck %s +// RUN: mlir-opt -finalize-memref-to-llvm -split-input-file %s | FileCheck %s // CHECK-LABEL: func @zero_d_alloc() func.func @zero_d_alloc() -> memref { diff --git a/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir index 0e655b5464d961..eb45112b117c0d 100644 --- a/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -expand-strided-metadata -finalize-memref-to-llvm='use-opaque-pointers=1' -lower-affine -convert-arith-to-llvm -cse %s -split-input-file | FileCheck %s +// RUN: mlir-opt -expand-strided-metadata -finalize-memref-to-llvm -lower-affine -convert-arith-to-llvm -cse %s -split-input-file | FileCheck %s // // This test demonstrates a full "memref to llvm" pipeline where // we first expand some of the memref operations (using affine, diff --git a/mlir/test/Conversion/MemRefToLLVM/generic-functions.mlir 
b/mlir/test/Conversion/MemRefToLLVM/generic-functions.mlir index d5ac0b0da979dc..9256461eadcb30 100644 --- a/mlir/test/Conversion/MemRefToLLVM/generic-functions.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/generic-functions.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt -pass-pipeline="builtin.module(finalize-memref-to-llvm{use-generic-functions=1 use-opaque-pointers=1})" -split-input-file %s \ +// RUN: mlir-opt -pass-pipeline="builtin.module(finalize-memref-to-llvm{use-generic-functions=1})" -split-input-file %s \ // RUN: | FileCheck %s --check-prefix="CHECK-NOTALIGNED" -// RUN: mlir-opt -pass-pipeline="builtin.module(finalize-memref-to-llvm{use-generic-functions=1 use-aligned-alloc=1 use-opaque-pointers=1})" -split-input-file %s \ +// RUN: mlir-opt -pass-pipeline="builtin.module(finalize-memref-to-llvm{use-generic-functions=1 use-aligned-alloc=1})" -split-input-file %s \ // RUN: | FileCheck %s --check-prefix="CHECK-ALIGNED" // CHECK-LABEL: func @alloc() diff --git a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir index ae487ef6694745..3b3a51d609be97 100644 --- a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -finalize-memref-to-llvm='use-opaque-pointers=1' %s -split-input-file | FileCheck %s -// RUN: mlir-opt -finalize-memref-to-llvm='index-bitwidth=32 use-opaque-pointers=1' %s -split-input-file | FileCheck --check-prefix=CHECK32 %s +// RUN: mlir-opt -finalize-memref-to-llvm %s -split-input-file | FileCheck %s +// RUN: mlir-opt -finalize-memref-to-llvm='index-bitwidth=32' %s -split-input-file | FileCheck --check-prefix=CHECK32 %s // Same below, but using the `ConvertToLLVMPatternInterface` entry point // and the generic `convert-to-llvm` pass. 
This produces slightly different IR diff --git a/mlir/test/Conversion/MemRefToLLVM/typed-pointers.mlir b/mlir/test/Conversion/MemRefToLLVM/typed-pointers.mlir deleted file mode 100644 index 19d053ee7813bc..00000000000000 --- a/mlir/test/Conversion/MemRefToLLVM/typed-pointers.mlir +++ /dev/null @@ -1,376 +0,0 @@ -// RUN: mlir-opt -finalize-memref-to-llvm='use-opaque-pointers=0' %s -split-input-file | FileCheck %s -// RUN: mlir-opt -finalize-memref-to-llvm='index-bitwidth=32 use-opaque-pointers=0' -split-input-file %s | FileCheck --check-prefix=CHECK32 %s - -// CHECK-LABEL: func @view( -// CHECK: %[[ARG1F:.*]]: index, %[[ARG2F:.*]]: index -func.func @view(%arg1 : index, %arg2 : index) { - // CHECK: %[[ARG2:.*]] = builtin.unrealized_conversion_cast %[[ARG2F:.*]] - // CHECK: %[[ARG1:.*]] = builtin.unrealized_conversion_cast %[[ARG1F:.*]] - // CHECK: llvm.mlir.constant(2048 : index) : i64 - // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %0 = memref.alloc() : memref<2048xi8> - - // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - // CHECK: %[[BASE_PTR_2:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - // CHECK: %[[SHIFTED_BASE_PTR_2:.*]] = llvm.getelementptr %[[BASE_PTR_2]][%[[ARG2]]] : (!llvm.ptr, i64) -> !llvm.ptr - // CHECK: %[[CAST_SHIFTED_BASE_PTR_2:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR_2]] : !llvm.ptr to !llvm.ptr - // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR_2]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - // CHECK: %[[C0_2:.*]] = llvm.mlir.constant(0 : index) : i64 - // CHECK: llvm.insertvalue %[[C0_2]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - // CHECK: llvm.insertvalue %[[ARG1]], %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - // CHECK: llvm.mlir.constant(1 : index) : i64 - // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - // CHECK: llvm.mlir.constant(4 : index) : i64 - // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - // CHECK: llvm.mul %{{.*}}, %[[ARG1]] - // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %1 = memref.view %0[%arg2][%arg1] : memref<2048xi8> to memref<4x?xf32> - return -} - -// ----- - -// CHECK: llvm.mlir.global external @gv0() {addr_space = 0 : i32} : !llvm.array<2 x f32> { -// CHECK-NEXT: %0 = llvm.mlir.undef : !llvm.array<2 x f32> -// CHECK-NEXT: llvm.return %0 : !llvm.array<2 x f32> -// CHECK-NEXT: } -memref.global @gv0 : memref<2xf32> = uninitialized - -// CHECK-LABEL: func @get_gv0_memref -func.func @get_gv0_memref() { - %0 = memref.get_global @gv0 : memref<2xf32> - // CHECK: %[[DIM:.*]] = llvm.mlir.constant(2 : index) : i64 - // CHECK: %[[STRIDE:.*]] = llvm.mlir.constant(1 : index) : i64 - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @gv0 : !llvm.ptr> - // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ADDR]][0, 0] : (!llvm.ptr>) -> !llvm.ptr - // CHECK: %[[DEADBEEF:.*]] = llvm.mlir.constant(3735928559 : index) : i64 - // CHECK: %[[DEADBEEFPTR:.*]] = llvm.inttoptr %[[DEADBEEF]] : i64 to !llvm.ptr - // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - // CHECK: llvm.insertvalue %[[DEADBEEFPTR]], {{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - // CHECK: 
llvm.insertvalue %[[GEP]], {{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - // CHECK: %[[OFFSET:.*]] = llvm.mlir.constant(0 : index) : i64 - // CHECK: llvm.insertvalue %[[OFFSET]], {{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - // CHECK: llvm.insertvalue %[[DIM]], {{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - // CHECK: llvm.insertvalue %[[STRIDE]], {{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - return -} - -// ----- - -// CHECK-LABEL: func @memref_copy_unranked -func.func @memref_copy_unranked() { - %0 = memref.alloc() : memref<2xi1> - // CHECK: llvm.mlir.constant(2 : index) : i64 - // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %1 = memref.cast %0 : memref<2xi1> to memref<*xi1> - %2 = memref.alloc() : memref<2xi1> - // CHECK: llvm.mlir.constant(2 : index) : i64 - // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %3 = memref.cast %2 : memref<2xi1> to memref<*xi1> - memref.copy %1, %3 : memref<*xi1> to memref<*xi1> - // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : index) : i64 - // CHECK: [[ALLOCA:%.*]] = llvm.alloca %35 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> - // CHECK: llvm.store {{%.*}}, [[ALLOCA]] : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> - // CHECK: [[BITCAST:%.*]] = llvm.bitcast [[ALLOCA]] : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr - // CHECK: [[RANK:%.*]] = llvm.mlir.constant(1 : index) : i64 - // CHECK: [[UNDEF:%.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)> - // CHECK: [[INSERT:%.*]] = llvm.insertvalue [[RANK]], [[UNDEF]][0] : !llvm.struct<(i64, ptr)> - // CHECK: [[INSERT2:%.*]] = llvm.insertvalue [[BITCAST]], [[INSERT]][1] : !llvm.struct<(i64, ptr)> - // CHECK: [[STACKSAVE:%.*]] = llvm.intr.stacksave : !llvm.ptr - // CHECK: [[RANK2:%.*]] = llvm.mlir.constant(1 : index) : i64 - // CHECK: [[ALLOCA2:%.*]] = llvm.alloca [[RANK2]] x !llvm.struct<(i64, ptr)> : (i64) -> !llvm.ptr)>> - // CHECK: llvm.store {{%.*}}, [[ALLOCA2]] : !llvm.ptr)>> - // CHECK: [[ALLOCA3:%.*]] = llvm.alloca [[RANK2]] x !llvm.struct<(i64, ptr)> : (i64) -> !llvm.ptr)>> - // CHECK: llvm.store [[INSERT2]], [[ALLOCA3]] : !llvm.ptr)>> - // CHECK: [[SIZEPTR:%.*]] = llvm.getelementptr {{%.*}}[1] : (!llvm.ptr) -> !llvm.ptr - // CHECK: [[SIZE:%.*]] = llvm.ptrtoint [[SIZEPTR]] : !llvm.ptr to i64 - // CHECK: llvm.call @memrefCopy([[SIZE]], [[ALLOCA2]], [[ALLOCA3]]) : (i64, !llvm.ptr)>>, !llvm.ptr)>>) -> () - // CHECK: llvm.intr.stackrestore [[STACKSAVE]] - return -} - -// ----- - -// CHECK-LABEL: func @mixed_alloc( -// CHECK: %[[Marg:.*]]: index, %[[Narg:.*]]: index) -func.func @mixed_alloc(%arg0: index, %arg1: index) -> memref { -// CHECK-DAG: %[[M:.*]] = builtin.unrealized_conversion_cast %[[Marg]] -// CHECK-DAG: %[[N:.*]] = builtin.unrealized_conversion_cast %[[Narg]] -// CHECK: %[[c42:.*]] = llvm.mlir.constant(42 : index) : i64 -// CHECK-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK-NEXT: %[[st0:.*]] = llvm.mul %[[N]], %[[c42]] : i64 -// CHECK-NEXT: %[[sz:.*]] = llvm.mul %[[st0]], %[[M]] : i64 -// CHECK-NEXT: %[[null:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[sz]]] : (!llvm.ptr, i64) -> !llvm.ptr -// CHECK-NEXT: %[[sz_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to i64 -// CHECK-NEXT: llvm.call 
@malloc(%[[sz_bytes]]) : (i64) -> !llvm.ptr -// CHECK-NEXT: llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr -// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK-NEXT: llvm.insertvalue %[[off]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK-NEXT: llvm.insertvalue %[[M]], %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK-NEXT: llvm.insertvalue %[[c42]], %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK-NEXT: llvm.insertvalue %[[st0]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK-NEXT: llvm.insertvalue %[[one]], %{{.*}}[4, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> - %0 = memref.alloc(%arg0, %arg1) : memref - return %0 : memref -} - -// ----- - -// CHECK-LABEL: func @mixed_dealloc -func.func @mixed_dealloc(%arg0: memref) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK-NEXT: %[[ptri8:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr -// CHECK-NEXT: llvm.call @free(%[[ptri8]]) : (!llvm.ptr) -> () - memref.dealloc %arg0 : memref - return -} - -// ----- - -// CHECK-LABEL: func @dynamic_alloc( -// CHECK: %[[Marg:.*]]: index, %[[Narg:.*]]: index) -func.func @dynamic_alloc(%arg0: index, %arg1: index) -> memref { -// CHECK-DAG: %[[M:.*]] = builtin.unrealized_conversion_cast %[[Marg]] -// CHECK-DAG: %[[N:.*]] = builtin.unrealized_conversion_cast %[[Narg]] -// CHECK-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK-NEXT: %[[sz:.*]] = llvm.mul %[[N]], %[[M]] : i64 -// CHECK-NEXT: %[[null:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[sz]]] : (!llvm.ptr, i64) -> !llvm.ptr -// CHECK-NEXT: %[[sz_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to i64 -// CHECK-NEXT: llvm.call @malloc(%[[sz_bytes]]) : (i64) -> !llvm.ptr -// CHECK-NEXT: llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr -// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// CHECK-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK-NEXT: llvm.insertvalue %[[off]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// CHECK-NEXT: llvm.insertvalue %[[M]], %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// CHECK-NEXT: llvm.insertvalue %[[one]], %{{.*}}[4, 1] : 
!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %0 = memref.alloc(%arg0, %arg1) : memref - return %0 : memref -} - -// ----- - -// CHECK-LABEL: func @dynamic_dealloc -func.func @dynamic_dealloc(%arg0: memref) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// CHECK-NEXT: %[[ptri8:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr -// CHECK-NEXT: llvm.call @free(%[[ptri8]]) : (!llvm.ptr) -> () - memref.dealloc %arg0 : memref - return -} - -// ----- - -// CHECK-LABEL: func @memref_cast_static_to_dynamic -func.func @memref_cast_static_to_dynamic(%static : memref<10x42xf32>) { -// CHECK-NOT: llvm.bitcast - %0 = memref.cast %static : memref<10x42xf32> to memref - return -} - -// ----- - -// CHECK-LABEL: func @memref_cast_static_to_mixed -func.func @memref_cast_static_to_mixed(%static : memref<10x42xf32>) { -// CHECK-NOT: llvm.bitcast - %0 = memref.cast %static : memref<10x42xf32> to memref - return -} - -// ----- - -// CHECK-LABEL: func @memref_cast_dynamic_to_static -func.func @memref_cast_dynamic_to_static(%dynamic : memref) { -// CHECK-NOT: llvm.bitcast - %0 = memref.cast %dynamic : memref to memref<10x12xf32> - return -} - -// ----- - -// CHECK-LABEL: func @memref_cast_dynamic_to_mixed -func.func @memref_cast_dynamic_to_mixed(%dynamic : memref) { -// CHECK-NOT: llvm.bitcast - %0 = memref.cast %dynamic : memref to memref - return -} - -// ----- - -// CHECK-LABEL: func @memref_cast_mixed_to_dynamic -func.func @memref_cast_mixed_to_dynamic(%mixed : memref<42x?xf32>) { -// CHECK-NOT: llvm.bitcast - %0 = memref.cast %mixed : memref<42x?xf32> to memref - return -} - -// ----- - -// CHECK-LABEL: func @memref_cast_mixed_to_static -func.func @memref_cast_mixed_to_static(%mixed : memref<42x?xf32>) { -// CHECK-NOT: llvm.bitcast - %0 = memref.cast %mixed : memref<42x?xf32> to memref<42x1xf32> - return -} - -// ----- - -// CHECK-LABEL: func @memref_cast_mixed_to_mixed -func.func @memref_cast_mixed_to_mixed(%mixed : memref<42x?xf32>) { -// CHECK-NOT: llvm.bitcast - %0 = memref.cast %mixed : memref<42x?xf32> to memref - return -} - -// ----- - -// CHECK-LABEL: func @memref_cast_ranked_to_unranked -// CHECK32-LABEL: func @memref_cast_ranked_to_unranked -func.func @memref_cast_ranked_to_unranked(%arg : memref<42x2x?xf32>) { -// CHECK-DAG: %[[c:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK-DAG: %[[p:.*]] = llvm.alloca %[[c]] x !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>> -// CHECK-DAG: llvm.store %{{.*}}, %[[p]] : !llvm.ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>> -// CHECK-DAG: %[[p2:.*]] = llvm.bitcast %[[p]] : !llvm.ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>> to !llvm.ptr -// CHECK-DAG: %[[r:.*]] = llvm.mlir.constant(3 : index) : i64 -// CHECK : llvm.mlir.undef : !llvm.struct<(i64, ptr)> -// CHECK-DAG: llvm.insertvalue %[[r]], %{{.*}}[0] : !llvm.struct<(i64, ptr)> -// CHECK-DAG: llvm.insertvalue %[[p2]], %{{.*}}[1] : !llvm.struct<(i64, ptr)> -// CHECK32-DAG: %[[c:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK32-DAG: %[[p:.*]] = llvm.alloca %[[c]] x !llvm.struct<(ptr, ptr, i32, array<3 x i32>, array<3 x i32>)> : (i64) -> !llvm.ptr, ptr, i32, array<3 x i32>, array<3 x i32>)>> -// CHECK32-DAG: llvm.store %{{.*}}, %[[p]] : !llvm.ptr, ptr, i32, array<3 x i32>, array<3 x i32>)>> -// CHECK32-DAG: %[[p2:.*]] = llvm.bitcast %[[p]] : !llvm.ptr, ptr, i32, array<3 x i32>, array<3 x i32>)>> to !llvm.ptr -// CHECK32-DAG: 
%[[r:.*]] = llvm.mlir.constant(3 : index) : i32 -// CHECK32 : llvm.mlir.undef : !llvm.struct<(i32, ptr)> -// CHECK32-DAG: llvm.insertvalue %[[r]], %{{.*}}[0] : !llvm.struct<(i32, ptr)> -// CHECK32-DAG: llvm.insertvalue %[[p2]], %{{.*}}[1] : !llvm.struct<(i32, ptr)> - %0 = memref.cast %arg : memref<42x2x?xf32> to memref<*xf32> - return -} - -// ----- - -// CHECK-LABEL: func @memref_cast_unranked_to_ranked -func.func @memref_cast_unranked_to_ranked(%arg : memref<*xf32>) { -// CHECK: %[[p:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(i64, ptr)> -// CHECK-NEXT: llvm.bitcast %[[p]] : !llvm.ptr to !llvm.ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>> - %0 = memref.cast %arg : memref<*xf32> to memref - return -} - -// ----- - -// CHECK-LABEL: @memref_reinterpret_cast_unranked_to_dynamic_shape -func.func @memref_reinterpret_cast_unranked_to_dynamic_shape(%offset: index, - %size_0 : index, - %size_1 : index, - %stride_0 : index, - %stride_1 : index, - %input : memref<*xf32>) { - %output = memref.reinterpret_cast %input to - offset: [%offset], sizes: [%size_0, %size_1], - strides: [%stride_0, %stride_1] - : memref<*xf32> to memref> - return -} -// CHECK-SAME: ([[OFFSETarg:%[a-z,0-9]+]]: index, -// CHECK-SAME: [[SIZE_0arg:%[a-z,0-9]+]]: index, [[SIZE_1arg:%[a-z,0-9]+]]: index, -// CHECK-SAME: [[STRIDE_0arg:%[a-z,0-9]+]]: index, [[STRIDE_1arg:%[a-z,0-9]+]]: index, -// CHECK-DAG: [[OFFSET:%.*]] = builtin.unrealized_conversion_cast [[OFFSETarg]] -// CHECK-DAG: [[SIZE_0:%.*]] = builtin.unrealized_conversion_cast [[SIZE_0arg]] -// CHECK-DAG: [[SIZE_1:%.*]] = builtin.unrealized_conversion_cast [[SIZE_1arg]] -// CHECK-DAG: [[STRIDE_0:%.*]] = builtin.unrealized_conversion_cast [[STRIDE_0arg]] -// CHECK-DAG: [[STRIDE_1:%.*]] = builtin.unrealized_conversion_cast [[STRIDE_1arg]] -// CHECK-DAG: [[INPUT:%.*]] = builtin.unrealized_conversion_cast -// CHECK: [[OUT_0:%.*]] = llvm.mlir.undef : [[TY:!.*]] -// CHECK: [[DESCRIPTOR:%.*]] = llvm.extractvalue [[INPUT]][1] : !llvm.struct<(i64, ptr)> -// CHECK: [[BASE_PTR_PTR:%.*]] = llvm.bitcast [[DESCRIPTOR]] : !llvm.ptr to !llvm.ptr> -// CHECK: [[BASE_PTR:%.*]] = llvm.load [[BASE_PTR_PTR]] : !llvm.ptr> -// CHECK: [[BASE_PTR_PTR_:%.*]] = llvm.bitcast [[DESCRIPTOR]] : !llvm.ptr to !llvm.ptr> -// CHECK: [[ALIGNED_PTR_PTR:%.*]] = llvm.getelementptr [[BASE_PTR_PTR_]]{{\[}}1] -// CHECK-SAME: : (!llvm.ptr>) -> !llvm.ptr> -// CHECK: [[ALIGNED_PTR:%.*]] = llvm.load [[ALIGNED_PTR_PTR]] : !llvm.ptr> -// CHECK: [[OUT_1:%.*]] = llvm.insertvalue [[BASE_PTR]], [[OUT_0]][0] : [[TY]] -// CHECK: [[OUT_2:%.*]] = llvm.insertvalue [[ALIGNED_PTR]], [[OUT_1]][1] : [[TY]] -// CHECK: [[OUT_3:%.*]] = llvm.insertvalue [[OFFSET]], [[OUT_2]][2] : [[TY]] -// CHECK: [[OUT_4:%.*]] = llvm.insertvalue [[SIZE_0]], [[OUT_3]][3, 0] : [[TY]] -// CHECK: [[OUT_5:%.*]] = llvm.insertvalue [[STRIDE_0]], [[OUT_4]][4, 0] : [[TY]] -// CHECK: [[OUT_6:%.*]] = llvm.insertvalue [[SIZE_1]], [[OUT_5]][3, 1] : [[TY]] -// CHECK: [[OUT_7:%.*]] = llvm.insertvalue [[STRIDE_1]], [[OUT_6]][4, 1] : [[TY]] - -// ----- - -// CHECK-LABEL: @memref_reshape -func.func @memref_reshape(%input : memref<2x3xf32>, %shape : memref) { - %output = memref.reshape %input(%shape) - : (memref<2x3xf32>, memref) -> memref<*xf32> - return -} -// CHECK: [[INPUT:%.*]] = builtin.unrealized_conversion_cast %{{.*}} to [[INPUT_TY:!.*]] -// CHECK: [[SHAPE:%.*]] = builtin.unrealized_conversion_cast %{{.*}} to [[SHAPE_TY:!.*]] -// CHECK: [[RANK:%.*]] = llvm.extractvalue [[SHAPE]][3, 0] : [[SHAPE_TY]] -// CHECK: [[UNRANKED_OUT_O:%.*]] = llvm.mlir.undef : 
!llvm.struct<(i64, ptr)> -// CHECK: [[UNRANKED_OUT_1:%.*]] = llvm.insertvalue [[RANK]], [[UNRANKED_OUT_O]][0] : !llvm.struct<(i64, ptr)> - -// Compute size in bytes to allocate result ranked descriptor -// CHECK: [[C1:%.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: [[INDEX_SIZE:%.*]] = llvm.mlir.constant(8 : index) : i64 -// CHECK: [[PTR_SIZE:%.*]] = llvm.mlir.constant(8 : index) : i64 -// CHECK: [[DOUBLE_PTR_SIZE:%.*]] = llvm.mul [[C2]], [[PTR_SIZE]] : i64 -// CHECK: [[DESC_ALLOC_SIZE:%.*]] = llvm.add [[DOUBLE_PTR_SIZE]], %{{.*}} -// CHECK: [[UNDERLYING_DESC:%.*]] = llvm.alloca [[DESC_ALLOC_SIZE]] x i8 -// CHECK: llvm.insertvalue [[UNDERLYING_DESC]], [[UNRANKED_OUT_1]][1] - -// Set allocated, aligned pointers and offset. -// CHECK: [[ALLOC_PTR:%.*]] = llvm.extractvalue [[INPUT]][0] : [[INPUT_TY]] -// CHECK: [[ALIGN_PTR:%.*]] = llvm.extractvalue [[INPUT]][1] : [[INPUT_TY]] -// CHECK: [[OFFSET:%.*]] = llvm.extractvalue [[INPUT]][2] : [[INPUT_TY]] -// CHECK: [[BASE_PTR_PTR:%.*]] = llvm.bitcast [[UNDERLYING_DESC]] -// CHECK-SAME: !llvm.ptr to !llvm.ptr> -// CHECK: llvm.store [[ALLOC_PTR]], [[BASE_PTR_PTR]] : !llvm.ptr> -// CHECK: [[BASE_PTR_PTR_:%.*]] = llvm.bitcast [[UNDERLYING_DESC]] : !llvm.ptr to !llvm.ptr> -// CHECK: [[ALIGNED_PTR_PTR:%.*]] = llvm.getelementptr [[BASE_PTR_PTR_]]{{\[}}1] -// CHECK: llvm.store [[ALIGN_PTR]], [[ALIGNED_PTR_PTR]] : !llvm.ptr> -// CHECK: [[BASE_PTR_PTR__:%.*]] = llvm.bitcast [[UNDERLYING_DESC]] : !llvm.ptr to !llvm.ptr> -// CHECK: [[OFFSET_PTR_:%.*]] = llvm.getelementptr [[BASE_PTR_PTR__]]{{\[}}2] -// CHECK: [[OFFSET_PTR:%.*]] = llvm.bitcast [[OFFSET_PTR_]] -// CHECK: llvm.store [[OFFSET]], [[OFFSET_PTR]] : !llvm.ptr - -// Iterate over shape operand in reverse order and set sizes and strides. 
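// A hedged sketch of the loop verified in the CHECK lines below (a standalone
// illustration; the @reshape_sketch name and its operand shapes are
// hypothetical, not part of the deleted test). The lowering fills the sizes
// and strides arrays of the unranked descriptor innermost-first:
// stride(rank-1) = 1 and stride(d) = stride(d+1) * size(d+1), which is why the
// block arguments carry the dimension counter and the running stride product.
//
// func.func @reshape_sketch(%input : memref<2x3xf32>, %shape : memref<?xindex>) -> memref<*xf32> {
//   // For a target shape [d0, d1, d2], the computed strides are [d1*d2, d2, 1].
//   %out = memref.reshape %input(%shape)
//       : (memref<2x3xf32>, memref<?xindex>) -> memref<*xf32>
//   return %out : memref<*xf32>
// }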
-// CHECK: [[STRUCT_PTR:%.*]] = llvm.bitcast [[UNDERLYING_DESC]] -// CHECK-SAME: !llvm.ptr to !llvm.ptr, ptr, i64, i64)>> -// CHECK: [[SIZES_PTR:%.*]] = llvm.getelementptr [[STRUCT_PTR]]{{\[}}0, 3] -// CHECK: [[STRIDES_PTR:%.*]] = llvm.getelementptr [[SIZES_PTR]]{{\[}}[[RANK]]] -// CHECK: [[SHAPE_IN_PTR:%.*]] = llvm.extractvalue [[SHAPE]][1] : [[SHAPE_TY]] -// CHECK: [[C1_:%.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: [[RANK_MIN_1:%.*]] = llvm.sub [[RANK]], [[C1_]] : i64 -// CHECK: llvm.br ^bb1([[RANK_MIN_1]], [[C1_]] : i64, i64) - -// CHECK: ^bb1([[DIM:%.*]]: i64, [[CUR_STRIDE:%.*]]: i64): -// CHECK: [[C0_:%.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: [[COND:%.*]] = llvm.icmp "sge" [[DIM]], [[C0_]] : i64 -// CHECK: llvm.cond_br [[COND]], ^bb2, ^bb3 - -// CHECK: ^bb2: -// CHECK: [[SIZE_PTR:%.*]] = llvm.getelementptr [[SHAPE_IN_PTR]]{{\[}}[[DIM]]] -// CHECK: [[SIZE:%.*]] = llvm.load [[SIZE_PTR]] : !llvm.ptr -// CHECK: [[TARGET_SIZE_PTR:%.*]] = llvm.getelementptr [[SIZES_PTR]]{{\[}}[[DIM]]] -// CHECK: llvm.store [[SIZE]], [[TARGET_SIZE_PTR]] : !llvm.ptr -// CHECK: [[TARGET_STRIDE_PTR:%.*]] = llvm.getelementptr [[STRIDES_PTR]]{{\[}}[[DIM]]] -// CHECK: llvm.store [[CUR_STRIDE]], [[TARGET_STRIDE_PTR]] : !llvm.ptr -// CHECK: [[UPDATE_STRIDE:%.*]] = llvm.mul [[CUR_STRIDE]], [[SIZE]] : i64 -// CHECK: [[STRIDE_COND:%.*]] = llvm.sub [[DIM]], [[C1_]] : i64 -// CHECK: llvm.br ^bb1([[STRIDE_COND]], [[UPDATE_STRIDE]] : i64, i64) - -// CHECK: ^bb3: -// CHECK: return diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir index 123a661193c490..745cbdbd515325 100644 --- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir +++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-nvgpu-to-nvvm='use-opaque-pointers=1' | FileCheck %s +// RUN: mlir-opt %s -convert-nvgpu-to-nvvm | FileCheck %s // RUN: mlir-opt %s -transform-interpreter | FileCheck %s // CHECK-LABEL: @m16n8k16_fp16 diff --git a/mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir b/mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir deleted file mode 100644 index 1a37f1c046cf66..00000000000000 --- a/mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir +++ /dev/null @@ -1,59 +0,0 @@ -// RUN: mlir-opt --convert-nvgpu-to-nvvm='use-opaque-pointers=0' --split-input-file %s | FileCheck %s - -// CHECK-LABEL: @async_cp( -// CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index) -func.func @async_cp( - %src: memref<128x128xf32>, %dst: memref<3x16x128xf32, 3>, %i : index) { - // CHECK: %[[IDX1:.*]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64 - // CHECK-DAG: %[[BASEDST:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> - // CHECK-DAG: %[[S0:.*]] = llvm.mlir.constant(2048 : index) : i64 - // CHECK-DAG: %[[LI:.*]] = llvm.mul %[[IDX1]], %[[S0]] : i64 - // CHECK-DAG: %[[S1:.*]] = llvm.mlir.constant(128 : index) : i64 - // CHECK-DAG: %[[FI0:.*]] = llvm.mul %[[IDX1]], %[[S1]] : i64 - // CHECK-DAG: %[[FI1:.*]] = llvm.add %[[LI]], %[[FI0]] : i64 - // CHECK-DAG: %[[FI2:.*]] = llvm.add %[[FI1]], %[[IDX1]] : i64 - // CHECK-DAG: %[[ADDRESSDST:.*]] = llvm.getelementptr %[[BASEDST]][%[[FI2]]] : (!llvm.ptr, i64) -> !llvm.ptr - // CHECK-DAG: %[[CAST0:.*]] = llvm.bitcast %[[ADDRESSDST]] : !llvm.ptr to !llvm.ptr - // CHECK-DAG: %[[BASESRC:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - // CHECK-DAG: %[[S3:.*]] = llvm.mlir.constant(128 : 
index) : i64 - // CHECK-DAG: %[[FI3:.*]] = llvm.mul %[[IDX1]], %[[S3]] : i64 - // CHECK-DAG: %[[FI4:.*]] = llvm.add %[[FI3]], %[[IDX1]] : i64 - // CHECK-DAG: %[[ADDRESSSRC:.*]] = llvm.getelementptr %[[BASESRC]][%[[FI4]]] : (!llvm.ptr, i64) -> !llvm.ptr - // CHECK-DAG: %[[CAST1:.*]] = llvm.bitcast %[[ADDRESSSRC]] : !llvm.ptr to !llvm.ptr - // CHECK-DAG: %[[CAST2:.*]] = llvm.addrspacecast %[[CAST1]] : !llvm.ptr to !llvm.ptr - // CHECK-DAG: nvvm.cp.async.shared.global %[[CAST0]], %[[CAST2]], 16, cache = ca - %0 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i, %i], 4 : memref<128x128xf32> to memref<3x16x128xf32, 3> - // CHECK: nvvm.cp.async.commit.group - %1 = nvgpu.device_async_create_group %0 - // CHECK: nvvm.cp.async.wait.group 1 - nvgpu.device_async_wait %1 { numGroups = 1 : i32 } - - // CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16, cache = cg - %2 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i, %i], 4 {bypassL1}: memref<128x128xf32> to memref<3x16x128xf32, 3> - return -} - -// ----- - -// CHECK-LABEL: @async_cp_i4( -// CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index) -func.func @async_cp_i4( - %src: memref<128x64xi4>, %dst: memref<128x128xi4, 3>, %i : index) -> !nvgpu.device.async.token { - // CHECK: %[[IDX1:.*]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64 - // CHECK-DAG: %[[BASEDST:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - // CHECK-DAG: %[[S0:.*]] = llvm.mlir.constant(128 : index) : i64 - // CHECK-DAG: %[[LI:.*]] = llvm.mul %[[IDX1]], %[[S0]] : i64 - // CHECK-DAG: %[[FI1:.*]] = llvm.add %[[LI]], %[[IDX1]] : i64 - // CHECK-DAG: %[[ADDRESSDST:.*]] = llvm.getelementptr %[[BASEDST]][%[[FI1]]] : (!llvm.ptr, i64) -> !llvm.ptr - // CHECK-DAG: %[[CAST0:.*]] = llvm.bitcast %[[ADDRESSDST]] : !llvm.ptr to !llvm.ptr - // CHECK-DAG: %[[BASESRC:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - // CHECK-DAG: %[[S2:.*]] = llvm.mlir.constant(64 : index) : i64 - // CHECK-DAG: %[[FI2:.*]] = llvm.mul %[[IDX1]], %[[S2]] : i64 - // CHECK-DAG: %[[FI3:.*]] = llvm.add %[[FI2]], %[[IDX1]] : i64 - // CHECK-DAG: %[[ADDRESSSRC:.*]] = llvm.getelementptr %[[BASESRC]][%[[FI3]]] : (!llvm.ptr, i64) -> !llvm.ptr - // CHECK-DAG: %[[CAST1:.*]] = llvm.bitcast %[[ADDRESSSRC]] : !llvm.ptr to !llvm.ptr - // CHECK-DAG: %[[CAST2:.*]] = llvm.addrspacecast %[[CAST1]] : !llvm.ptr to !llvm.ptr - // CHECK-DAG: nvvm.cp.async.shared.global %[[CAST0]], %[[CAST2]], 16, cache = ca - %0 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i], 32 : memref<128x64xi4> to memref<128x128xi4, 3> - return %0 : !nvgpu.device.async.token -} diff --git a/mlir/test/Conversion/SCFToOpenMP/reductions.mlir b/mlir/test/Conversion/SCFToOpenMP/reductions.mlir index c5723d96d4f1de..25b18b58a6adbd 100644 --- a/mlir/test/Conversion/SCFToOpenMP/reductions.mlir +++ b/mlir/test/Conversion/SCFToOpenMP/reductions.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -convert-scf-to-openmp='use-opaque-pointers=1' -split-input-file %s | FileCheck %s +// RUN: mlir-opt -convert-scf-to-openmp -split-input-file %s | FileCheck %s // CHECK: omp.reduction.declare @[[$REDF:.*]] : f32 diff --git a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir index 508052d483ec1d..e0fdcae1b896b8 100644 --- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir +++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -convert-scf-to-openmp='use-opaque-pointers=1' %s | FileCheck 
%s +// RUN: mlir-opt -convert-scf-to-openmp %s | FileCheck %s // CHECK-LABEL: @parallel func.func @parallel(%arg0: index, %arg1: index, %arg2: index, diff --git a/mlir/test/Conversion/SCFToOpenMP/typed-pointers.mlir b/mlir/test/Conversion/SCFToOpenMP/typed-pointers.mlir deleted file mode 100644 index fb90c5d7d10fd1..00000000000000 --- a/mlir/test/Conversion/SCFToOpenMP/typed-pointers.mlir +++ /dev/null @@ -1,78 +0,0 @@ -// RUN: mlir-opt -convert-scf-to-openmp='use-opaque-pointers=0' -split-input-file %s | FileCheck %s - -// CHECK: omp.reduction.declare @[[$REDF1:.*]] : f32 - -// CHECK: init -// CHECK: %[[INIT:.*]] = llvm.mlir.constant(-3.4 -// CHECK: omp.yield(%[[INIT]] : f32) - -// CHECK: combiner -// CHECK: ^{{.*}}(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32) -// CHECK: %[[CMP:.*]] = arith.cmpf oge, %[[ARG0]], %[[ARG1]] -// CHECK: %[[RES:.*]] = arith.select %[[CMP]], %[[ARG0]], %[[ARG1]] -// CHECK: omp.yield(%[[RES]] : f32) - -// CHECK-NOT: atomic - -// CHECK: omp.reduction.declare @[[$REDF2:.*]] : i64 - -// CHECK: init -// CHECK: %[[INIT:.*]] = llvm.mlir.constant -// CHECK: omp.yield(%[[INIT]] : i64) - -// CHECK: combiner -// CHECK: ^{{.*}}(%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64) -// CHECK: %[[CMP:.*]] = arith.cmpi slt, %[[ARG0]], %[[ARG1]] -// CHECK: %[[RES:.*]] = arith.select %[[CMP]], %[[ARG1]], %[[ARG0]] -// CHECK: omp.yield(%[[RES]] : i64) - -// CHECK: atomic -// CHECK: ^{{.*}}(%[[ARG0:.*]]: !llvm.ptr, %[[ARG1:.*]]: !llvm.ptr): -// CHECK: %[[RHS:.*]] = llvm.load %[[ARG1]] -// CHECK: llvm.atomicrmw max %[[ARG0]], %[[RHS]] monotonic - -// CHECK-LABEL: @reduction4 -func.func @reduction4(%arg0 : index, %arg1 : index, %arg2 : index, - %arg3 : index, %arg4 : index) -> (f32, i64) { - %step = arith.constant 1 : index - // CHECK: %[[ZERO:.*]] = arith.constant 0.0 - %zero = arith.constant 0.0 : f32 - // CHECK: %[[IONE:.*]] = arith.constant 1 - %ione = arith.constant 1 : i64 - // CHECK: %[[BUF1:.*]] = llvm.alloca %{{.*}} x f32 - // CHECK: llvm.store %[[ZERO]], %[[BUF1]] - // CHECK: %[[BUF2:.*]] = llvm.alloca %{{.*}} x i64 - // CHECK: llvm.store %[[IONE]], %[[BUF2]] - - // CHECK: omp.parallel - // CHECK: omp.wsloop - // CHECK-SAME: reduction(@[[$REDF1]] -> %[[BUF1]] - // CHECK-SAME: @[[$REDF2]] -> %[[BUF2]] - // CHECK: memref.alloca_scope - %res:2 = scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) - step (%arg4, %step) init (%zero, %ione) -> (f32, i64) { - %one = arith.constant 1.0 : f32 - // CHECK: omp.reduction %{{.*}}, %[[BUF1]] - scf.reduce(%one) : f32 { - ^bb0(%lhs : f32, %rhs: f32): - %cmp = arith.cmpf oge, %lhs, %rhs : f32 - %res = arith.select %cmp, %lhs, %rhs : f32 - scf.reduce.return %res : f32 - } - // CHECK: arith.fptosi - %1 = arith.fptosi %one : f32 to i64 - // CHECK: omp.reduction %{{.*}}, %[[BUF2]] - scf.reduce(%1) : i64 { - ^bb1(%lhs: i64, %rhs: i64): - %cmp = arith.cmpi slt, %lhs, %rhs : i64 - %res = arith.select %cmp, %rhs, %lhs : i64 - scf.reduce.return %res : i64 - } - // CHECK: omp.yield - } - // CHECK: omp.terminator - // CHECK: %[[RES1:.*]] = llvm.load %[[BUF1]] : !llvm.ptr - // CHECK: %[[RES2:.*]] = llvm.load %[[BUF2]] : !llvm.ptr - // CHECK: return %[[RES1]], %[[RES2]] - return %res#0, %res#1 : f32, i64 -} diff --git a/mlir/test/Dialect/ArmSME/arm-sme-to-llvm.mlir b/mlir/test/Dialect/ArmSME/arm-sme-to-llvm.mlir index 30ddb3c4686018..8fdcf69958244f 100644 --- a/mlir/test/Dialect/ArmSME/arm-sme-to-llvm.mlir +++ b/mlir/test/Dialect/ArmSME/arm-sme-to-llvm.mlir @@ -209,8 +209,8 @@ func.func @arm_sme_load_tile_slice_ver_f64(%src : memref, %mask : vecto // CHECK-LABEL: 
func.func @arm_sme_store_tile_slice_hor_i8(
 // CHECK-SAME:    %[[TILE:.*]]: vector<[16]x[16]xi8>,
 // CHECK-SAME:    %[[TILE_SLICE_INDEX:.*]]: index,
+// CHECK-SAME:    %[[MASK:.*]]: vector<[16]xi1>,
 // CHECK-SAME:    %[[DEST:.*]]: memref<?x?xi8>) {
-// CHECK:   %[[PTRUE_B:.*]] = arith.constant dense<true> : vector<[16]xi1>
 // CHECK:   %[[C0:.*]] = arith.constant 0 : index
 // CHECK:   %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[DEST]] : memref<?x?xi8> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK:   %[[C0_I64:.*]] = builtin.unrealized_conversion_cast %[[C0]] : index to i64
@@ -221,12 +221,12 @@ func.func @arm_sme_load_tile_slice_ver_f64(%src : memref<?x?xf64>, %mask : vecto
 // CHECK:   %[[GEP:.*]] = llvm.getelementptr %[[ALIGNED_BASE]]{{\[}}%[[OFFSET]]] : (!llvm.ptr, i64) -> !llvm.ptr, i8
 // CHECK:   %[[TILE_SLICE_INDEX_I32:.*]] = arith.index_castui %[[TILE_SLICE_INDEX]] : index to i32
 // CHECK:   %[[TILE_ID_I32:.*]] = arith.extui %[[TILE_ID]] : i8 to i32
-// CHECK:   "arm_sme.intr.st1b.horiz"(%[[PTRUE_B]], %[[GEP]], %[[TILE_ID_I32]], %[[TILE_SLICE_INDEX_I32]]) : (vector<[16]xi1>, !llvm.ptr, i32, i32) -> ()
+// CHECK:   "arm_sme.intr.st1b.horiz"(%[[MASK]], %[[GEP]], %[[TILE_ID_I32]], %[[TILE_SLICE_INDEX_I32]]) : (vector<[16]xi1>, !llvm.ptr, i32, i32) -> ()
 // CHECK:   return
 // CHECK: }
-func.func @arm_sme_store_tile_slice_hor_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %dest : memref<?x?xi8>) -> () {
+func.func @arm_sme_store_tile_slice_hor_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %mask : vector<[16]xi1>, %dest : memref<?x?xi8>) -> () {
   %c0 = arith.constant 0 : index
-  arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref<?x?xi8>, vector<[16]x[16]xi8>
+  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
   return
 }
@@ -234,9 +234,9 @@ func.func @arm_sme_store_tile_slice_hor_i8(%tile : vector<[16]x[16]xi8>, %tile_s
 // CHECK-LABEL: @arm_sme_store_tile_slice_hor_i16
 // CHECK: "arm_sme.intr.st1h.horiz"({{.*}}) : (vector<[8]xi1>, !llvm.ptr, i32, i32) -> ()
-func.func @arm_sme_store_tile_slice_hor_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %dest : memref<?x?xi16>) -> () {
+func.func @arm_sme_store_tile_slice_hor_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xi16>) -> () {
   %c0 = arith.constant 0 : index
-  arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
+  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
   return
 }
@@ -244,9 +244,9 @@ func.func @arm_sme_store_tile_slice_hor_i16(%tile : vector<[8]x[8]xi16>, %tile_s
 // CHECK-LABEL: @arm_sme_store_tile_slice_hor_i32
 // CHECK: "arm_sme.intr.st1w.horiz"({{.*}}) : (vector<[4]xi1>, !llvm.ptr, i32, i32) -> ()
-func.func @arm_sme_store_tile_slice_hor_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %dest : memref<?x?xi32>) -> () {
+func.func @arm_sme_store_tile_slice_hor_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref<?x?xi32>) -> () {
   %c0 = arith.constant 0 : index
-  arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref<?x?xi32>, vector<[4]x[4]xi32>
+  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
   return
 }
@@ -254,9 +254,9 @@ func.func @arm_sme_store_tile_slice_hor_i32(%tile : vector<[4]x[4]xi32>, %tile_s
 // CHECK-LABEL: @arm_sme_store_tile_slice_hor_i64
 // CHECK: "arm_sme.intr.st1d.horiz"({{.*}}) :
(vector<[2]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_hor_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_hor_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[2]x[2]xi64> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[2]xi1>, vector<[2]x[2]xi64> return } @@ -264,9 +264,9 @@ func.func @arm_sme_store_tile_slice_hor_i64(%tile : vector<[2]x[2]xi64>, %tile_s // CHECK-LABEL: @arm_sme_store_tile_slice_hor_i128 // CHECK: "arm_sme.intr.st1q.horiz"({{.*}}) : (vector<[1]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_hor_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_hor_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %mask : vector<[1]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[1]x[1]xi128> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[1]xi1>, vector<[1]x[1]xi128> return } @@ -274,9 +274,9 @@ func.func @arm_sme_store_tile_slice_hor_i128(%tile : vector<[1]x[1]xi128>, %tile // CHECK-LABEL: @arm_sme_store_tile_slice_hor_f16 // CHECK: "arm_sme.intr.st1h.horiz"({{.*}}) : (vector<[8]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_hor_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_hor_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[8]x[8]xf16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[8]xi1>, vector<[8]x[8]xf16> return } @@ -284,9 +284,9 @@ func.func @arm_sme_store_tile_slice_hor_f16(%tile : vector<[8]x[8]xf16>, %tile_s // CHECK-LABEL: @arm_sme_store_tile_slice_hor_bf16 // CHECK: "arm_sme.intr.st1h.horiz"({{.*}}) : (vector<[8]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_hor_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_hor_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[8]x[8]xbf16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[8]xi1>, vector<[8]x[8]xbf16> return } @@ -294,9 +294,9 @@ func.func @arm_sme_store_tile_slice_hor_bf16(%tile : vector<[8]x[8]xbf16>, %tile // CHECK-LABEL: @arm_sme_store_tile_slice_hor_f32 // CHECK: "arm_sme.intr.st1w.horiz"({{.*}}) : (vector<[4]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_hor_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_hor_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[4]x[4]xf32> + arm_sme.store_tile_slice %tile, 
%tile_slice_index, %mask, %dest[%c0] : memref, vector<[4]xi1>, vector<[4]x[4]xf32> return } @@ -304,9 +304,9 @@ func.func @arm_sme_store_tile_slice_hor_f32(%tile : vector<[4]x[4]xf32>, %tile_s // CHECK-LABEL: @arm_sme_store_tile_slice_hor_f64 // CHECK: "arm_sme.intr.st1d.horiz"({{.*}}) : (vector<[2]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_hor_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_hor_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[2]x[2]xf64> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[2]xi1>, vector<[2]x[2]xf64> return } @@ -314,9 +314,9 @@ func.func @arm_sme_store_tile_slice_hor_f64(%tile : vector<[2]x[2]xf64>, %tile_s // CHECK-LABEL: @arm_sme_store_tile_slice_ver_i8 // CHECK: "arm_sme.intr.st1b.vert"({{.*}}) : (vector<[16]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_ver_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_ver_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %mask : vector<[16]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[16]x[16]xi8> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[16]xi1>, vector<[16]x[16]xi8> return } @@ -324,9 +324,9 @@ func.func @arm_sme_store_tile_slice_ver_i8(%tile : vector<[16]x[16]xi8>, %tile_s // CHECK-LABEL: @arm_sme_store_tile_slice_ver_i16 // CHECK: "arm_sme.intr.st1h.vert"({{.*}}) : (vector<[8]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_ver_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_ver_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[8]x[8]xi16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[8]xi1>, vector<[8]x[8]xi16> return } @@ -334,9 +334,9 @@ func.func @arm_sme_store_tile_slice_ver_i16(%tile : vector<[8]x[8]xi16>, %tile_s // CHECK-LABEL: @arm_sme_store_tile_slice_ver_i32 // CHECK: "arm_sme.intr.st1w.vert"({{.*}}) : (vector<[4]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_ver_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_ver_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[4]x[4]xi32> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[4]xi1>, vector<[4]x[4]xi32> return } @@ -344,9 +344,9 @@ func.func @arm_sme_store_tile_slice_ver_i32(%tile : vector<[4]x[4]xi32>, %tile_s // CHECK-LABEL: @arm_sme_store_tile_slice_ver_i64 // CHECK: "arm_sme.intr.st1d.vert"({{.*}}) : (vector<[2]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_ver_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %dest : 
memref) -> () { +func.func @arm_sme_store_tile_slice_ver_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[2]x[2]xi64> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[2]xi1>, vector<[2]x[2]xi64> return } @@ -354,9 +354,9 @@ func.func @arm_sme_store_tile_slice_ver_i64(%tile : vector<[2]x[2]xi64>, %tile_s // CHECK-LABEL: @arm_sme_store_tile_slice_ver_i128 // CHECK: "arm_sme.intr.st1q.vert"({{.*}}) : (vector<[1]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_ver_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_ver_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %mask : vector<[1]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[1]x[1]xi128> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[1]xi1>, vector<[1]x[1]xi128> return } @@ -364,9 +364,9 @@ func.func @arm_sme_store_tile_slice_ver_i128(%tile : vector<[1]x[1]xi128>, %tile // CHECK-LABEL: @arm_sme_store_tile_slice_ver_f16 // CHECK: "arm_sme.intr.st1h.vert"({{.*}}) : (vector<[8]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_ver_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_ver_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[8]x[8]xf16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[8]xi1>, vector<[8]x[8]xf16> return } @@ -374,9 +374,9 @@ func.func @arm_sme_store_tile_slice_ver_f16(%tile : vector<[8]x[8]xf16>, %tile_s // CHECK-LABEL: @arm_sme_store_tile_slice_ver_bf16 // CHECK: "arm_sme.intr.st1h.vert"({{.*}}) : (vector<[8]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_ver_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_ver_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[8]x[8]xbf16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[8]xi1>, vector<[8]x[8]xbf16> return } @@ -384,9 +384,9 @@ func.func @arm_sme_store_tile_slice_ver_bf16(%tile : vector<[8]x[8]xbf16>, %tile // CHECK-LABEL: @arm_sme_store_tile_slice_ver_f32 // CHECK: "arm_sme.intr.st1w.vert"({{.*}}) : (vector<[4]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_ver_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_ver_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[4]x[4]xf32> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[4]xi1>, vector<[4]x[4]xf32> 
return } @@ -394,9 +394,9 @@ func.func @arm_sme_store_tile_slice_ver_f32(%tile : vector<[4]x[4]xf32>, %tile_s // CHECK-LABEL: @arm_sme_store_tile_slice_ver_f64 // CHECK: "arm_sme.intr.st1d.vert"({{.*}}) : (vector<[2]xi1>, !llvm.ptr, i32, i32) -> () -func.func @arm_sme_store_tile_slice_ver_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %dest : memref) -> () { +func.func @arm_sme_store_tile_slice_ver_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref) -> () { %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[2]x[2]xf64> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[2]xi1>, vector<[2]x[2]xf64> return } diff --git a/mlir/test/Dialect/ArmSME/invalid.mlir b/mlir/test/Dialect/ArmSME/invalid.mlir index 588b8e891fadde..d35eb7f7bccc75 100644 --- a/mlir/test/Dialect/ArmSME/invalid.mlir +++ b/mlir/test/Dialect/ArmSME/invalid.mlir @@ -15,7 +15,7 @@ func.func @arm_sme_cast_tile_to_vector__bad_tile_id_bitwidth(%tile_id : i8) -> v // ----- func.func @arm_sme_cast_tile_to_vector__bad_vector_type_rank_1(%tile_id : i8) -> vector<[16]xi8> { - // expected-error@+1 {{op result #0 must be vector<[16]x[16]xi8> of 8-bit signless integer values or vector<[8]x[8]xi16> of 16-bit signless integer values or vector<[4]x[4]xi32> of 32-bit signless integer values or vector<[2]x[2]xi64> of 64-bit signless integer values or vector<[1]x[1]xi128> of 128-bit signless integer values or vector<[8]x[8]xf16> of 16-bit float values or vector<[8]x[8]xbf16> of bfloat16 type values or vector<[4]x[4]xf32> of 32-bit float values or vector<[2]x[2]xf64> of 64-bit float values, but got 'vector<[16]xi8>'}} + // expected-error@+1 {{op result #0 must be a vector type that fits into a SME tile, but got 'vector<[16]xi8>'}} %0 = arm_sme.cast_tile_to_vector %tile_id : i8 to vector<[16]xi8> return %0 : vector<[16]xi8> } @@ -23,7 +23,7 @@ func.func @arm_sme_cast_tile_to_vector__bad_vector_type_rank_1(%tile_id : i8) -> // ----- func.func @arm_sme_cast_tile_to_vector__bad_vector_type_i4(%tile_id : i8) -> vector<[16]x[16]xi4> { - // expected-error@+1 {{op result #0 must be vector<[16]x[16]xi8> of 8-bit signless integer values or vector<[8]x[8]xi16> of 16-bit signless integer values or vector<[4]x[4]xi32> of 32-bit signless integer values or vector<[2]x[2]xi64> of 64-bit signless integer values or vector<[1]x[1]xi128> of 128-bit signless integer values or vector<[8]x[8]xf16> of 16-bit float values or vector<[8]x[8]xbf16> of bfloat16 type values or vector<[4]x[4]xf32> of 32-bit float values or vector<[2]x[2]xf64> of 64-bit float values, but got 'vector<[16]x[16]xi4>'}} + // expected-error@+1 {{op result #0 must be a vector type that fits into a SME tile, but got 'vector<[16]x[16]xi4>'}} %0 = arm_sme.cast_tile_to_vector %tile_id : i8 to vector<[16]x[16]xi4> return %0 : vector<[16]x[16]xi4> } @@ -31,7 +31,7 @@ func.func @arm_sme_cast_tile_to_vector__bad_vector_type_i4(%tile_id : i8) -> vec // ----- func.func @arm_sme_cast_tile_to_vector__bad_vector_type_non_scalable_dim_0(%tile_id : i8) -> vector<16x[16]xi8> { - // expected-error@+1 {{op result #0 must be vector<[16]x[16]xi8> of 8-bit signless integer values or vector<[8]x[8]xi16> of 16-bit signless integer values or vector<[4]x[4]xi32> of 32-bit signless integer values or vector<[2]x[2]xi64> of 64-bit signless integer values or vector<[1]x[1]xi128> of 128-bit signless integer values or vector<[8]x[8]xf16> of 16-bit float 
values or vector<[8]x[8]xbf16> of bfloat16 type values or vector<[4]x[4]xf32> of 32-bit float values or vector<[2]x[2]xf64> of 64-bit float values, but got 'vector<16x[16]xi8>'}} + // expected-error@+1 {{op result #0 must be a vector type that fits into a SME tile, but got 'vector<16x[16]xi8>'}} %0 = arm_sme.cast_tile_to_vector %tile_id : i8 to vector<16x[16]xi8> return %0 : vector<16x[16]xi8> } @@ -39,7 +39,7 @@ func.func @arm_sme_cast_tile_to_vector__bad_vector_type_non_scalable_dim_0(%tile // ----- func.func @arm_sme_cast_tile_to_vector__bad_vector_type_non_scalable_dim_1(%tile_id : i8) -> vector<[16]x16xi8> { - // expected-error@+1 {{op result #0 must be vector<[16]x[16]xi8> of 8-bit signless integer values or vector<[8]x[8]xi16> of 16-bit signless integer values or vector<[4]x[4]xi32> of 32-bit signless integer values or vector<[2]x[2]xi64> of 64-bit signless integer values or vector<[1]x[1]xi128> of 128-bit signless integer values or vector<[8]x[8]xf16> of 16-bit float values or vector<[8]x[8]xbf16> of bfloat16 type values or vector<[4]x[4]xf32> of 32-bit float values or vector<[2]x[2]xf64> of 64-bit float values, but got 'vector<[16]x16xi8>'}} + // expected-error@+1 {{op result #0 must be a vector type that fits into a SME tile, but got 'vector<[16]x16xi8>'}} %0 = arm_sme.cast_tile_to_vector %tile_id : i8 to vector<[16]x16xi8> return %0 : vector<[16]x16xi8> } @@ -47,7 +47,7 @@ func.func @arm_sme_cast_tile_to_vector__bad_vector_type_non_scalable_dim_1(%tile // ----- func.func @arm_sme_cast_tile_to_vector_bad_shape(%tile_id : i8) -> vector<[4]x[16]xi8> { - // expected-error@+1 {{op result #0 must be vector<[16]x[16]xi8> of 8-bit signless integer values or vector<[8]x[8]xi16> of 16-bit signless integer values or vector<[4]x[4]xi32> of 32-bit signless integer values or vector<[2]x[2]xi64> of 64-bit signless integer values or vector<[1]x[1]xi128> of 128-bit signless integer values or vector<[8]x[8]xf16> of 16-bit float values or vector<[8]x[8]xbf16> of bfloat16 type values or vector<[4]x[4]xf32> of 32-bit float values or vector<[2]x[2]xf64> of 64-bit float values, but got 'vector<[4]x[16]xi8>'}} + // expected-error@+1 {{op result #0 must be a vector type that fits into a SME tile, but got 'vector<[4]x[16]xi8>'}} %0 = arm_sme.cast_tile_to_vector %tile_id : i8 to vector<[4]x[16]xi8> return %0 : vector<[4]x[16]xi8> } @@ -67,7 +67,7 @@ func.func @arm_sme_cast_vector_to_tile__bad_tile_id_bitwidth(%vector : vector<[1 // ----- func.func @arm_sme_cast_vector_to_tile__bad_rank_1d(%vector : vector<[16]xi8>) -> i8 { - // expected-error@+1 {{op operand #0 must be vector<[16]x[16]xi8> of 8-bit signless integer values or vector<[8]x[8]xi16> of 16-bit signless integer values or vector<[4]x[4]xi32> of 32-bit signless integer values or vector<[2]x[2]xi64> of 64-bit signless integer values or vector<[1]x[1]xi128> of 128-bit signless integer values or vector<[8]x[8]xf16> of 16-bit float values or vector<[8]x[8]xbf16> of bfloat16 type values or vector<[4]x[4]xf32> of 32-bit float values or vector<[2]x[2]xf64> of 64-bit float values, but got 'vector<[16]xi8>'}} + // expected-error@+1 {{op operand #0 must be a vector type that fits into a SME tile, but got 'vector<[16]xi8>'}} %0 = arm_sme.cast_vector_to_tile %vector : vector<[16]xi8> to i8 return %0 : i8 } @@ -79,7 +79,7 @@ func.func @arm_sme_cast_vector_to_tile__bad_rank_1d(%vector : vector<[16]xi8>) - // ----- func.func @arm_sme_get_tile_id__bad_type() -> i1 { - // expected-error@+1 {{op result #0 must be 8-bit signless integer or 16-bit signless integer or 
32-bit signless integer or 64-bit signless integer or 128-bit signless integer}}
+  // expected-error@+1 {{op result #0 must be an identifier of a virtual tile (of a size) within the ZA storage}}
   %0 = arm_sme.get_tile_id : i1
   return %0 : i1
 }
@@ -159,7 +159,7 @@ func.func @arm_sme_tile_load__pad_but_no_mask(%src : memref<?x?xf64>, %pad : f64
 func.func @arm_sme_load_tile_slice__bad_mask_type(%src : memref<?x?xi8>, %mask : vector<[2]xi1>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) {
   %c0 = arith.constant 0 : index
-  // expected-error@+1 {{op failed to verify that mask has i1 element type and is a slice of the result}}
+  // expected-error@+1 {{op failed to verify that `mask` has i1 element type and the shape is a slice of `result`}}
   %tile_update = arm_sme.load_tile_slice %src[%c0], %mask, %tile, %tile_slice_index : memref<?x?xi8>, vector<[2]xi1>, vector<[16]x[16]xi8>
   return
 }
@@ -178,6 +178,20 @@ func.func @arm_sme_tile_store__bad_mask_type(%tile : vector<[16]x[16]xi8>, %mask
   return
 }
+//===----------------------------------------------------------------------===//
+// arm_sme.store_tile_slice
+//===----------------------------------------------------------------------===//
+
+
+// -----
+
+func.func @arm_sme_store_tile_slice__bad_mask_type(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xi8>) -> () {
+  %c0 = arith.constant 0 : index
+  // expected-error@+1 {{op failed to verify that `mask` has i1 element type and the shape is a slice of `tile`}}
+  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref<?x?xi8>, vector<[8]xi1>, vector<[16]x[16]xi8>
+  return
+}
+
 //===----------------------------------------------------------------------===//
 // arm_sme.outerproduct
 //===----------------------------------------------------------------------===//
@@ -186,7 +200,7 @@ func.func @arm_sme_tile_store__bad_mask_type(%tile : vector<[16]x[16]xi8>, %mask
 func.func @arm_sme_outerproduct__bad_result_type(%vecA: vector<[2]xi16>, %vecB: vector<[2]xi16>) -> vector<[2]x[2]xi16> {
-  // expected-error@+1 {{op result #0 must be vector<[16]x[16]xi8> of 8-bit signless integer values or vector<[8]x[8]xi16> of 16-bit signless integer values or vector<[4]x[4]xi32> of 32-bit signless integer values or vector<[2]x[2]xi64> of 64-bit signless integer values or vector<[1]x[1]xi128> of 128-bit signless integer values or vector<[8]x[8]xf16> of 16-bit float values or vector<[8]x[8]xbf16> of bfloat16 type values or vector<[4]x[4]xf32> of 32-bit float values or vector<[2]x[2]xf64> of 64-bit float values, but got 'vector<[2]x[2]xi16>'}}
+  // expected-error@+1 {{op result #0 must be a vector type that fits into a SME tile, but got 'vector<[2]x[2]xi16>'}}
   %0 = arm_sme.outerproduct %vecA, %vecB : vector<[2]xi16>, vector<[2]xi16>
   return %0 : vector<[2]x[2]xi16>
 }
diff --git a/mlir/test/Dialect/ArmSME/roundtrip.mlir b/mlir/test/Dialect/ArmSME/roundtrip.mlir
index 8206bd80e3eb4c..1dbcc32e9259fc 100644
--- a/mlir/test/Dialect/ArmSME/roundtrip.mlir
+++ b/mlir/test/Dialect/ArmSME/roundtrip.mlir
@@ -823,173 +823,173 @@ func.func @arm_sme_load_tile_slice_hor_i8(%src : memref<?x?xi8>, %mask : vector<
 // -----
 
-func.func @arm_sme_store_tile_slice_hor_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %dest : memref<?x?xi8>) -> () {
-  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]x[16]xi8>
+func.func @arm_sme_store_tile_slice_hor_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %mask : vector<[16]xi1>, %dest : memref<?x?xi8>) -> () {
+  // CHECK:
arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[16]xi1>, vector<[16]x[16]xi8> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[16]x[16]xi8> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[16]xi1>, vector<[16]x[16]xi8> return } // ----- -func.func @arm_sme_store_tile_slice_hor_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[8]x[8]xi16> +func.func @arm_sme_store_tile_slice_hor_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[8]xi1>, vector<[8]x[8]xi16> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[8]x[8]xi16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[8]xi1>, vector<[8]x[8]xi16> return } // ----- -func.func @arm_sme_store_tile_slice_hor_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[4]x[4]xi32> +func.func @arm_sme_store_tile_slice_hor_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[4]xi1>, vector<[4]x[4]xi32> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[4]x[4]xi32> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[4]xi1>, vector<[4]x[4]xi32> return } // ----- -func.func @arm_sme_store_tile_slice_hor_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[2]x[2]xi64> +func.func @arm_sme_store_tile_slice_hor_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[2]xi1>, vector<[2]x[2]xi64> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[2]x[2]xi64> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[2]xi1>, vector<[2]x[2]xi64> return } // ----- -func.func @arm_sme_store_tile_slice_hor_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[1]x[1]xi128> +func.func @arm_sme_store_tile_slice_hor_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %mask : vector<[1]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[1]xi1>, vector<[1]x[1]xi128> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[1]x[1]xi128> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[1]xi1>, vector<[1]x[1]xi128> return } // ----- -func.func @arm_sme_store_tile_slice_hor_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, 
%{{.*}}[{{.*}}] : memref, vector<[8]x[8]xf16> +func.func @arm_sme_store_tile_slice_hor_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[8]xi1>, vector<[8]x[8]xf16> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[8]x[8]xf16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[8]xi1>, vector<[8]x[8]xf16> return } // ----- -func.func @arm_sme_store_tile_slice_hor_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[8]x[8]xbf16> +func.func @arm_sme_store_tile_slice_hor_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[8]xi1>, vector<[8]x[8]xbf16> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[8]x[8]xbf16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[8]xi1>, vector<[8]x[8]xbf16> return } // ----- -func.func @arm_sme_store_tile_slice_hor_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[4]x[4]xf32> +func.func @arm_sme_store_tile_slice_hor_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[4]xi1>, vector<[4]x[4]xf32> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[4]x[4]xf32> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[4]xi1>, vector<[4]x[4]xf32> return } // ----- -func.func @arm_sme_store_tile_slice_hor_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[2]x[2]xf64> +func.func @arm_sme_store_tile_slice_hor_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref, vector<[2]xi1>, vector<[2]x[2]xf64> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] : memref, vector<[2]x[2]xf64> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref, vector<[2]xi1>, vector<[2]x[2]xf64> return } // ----- -func.func @arm_sme_store_tile_slice_ver_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[16]x[16]xi8> +func.func @arm_sme_store_tile_slice_ver_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %mask : vector<[16]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[16]xi1>, vector<[16]x[16]xi8> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[16]x[16]xi8> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[16]xi1>, vector<[16]x[16]xi8> return } // ----- 
-func.func @arm_sme_store_tile_slice_ver_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[8]x[8]xi16> +func.func @arm_sme_store_tile_slice_ver_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[8]xi1>, vector<[8]x[8]xi16> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[8]x[8]xi16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[8]xi1>, vector<[8]x[8]xi16> return } // ----- -func.func @arm_sme_store_tile_slice_ver_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[4]x[4]xi32> +func.func @arm_sme_store_tile_slice_ver_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[4]xi1>, vector<[4]x[4]xi32> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[4]x[4]xi32> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[4]xi1>, vector<[4]x[4]xi32> return } // ----- -func.func @arm_sme_store_tile_slice_ver_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[2]x[2]xi64> +func.func @arm_sme_store_tile_slice_ver_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[2]xi1>, vector<[2]x[2]xi64> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[2]x[2]xi64> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[2]xi1>, vector<[2]x[2]xi64> return } // ----- -func.func @arm_sme_store_tile_slice_ver_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[1]x[1]xi128> +func.func @arm_sme_store_tile_slice_ver_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %mask : vector<[1]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[1]xi1>, vector<[1]x[1]xi128> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[1]x[1]xi128> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[1]xi1>, vector<[1]x[1]xi128> return } // ----- -func.func @arm_sme_store_tile_slice_ver_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[8]x[8]xf16> +func.func @arm_sme_store_tile_slice_ver_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[8]xi1>, vector<[8]x[8]xf16> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[8]x[8]xf16> + arm_sme.store_tile_slice %tile, %tile_slice_index, 
%mask, %dest[%c0] layout : memref, vector<[8]xi1>, vector<[8]x[8]xf16> return } // ----- -func.func @arm_sme_store_tile_slice_ver_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[8]x[8]xbf16> +func.func @arm_sme_store_tile_slice_ver_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[8]xi1>, vector<[8]x[8]xbf16> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[8]x[8]xbf16> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[8]xi1>, vector<[8]x[8]xbf16> return } // ----- -func.func @arm_sme_store_tile_slice_ver_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[4]x[4]xf32> +func.func @arm_sme_store_tile_slice_ver_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[4]xi1>, vector<[4]x[4]xf32> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[4]x[4]xf32> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[4]xi1>, vector<[4]x[4]xf32> return } // ----- -func.func @arm_sme_store_tile_slice_ver_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %dest : memref) -> () { - // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[2]x[2]xf64> +func.func @arm_sme_store_tile_slice_ver_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref) -> () { + // CHECK: arm_sme.store_tile_slice {{.*}} layout : memref, vector<[2]xi1>, vector<[2]x[2]xf64> %c0 = arith.constant 0 : index - arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout : memref, vector<[2]x[2]xf64> + arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout : memref, vector<[2]xi1>, vector<[2]x[2]xf64> return } // ----- /// Layout is optional and horizontal is the default, verify it's still parsed. 
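// A hedged illustration of the default-layout behaviour the next test checks
// (the %tile/%idx/%mask/%dest names and the i8 shapes are illustrative, not
// taken from the test file): omitting the layout attribute is equivalent to
// spelling layout<horizontal>, so both forms below round-trip to the same op.
//
// arm_sme.store_tile_slice %tile, %idx, %mask, %dest[%c0]
//     : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
// arm_sme.store_tile_slice %tile, %idx, %mask, %dest[%c0] layout<horizontal>
//     : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>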
-func.func @arm_sme_store_tile_slice_hor_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %dest : memref<?x?xi8>) -> () {
-  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]x[16]xi8>
+func.func @arm_sme_store_tile_slice_hor_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %mask : vector<[16]xi1>, %dest : memref<?x?xi8>) -> () {
+  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
   %c0 = arith.constant 0 : index
-  arm_sme.store_tile_slice %tile, %tile_slice_index, %dest[%c0] layout<horizontal> : memref<?x?xi8>, vector<[16]x[16]xi8>
+  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout<horizontal> : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
   return
 }
diff --git a/mlir/test/Dialect/ArmSME/vector-ops-to-sme.mlir b/mlir/test/Dialect/ArmSME/vector-ops-to-sme.mlir
index 5f41313fc6ac78..ed33f8508dba0b 100644
--- a/mlir/test/Dialect/ArmSME/vector-ops-to-sme.mlir
+++ b/mlir/test/Dialect/ArmSME/vector-ops-to-sme.mlir
@@ -1,181 +1,189 @@
 // RUN: mlir-opt %s -convert-vector-to-arm-sme -split-input-file -allow-unregistered-dialect | FileCheck %s

 //===----------------------------------------------------------------------===//
-// vector.transfer_read (with in-flight transpose)
+// vector.transfer_read
 //===----------------------------------------------------------------------===//

-// CHECK-LABEL: @transfer_read_2d_transpose_i8
-// CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi8>, vector<[16]x[16]xi8>
-func.func @transfer_read_2d_transpose_i8(%src : memref<?x?xi8>) {
+// CHECK-LABEL: @transfer_read_2d_i8
+// CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]x[16]xi8>
+func.func @transfer_read_2d_i8(%src : memref<?x?xi8>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i8
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xi8>, vector<[16]x[16]xi8>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi8>, vector<[16]x[16]xi8>
   "prevent.dce"(%0) : (vector<[16]x[16]xi8>) -> ()
   return
 }

 // -----

-// CHECK-LABEL: @transfer_read_2d_transpose_i16
-// CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi16>, vector<[8]x[8]xi16>
-func.func @transfer_read_2d_transpose_i16(%src : memref<?x?xi16>) {
+// CHECK-LABEL: @transfer_read_2d_i16
+// CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi16>, vector<[8]x[8]xi16>
+func.func @transfer_read_2d_i16(%src : memref<?x?xi16>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i16
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xi16>, vector<[8]x[8]xi16>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi16>, vector<[8]x[8]xi16>
   "prevent.dce"(%0) : (vector<[8]x[8]xi16>) -> ()
   return
 }

 // -----

-// CHECK-LABEL: @transfer_read_2d_transpose_i32
-// CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi32>, vector<[4]x[4]xi32>
-func.func @transfer_read_2d_transpose_i32(%src : memref<?x?xi32>) {
+// CHECK-LABEL: @transfer_read_2d_i32
+// CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi32>, vector<[4]x[4]xi32>
+func.func @transfer_read_2d_i32(%src : memref<?x?xi32>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i32
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xi32>, vector<[4]x[4]xi32>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi32>, vector<[4]x[4]xi32>
   "prevent.dce"(%0) : (vector<[4]x[4]xi32>) -> ()
   return
 }

 // -----

-// CHECK-LABEL: @transfer_read_2d_transpose_i64
-// CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi64>, vector<[2]x[2]xi64>
-func.func @transfer_read_2d_transpose_i64(%src : memref<?x?xi64>) {
+// CHECK-LABEL: @transfer_read_2d_i64
+// CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi64>, vector<[2]x[2]xi64>
+func.func @transfer_read_2d_i64(%src : memref<?x?xi64>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xi64>, vector<[2]x[2]xi64>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi64>, vector<[2]x[2]xi64>
   "prevent.dce"(%0) : (vector<[2]x[2]xi64>) -> ()
   return
 }

 // -----

-// CHECK-LABEL: @transfer_read_2d_transpose_i128
-// CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi128>, vector<[1]x[1]xi128>
-func.func @transfer_read_2d_transpose_i128(%src : memref<?x?xi128>) {
+// CHECK-LABEL: @transfer_read_2d_i128
+// CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi128>, vector<[1]x[1]xi128>
+func.func @transfer_read_2d_i128(%src : memref<?x?xi128>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i128
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xi128>, vector<[1]x[1]xi128>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi128>, vector<[1]x[1]xi128>
   "prevent.dce"(%0) : (vector<[1]x[1]xi128>) -> ()
   return
 }

 // -----

-// CHECK-LABEL: @transfer_read_2d_transpose_f16
-// CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf16>, vector<[8]x[8]xf16>
-func.func @transfer_read_2d_transpose_f16(%src : memref<?x?xf16>) {
+// CHECK-LABEL: @transfer_read_2d_f16
+// CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xf16>, vector<[8]x[8]xf16>
+func.func @transfer_read_2d_f16(%src : memref<?x?xf16>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f16
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xf16>, vector<[8]x[8]xf16>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf16>, vector<[8]x[8]xf16>
   "prevent.dce"(%0) : (vector<[8]x[8]xf16>) -> ()
   return
 }

 // -----

-// CHECK-LABEL: @transfer_read_2d_transpose_bf16
-// CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xbf16>, vector<[8]x[8]xbf16>
-func.func @transfer_read_2d_transpose_bf16(%src : memref<?x?xbf16>) {
+// CHECK-LABEL: @transfer_read_2d_bf16
+// CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
+func.func @transfer_read_2d_bf16(%src : memref<?x?xbf16>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : bf16
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xbf16>, vector<[8]x[8]xbf16>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xbf16>, vector<[8]x[8]xbf16>
   "prevent.dce"(%0) : (vector<[8]x[8]xbf16>) -> ()
   return
 }

 // -----

-// CHECK-LABEL: @transfer_read_2d_transpose_f32
-// CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
-func.func @transfer_read_2d_transpose_f32(%src : memref<?x?xf32>) {
+// CHECK-LABEL: @transfer_read_2d_f32
+// CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xf32>, vector<[4]x[4]xf32>
+func.func @transfer_read_2d_f32(%src : memref<?x?xf32>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
   "prevent.dce"(%0) : (vector<[4]x[4]xf32>) -> ()
   return
 }

 // -----

-// CHECK-LABEL: @transfer_read_2d_transpose_f64
-// CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf64>, vector<[2]x[2]xf64>
-func.func @transfer_read_2d_transpose_f64(%src : memref<?x?xf64>) {
+// CHECK-LABEL: @transfer_read_2d_f64
+// CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xf64>, vector<[2]x[2]xf64>
+func.func @transfer_read_2d_f64(%src : memref<?x?xf64>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xf64>, vector<[2]x[2]xf64>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf64>, vector<[2]x[2]xf64>
   "prevent.dce"(%0) : (vector<[2]x[2]xf64>) -> ()
   return
 }

 // -----

-// CHECK-LABEL: @transfer_read_2d__bad_type
-// CHECK-NOT: arm_sme.tile_load
-// CHECK: vector.transfer_read
-func.func @transfer_read_2d__bad_type(%src : memref<?x?xf64>) {
+// CHECK-LABEL: @transfer_read_2d_with_mask_i16
+// CHECK: arm_sme.tile_load %{{.*}}[{{.*}}], {{.*}}, {{.*}} : memref<?x?xi16>, vector<[8]x[8]xi16>
+func.func @transfer_read_2d_with_mask_i16(%src : memref<?x?xi16>, %mask : vector<[8]x[8]xi1>) {
   %c0 = arith.constant 0 : index
-  %pad = arith.constant 0.0 : f64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [false, false]} : memref<?x?xf64>, vector<[4]x[4]xf64>
-  "prevent.dce"(%0) : (vector<[4]x[4]xf64>) -> ()
+  %pad = arith.constant 0 : i16
+  %0 = vector.transfer_read %src[%c0, %c0], %pad, %mask {in_bounds = [true, true]} : memref<?x?xi16>, vector<[8]x[8]xi16>
+  "prevent.dce"(%0) : (vector<[8]x[8]xi16>) -> ()
   return
 }

 // -----

-// CHECK-LABEL: @transfer_read_2d__non_memref_type
-// CHECK-NOT: arm_sme.tile_load
-// CHECK: vector.transfer_read
-func.func @transfer_read_2d__non_memref_type(%src : tensor<?x?xf64>) {
+/// in-flight transpose
+
+// CHECK-LABEL: @transfer_read_2d_transpose_i8
+// CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi8>, vector<[16]x[16]xi8>
+func.func @transfer_read_2d_transpose_i8(%src : memref<?x?xi8>) {
   %c0 = arith.constant 0 : index
-  %pad = arith.constant 0.0 : f64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : tensor<?x?xf64>, vector<[2]x[2]xf64>
-  "prevent.dce"(%0) : (vector<[2]x[2]xf64>) -> ()
+  %pad = arith.constant 0 : i8
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xi8>, vector<[16]x[16]xi8>
+  "prevent.dce"(%0) : (vector<[16]x[16]xi8>) -> ()
   return
 }

 // -----

-// CHECK-LABEL: @transfer_read_2d__bad_transfer_rank
+// CHECK-LABEL: @transfer_read_2d_transpose_with_mask_f32
+// CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
+func.func @transfer_read_2d_transpose_with_mask_f32(%src : memref<?x?xf32>, %mask : vector<[4]x[4]xi1>) {
+  %c0 = arith.constant 0 : index
+  %pad = arith.constant 0.0 : f32
+  %0 = vector.transfer_read %src[%c0, %c0], %pad, %mask {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  "prevent.dce"(%0) : (vector<[4]x[4]xf32>) -> ()
+  return
+}
+
+// -----
+
+// CHECK-LABEL: @transfer_read_2d__bad_type
 // CHECK-NOT: arm_sme.tile_load
 // CHECK: vector.transfer_read
-func.func @transfer_read_2d__bad_transfer_rank(%src : memref<?x?xf64>) {
+func.func @transfer_read_2d__bad_type(%src : memref<?x?xf64>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]} : memref<?x?xf64>, vector<[2]xf64>
-  "prevent.dce"(%0) : (vector<[2]xf64>) -> ()
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [false, false]} : memref<?x?xf64>, vector<[4]x[4]xf64>
+  "prevent.dce"(%0) : (vector<[4]x[4]xf64>) -> ()
   return
 }

 // -----

-// CHECK-LABEL: @transfer_read_2d__unsupported_mask
+// CHECK-LABEL: @transfer_read_2d__non_memref_type
 // CHECK-NOT: arm_sme.tile_load
 // CHECK: vector.transfer_read
-func.func @transfer_read_2d__unsupported_mask(%src : memref<?x?xf64>, %mask : vector<[2]x[2]xi1>) {
+func.func @transfer_read_2d__non_memref_type(%src : tensor<?x?xf64>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad, %mask {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xf64>, vector<[2]x[2]xf64>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : tensor<?x?xf64>, vector<[2]x[2]xf64>
   "prevent.dce"(%0) : (vector<[2]x[2]xf64>) -> ()
   return
 }

 // -----

-/// transfer_read with identity map should be lowered to vector.load by
-/// TransferReadToVectorLoadLowering and then arm_sme.tile_load by
-/// VectorLoadToArmSMELowering.
-
-// CHECK-LABEL: @transfer_read_2d__non_permuting_map
+// CHECK-LABEL: @transfer_read_2d__bad_transfer_rank
 // CHECK-NOT: arm_sme.tile_load
 // CHECK: vector.transfer_read
-func.func @transfer_read_2d__non_permuting_map(%src : memref<?x?xf64>) {
+func.func @transfer_read_2d__bad_transfer_rank(%src : memref<?x?xf64>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d0, d1)>, in_bounds = [true, true]} : memref<?x?xf64>, vector<[2]x[2]xf64>
-  "prevent.dce"(%0) : (vector<[2]x[2]xf64>) -> ()
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]} : memref<?x?xf64>, vector<[2]xf64>
+  "prevent.dce"(%0) : (vector<[2]xf64>) -> ()
   return
 }
diff --git a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir
index 8d52d9900a793e..3c50a9e72d9d9b 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir
@@ -215,3 +215,26 @@ func.func @buffer_loop_hoisting(%lb: index, %ub: index, %step: index, %f: f32, %
   }
   return
 }
+
+// -----
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %alloc_tensor = transform.structured.match ops{["bufferization.alloc_tensor"]} in %arg1
+      : (!transform.any_op) -> !transform.op<"bufferization.alloc_tensor">
+    %2, %new = transform.structured.bufferize_to_allocation %alloc_tensor
+      {alloc_op = "memref.alloca"}
+      : !transform.op<"bufferization.alloc_tensor">
+    transform.yield
+  }
+}
+
+// Expect `bufferization.bufferize_to_allocation` to create an alloc.
+// CHECK-LABEL: func.func @empty_to_tensor_alloc()
+func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> {
+  // CHECK-NEXT: %[[alloca:.*]] = memref.alloca() : memref<2x2xf32>
+  // CHECK-NEXT: %[[tensor:.*]] = bufferization.to_tensor %[[alloca]] restrict writable : memref<2x2xf32>
+  // CHECK-NEXT: return %[[tensor]] : tensor<2x2xf32>
+  %0 = bufferization.alloc_tensor() : tensor<2x2xf32>
+  return %0 : tensor<2x2xf32>
+}
diff --git a/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir b/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir
index 149c0bc46e2511..972364289ac2e2 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir
@@ -1,15 +1,4 @@
-// RUN: mlir-opt %s -split-input-file --sparse-reinterpret-map | FileCheck %s
-
-#SparseVector = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>
-
-// CHECK-LABEL: func @sparse_nop(
-// CHECK-SAME: %[[A0:.*]]: tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>)
-// CHECK: return %[[A0]]
-func.func @sparse_nop(%arg0: tensor<?xf64, #SparseVector>) -> tensor<?xf64, #SparseVector> {
-  return %arg0 : tensor<?xf64, #SparseVector>
-}
-
-// -----
+// RUN: mlir-opt %s -split-input-file --sparse-reinterpret-map | FileCheck %s

 #trait_mul = {
   indexing_maps = [
@@ -55,3 +44,37 @@ func.func @mul(%arg0: tensor<32x32xf32>,
   return %0 : tensor<32x32xf32, #BSR>
 }

+// -----
+
+#BSR = #sparse_tensor.encoding<{
+  map = ( i, j ) ->
+  ( i floordiv 2 : dense,
+    j floordiv 2 : compressed,
+    i mod 2 : dense,
+    j mod 2 : dense
+  )
+}>
+
+// CHECK-LABEL: func.func @sparse_foreach_reinterpret_map(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64
+// CHECK: %[[VAL_1:.*]] = bufferization.alloc_tensor() : tensor<1x2x2x2xf64
+// CHECK: %[[VAL_2:.*]] = sparse_tensor.reinterpret_map %[[VAL_0]] : tensor<2x4xf64
+// CHECK: %[[VAL_4:.*]] = sparse_tensor.foreach in %[[VAL_2]] init(%[[VAL_1]])
+// CHECK: ^bb0(%[[VAL_5:.*]]: index, %[[VAL_6:.*]]: index, %[[VAL_7:.*]]: index, %[[VAL_8:.*]]: index, %[[VAL_9:.*]]: f64, %[[VAL_10:.*]]: tensor<1x2x2x2xf64
+// CHECK: %[[VAL_11:.*]] = sparse_tensor.insert %[[VAL_9]] into %[[VAL_10]]{{\[}}%[[VAL_5]], %[[VAL_6]], %[[VAL_7]], %[[VAL_8]]] : tensor<1x2x2x2xf64
+// CHECK: sparse_tensor.yield %[[VAL_11]] : tensor<1x2x2x2xf64
+// CHECK: }
+// CHECK: %[[VAL_12:.*]] = sparse_tensor.reinterpret_map %[[VAL_4]] : tensor<1x2x2x2xf64
+// CHECK: %[[VAL_13:.*]] = sparse_tensor.load %[[VAL_12]] hasInserts : tensor<2x4xf64
+// CHECK: return %[[VAL_13]] : tensor<2x4xf64
+// CHECK: }
+func.func @sparse_foreach_reinterpret_map(%6 : tensor<2x4xf64, #BSR>) -> tensor<2x4xf64, #BSR> {
+  %7 = bufferization.alloc_tensor() : tensor<2x4xf64, #BSR>
+  %8 = sparse_tensor.foreach in %6 init(%7) : tensor<2x4xf64, #BSR>, tensor<2x4xf64, #BSR> -> tensor<2x4xf64, #BSR> do {
+    ^bb0(%arg0: index, %arg1: index, %arg2: f64, %arg3: tensor<2x4xf64, #BSR>):
+      %inserted = tensor.insert %arg2 into %arg3[%arg0, %arg1] : tensor<2x4xf64, #BSR>
+      sparse_tensor.yield %inserted : tensor<2x4xf64, #BSR>
+  }
+  %9 = sparse_tensor.load %8 hasInserts : tensor<2x4xf64, #BSR>
+  return %9 : tensor<2x4xf64, #BSR>
+}
diff --git a/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir b/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir
index e1d6c3be494713..af0f98a1c447de 100644
--- a/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir
+++ b/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir
@@ -350,3 +350,81 @@ func.func @vector_extract_cst_maskedload_i4() -> vector<8x8x16xi4> {
 // CHECK32-SAME: memref<128xi32>, vector<2xi1>, vector<2xi32> into vector<2xi32>
 // CHECK32: %[[BITCAST:.+]] = vector.bitcast %[[LOAD]] : vector<2xi32> to vector<16xi4>
 // CHECK32: %[[SELECT:.+]] = arith.select %[[ORIG_EXT2]], %[[BITCAST]], %[[PASSTHRU]] : vector<16xi1>, vector<16xi4>
+
+// -----
+
+func.func @vector_store_i8(%arg0: vector<8xi8>, %arg1: index, %arg2: index) {
+  %0 = memref.alloc() : memref<4x8xi8>
+  vector.store %arg0, %0[%arg1, %arg2] : memref<4x8xi8>, vector<8xi8>
+  return
+}
+
+// Expect no conversions, i8 is supported.
+// CHECK: func @vector_store_i8
+// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<4x8xi8>
+// CHECK: vector.store %[[ARG0]], %[[ALLOC:.+]][%[[ARG1]], %[[ARG2]]] : memref<4x8xi8>, vector<8xi8>
+
+// CHECK32-DAG: affine_map<()[s0, s1] -> (s0 * 2 + s1 floordiv 4)>
+// CHECK32: func @vector_store_i8
+// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<8xi32>
+// CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]]]
+// CHECK32: %[[VEC_I32:.+]] = vector.bitcast %[[ARG0]] : vector<8xi8> to vector<2xi32>
+// CHECK32: vector.store %[[VEC_I32:.+]], %[[ALLOC:.+]][%[[INDEX:.+]]] : memref<8xi32>, vector<2xi32>
+
+// -----
+
+func.func @vector_store_i4(%arg0: vector<8xi4>, %arg1: index, %arg2: index) {
+  %0 = memref.alloc() : memref<4x8xi4>
+  vector.store %arg0, %0[%arg1, %arg2] : memref<4x8xi4>, vector<8xi4>
+  return
+}
+
+// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 floordiv 2)>
+// CHECK: func @vector_store_i4
+// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<16xi8>
+// CHECK: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]]]
+// CHECK: %[[VEC_I8:.+]] = vector.bitcast %[[ARG0]] : vector<8xi4> to vector<4xi8>
+// CHECK: vector.store %[[VEC_I8:.+]], %[[ALLOC:.+]][%[[INDEX:.+]]] : memref<16xi8>, vector<4xi8>
+
+// CHECK32-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1 floordiv 8)>
+// CHECK32: func @vector_store_i4
+// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<4xi32>
+// CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]]]
+// CHECK32: %[[VEC_I32:.+]] = vector.bitcast %[[ARG0]] : vector<8xi4> to vector<1xi32>
+// CHECK32: vector.store %[[VEC_I32:.+]], %[[ALLOC:.+]][%[[INDEX:.+]]] : memref<4xi32>, vector<1xi32>
+
+// -----
+
+func.func @vector_store_i4_dynamic(%arg0: vector<8xi4>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) {
+  %0 = memref.alloc(%arg1, %arg2) : memref<?x?xi4>
+  vector.store %arg0, %0[%arg3, %arg4] : memref<?x?xi4>, vector<8xi4>
+  return
+}
+
+// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 2)>
+// CHECK: func @vector_store_i4_dynamic
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: vector<8xi4>
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index
+// CHECK: %[[SIZE:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]]]
+// CHECK: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]]) : memref<?xi8>
+// CHECK: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG2]], %[[ARG4]]]
+// CHECK: %[[VEC_I8:.+]] = vector.bitcast %[[ARG0]] : vector<8xi4> to vector<4xi8>
+// CHECK: vector.store %[[VEC_I8:.+]], %[[ALLOC:.+]][%[[INDEX:.+]]] : memref<?xi8>, vector<4xi8>
+
+// CHECK32-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 8)>
+// CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 8)>
+// CHECK32: func @vector_store_i4_dynamic
+// CHECK32-SAME: %[[ARG0:[a-zA-Z0-9]+]]: vector<8xi4>
+// CHECK32-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index
+// CHECK32-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
+// CHECK32-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
+// CHECK32-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index
+// CHECK32: %[[SIZE:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]]]
+// CHECK32: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]]) : memref<?xi32>
+// CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG2]], %[[ARG4]]]
+// CHECK32: %[[VEC_I8:.+]] = vector.bitcast %[[ARG0]] : vector<8xi4> to vector<1xi32>
+// CHECK32: vector.store %[[VEC_I8:.+]], %[[ALLOC:.+]][%[[INDEX:.+]]] : memref<?xi32>, vector<1xi32>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_block.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_block.mlir
new file mode 100644
index 00000000000000..ec14492c5b4499
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_block.mlir
@@ -0,0 +1,100 @@
+//--------------------------------------------------------------------------------------------------
+// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
+//
+// Set-up that's shared across all tests in this directory. In principle, this
+// config could be moved to lit.local.cfg. However, there are downstream users that
+// do not use these LIT config files. Hence why this is kept inline.
+//
+// DEFINE: %{sparse_compiler_opts} = enable-runtime-library=true
+// DEFINE: %{sparse_compiler_opts_sve} = enable-arm-sve=true %{sparse_compiler_opts}
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts}"
+// DEFINE: %{compile_sve} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts_sve}"
+// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
+// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
+// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
+//
+// DEFINE: %{env} =
+//--------------------------------------------------------------------------------------------------
+
+// RUN: %{compile} | %{run} | FileCheck %s
+//
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false enable-buffer-initialization=true
+// RUN: %{compile} | %{run} | FileCheck %s
+//
+// Do the same run, but now with direct IR generation and vectorization.
+// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
+// RUN: %{compile} | %{run} | FileCheck %s
+//
+// Do the same run, but now with direct IR generation and VLA vectorization.
+// RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}
+
+#CSR = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed)
+}>
+
+#CSC = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d1 : dense, d0 : compressed)
+}>
+
+#BSR = #sparse_tensor.encoding<{
+  map = ( i, j ) ->
+  ( i floordiv 2 : dense,
+    j floordiv 2 : compressed,
+    i mod 2 : dense,
+    j mod 2 : dense
+  )
+}>
+
+
+//
+// Integration test that tests conversions between sparse tensors.
+//
+module {
+  //
+  // Output utilities.
+  //
+  func.func @dumpf64(%arg0: memref<?xf64>) {
+    %c0 = arith.constant 0 : index
+    %d0 = arith.constant -1.0 : f64
+    %0 = vector.transfer_read %arg0[%c0], %d0 : memref<?xf64>, vector<8xf64>
+    vector.print %0 : vector<8xf64>
+    return
+  }
+
+  //
+  // Main driver.
+  //
+  func.func @entry() {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
+
+    //
+    // Initialize a 2-dim dense tensor.
+    //
+    %t = arith.constant dense<[
+      [ 1.0, 2.0, 3.0, 4.0 ],
+      [ 5.0, 6.0, 7.0, 8.0 ]
+    ]> : tensor<2x4xf64>
+
+
+    %1 = sparse_tensor.convert %t : tensor<2x4xf64> to tensor<2x4xf64, #CSR>
+    %2 = sparse_tensor.convert %1 : tensor<2x4xf64, #CSR> to tensor<2x4xf64, #BSR>
+    %3 = sparse_tensor.convert %2 : tensor<2x4xf64, #BSR> to tensor<2x4xf64, #CSC>
+
+    %v1 = sparse_tensor.values %1 : tensor<2x4xf64, #CSR> to memref<?xf64>
+    %v2 = sparse_tensor.values %2 : tensor<2x4xf64, #BSR> to memref<?xf64>
+    %v3 = sparse_tensor.values %3 : tensor<2x4xf64, #CSC> to memref<?xf64>
+
+    // CHECK:      ( 1, 2, 3, 4, 5, 6, 7, 8 )
+    // CHECK-NEXT: ( 1, 2, 5, 6, 3, 4, 7, 8 )
+    // CHECK-NEXT: ( 1, 5, 2, 6, 3, 7, 4, 8 )
+    call @dumpf64(%v1) : (memref<?xf64>) -> ()
+    call @dumpf64(%v2) : (memref<?xf64>) -> ()
+    call @dumpf64(%v3) : (memref<?xf64>) -> ()
+
+    return
+  }
+}
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 114b4d0fa9180e..669e7227609e14 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2402,10 +2402,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
       return Err;
     GridValues.GV_Warp_Size = WavefrontSize;

-    // Get the frequency of the steady clock.
-    if (auto Err = getDeviceAttr(HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY,
-                                 ClockFrequency))
-      return Err;
+    // Get the frequency of the steady clock. If the attribute is missing,
+    // assume we are running on an older libhsa and default to 0: omp_get_wtime
+    // will be inaccurate, but programs can otherwise still run.
+    if (auto Err = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY,
+                                    ClockFrequency))
+      ClockFrequency = 0;

     // Load the grid values dependending on the wavefront.
     if (WavefrontSize == 32)
diff --git a/openmp/libomptarget/test/offloading/generic_multiple_parallel_regions.c b/openmp/libomptarget/test/offloading/generic_multiple_parallel_regions.c
new file mode 100644
index 00000000000000..33fe2ca127b6dc
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/generic_multiple_parallel_regions.c
@@ -0,0 +1,29 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+// RUN: %libomptarget-compileopt-run-and-check-generic
+
+#include <omp.h>
+#include <stdio.h>
+
+__attribute__((optnone)) void optnone() {}
+
+int main() {
+  int i = 0;
+#pragma omp target teams num_teams(1) map(tofrom : i)
+  {
+    optnone();
+#pragma omp parallel
+    if (omp_get_thread_num() == 0)
+      ++i;
+#pragma omp parallel
+    if (omp_get_thread_num() == 0)
+      ++i;
+#pragma omp parallel
+    if (omp_get_thread_num() == 0)
+      ++i;
+#pragma omp parallel
+    if (omp_get_thread_num() == 0)
+      ++i;
+  }
+  // CHECK: 4
+  printf("%i\n", i);
+}
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index 1ca2493a496372..43b251d9482e27 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -903,6 +903,7 @@ cc_library(
         ":type_nodes_gen",
         "//llvm:BinaryFormat",
         "//llvm:Core",
+        "//llvm:FrontendHLSL",
        "//llvm:FrontendOpenMP",
        "//llvm:Support",
        "//llvm:TargetParser",