diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 741c97a3dc009b..f2f1c93ea225c8 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -16,727 +16,9 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// -// AArch64 Subtarget features. -// - -// Each SubtargetFeature which corresponds to an Arm Architecture feature should -// be annotated with the respective FEAT_ feature name from the Architecture -// Reference Manual. If a SubtargetFeature enables instructions from multiple -// Arm Architecture Features, it should list all the relevant features. Not all -// FEAT_ features have a corresponding SubtargetFeature. - -def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", - "Enable ARMv8 FP (FEAT_FP)">; - -def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", - "Enable Advanced SIMD instructions (FEAT_AdvSIMD)", [FeatureFPARMv8]>; - -def FeatureSM4 : SubtargetFeature< - "sm4", "HasSM4", "true", - "Enable SM3 and SM4 support (FEAT_SM4, FEAT_SM3)", [FeatureNEON]>; - -def FeatureSHA2 : SubtargetFeature< - "sha2", "HasSHA2", "true", - "Enable SHA1 and SHA256 support (FEAT_SHA1, FEAT_SHA256)", [FeatureNEON]>; - -def FeatureSHA3 : SubtargetFeature< - "sha3", "HasSHA3", "true", - "Enable SHA512 and SHA3 support (FEAT_SHA3, FEAT_SHA512)", [FeatureNEON, FeatureSHA2]>; - -def FeatureAES : SubtargetFeature< - "aes", "HasAES", "true", - "Enable AES support (FEAT_AES, FEAT_PMULL)", [FeatureNEON]>; - -// Crypto has been split up and any combination is now valid (see the -// crypto definitions above). Also, crypto is now context sensitive: -// it has a different meaning for e.g. Armv8.4 than it has for Armv8.2. -// Therefore, we rely on Clang, the user interfacing tool, to pass on the -// appropriate crypto options. But here in the backend, crypto has very little -// meaning anymore. 
We kept the Crypto definition here for backward -// compatibility, and now imply features SHA2 and AES, which was the -// "traditional" meaning of Crypto. -def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", - "Enable cryptographic instructions", [FeatureNEON, FeatureSHA2, FeatureAES]>; - -def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", - "Enable ARMv8 CRC-32 checksum instructions (FEAT_CRC32)">; - -def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", - "Enable ARMv8 Reliability, Availability and Serviceability Extensions (FEAT_RAS, FEAT_RASv1p1)">; - -def FeatureRASv2 : SubtargetFeature<"rasv2", "HasRASv2", "true", - "Enable ARMv8.9-A Reliability, Availability and Serviceability Extensions (FEAT_RASv2)", - [FeatureRAS]>; - -def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true", - "Enable ARMv8.1 Large System Extension (LSE) atomic instructions (FEAT_LSE)">; - -def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true", - "Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules (FEAT_LSE2)">; - -def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true", - "Enable out of line atomics to support LSE instructions">; - -def FeatureFMV : SubtargetFeature<"fmv", "HasFMV", "true", - "Enable Function Multi Versioning support.">; - -def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true", - "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions (FEAT_RDM)", - [FeatureNEON]>; - -def FeaturePAN : SubtargetFeature< - "pan", "HasPAN", "true", - "Enables ARM v8.1 Privileged Access-Never extension (FEAT_PAN)">; - -def FeatureLOR : SubtargetFeature< - "lor", "HasLOR", "true", - "Enables ARM v8.1 Limited Ordering Regions extension (FEAT_LOR)">; - -def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2", - "true", "Enable RW operand CONTEXTIDR_EL2" >; - -def FeatureVH : SubtargetFeature<"vh", "HasVH", "true", - "Enables ARM v8.1 Virtual Host extension (FEAT_VHE)", 
[FeatureCONTEXTIDREL2] >; - -// This SubtargetFeature is special. It controls only whether codegen will turn -// `llvm.readcyclecounter()` into an access to a PMUv3 System Register. The -// `FEAT_PMUv3*` system registers are always available for assembly/disassembly. -def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", - "Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension (FEAT_PMUv3)">; - -def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", - "Full FP16 (FEAT_FP16)", [FeatureFPARMv8]>; - -def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", - "Enable FP16 FML instructions (FEAT_FHM)", [FeatureFullFP16]>; - -def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", - "Enable Statistical Profiling extension (FEAT_SPE)">; - -def FeaturePAN_RWV : SubtargetFeature< - "pan-rwv", "HasPAN_RWV", "true", - "Enable v8.2 PAN s1e1R and s1e1W Variants (FEAT_PAN2)", - [FeaturePAN]>; - -// UAO PState -def FeaturePsUAO : SubtargetFeature< "uaops", "HasPsUAO", "true", - "Enable v8.2 UAO PState (FEAT_UAO)">; - -def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP", - "true", "Enable v8.2 data Cache Clean to Point of Persistence (FEAT_DPB)" >; - -def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", - "Enable Scalable Vector Extension (SVE) instructions (FEAT_SVE)", [FeatureFullFP16]>; - -def FeatureFPMR : SubtargetFeature<"fpmr", "HasFPMR", "true", - "Enable FPMR Register (FEAT_FPMR)">; - -def FeatureFP8 : SubtargetFeature<"fp8", "HasFP8", "true", - "Enable FP8 instructions (FEAT_FP8)">; - -// This flag is currently still labeled as Experimental, but when fully -// implemented this should tell the compiler to use the zeroing pseudos to -// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive -// lanes are known to be zero. The pseudos will then be expanded using the -// MOVPRFX instruction to zero the inactive lanes. 
This feature should only be -// enabled if MOVPRFX instructions are known to merge with the destructive -// operations they prefix. -// -// This feature could similarly be extended to support cheap merging of _any_ -// value into the inactive lanes using the MOVPRFX instruction that uses -// merging-predication. -def FeatureExperimentalZeroingPseudos - : SubtargetFeature<"use-experimental-zeroing-pseudos", - "UseExperimentalZeroingPseudos", "true", - "Hint to the compiler that the MOVPRFX instruction is " - "merged with destructive operations", - []>; - -def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl", - "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">; - -def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", - "true", "Enable BFloat16 Extension (FEAT_BF16)" >; - -def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r", - "NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">; - -def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true", - "Enable Scalable Vector Extension 2 (SVE2) instructions (FEAT_SVE2)", - [FeatureSVE, FeatureUseScalarIncVL]>; - -def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true", - "Enable AES SVE2 instructions (FEAT_SVE_AES, FEAT_SVE_PMULL128)", - [FeatureSVE2, FeatureAES]>; - -def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true", - "Enable SM4 SVE2 instructions (FEAT_SVE_SM4)", [FeatureSVE2, FeatureSM4]>; - -def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true", - "Enable SHA3 SVE2 instructions (FEAT_SVE_SHA3)", [FeatureSVE2, FeatureSHA3]>; - -def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true", - "Enable bit permutation SVE2 instructions (FEAT_SVE_BitPerm)", [FeatureSVE2]>; - -def FeatureSVE2p1: SubtargetFeature<"sve2p1", "HasSVE2p1", "true", - "Enable Scalable Vector Extension 2.1 instructions", [FeatureSVE2]>; - -def FeatureB16B16 : SubtargetFeature<"b16b16", "HasB16B16", "true", - "Enable SVE2.1 or SME2.1 
non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", [FeatureBF16]>; - -def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", - "Has zero-cycle register moves">; - -def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true", - "Has zero-cycle zeroing instructions for generic registers">; - -// It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0". -// as movi is more efficient across all cores. Newer cores can eliminate -// fmovs early and there is no difference with movi, but this not true for -// all implementations. -def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false", - "Has no zero-cycle zeroing instructions for FP registers">; - -def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", - "Has zero-cycle zeroing instructions", - [FeatureZCZeroingGP]>; - -/// ... but the floating-point version doesn't quite work in rare cases on older -/// CPUs. -def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround", - "HasZeroCycleZeroingFPWorkaround", "true", - "The zero-cycle floating-point zeroing instruction has a bug">; - -def FeatureStrictAlign : SubtargetFeature<"strict-align", - "RequiresStrictAlign", "true", - "Disallow all unaligned memory " - "access">; - -foreach i = {1-7,9-15,18,20-28} in - def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true", - "Reserve X"#i#", making it unavailable " - "as a GPR">; - -foreach i = {8-15,18} in - def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i, - "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">; - -def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps", - "true", - "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">; - -def FeaturePredictableSelectIsExpensive : SubtargetFeature< - "predictable-select-expensive", "PredictableSelectIsExpensive", "true", - "Prefer likely 
predicted branches over selects">; - -def FeatureEnableSelectOptimize : SubtargetFeature< - "enable-select-opt", "EnableSelectOptimize", "true", - "Enable the select optimize pass for select loop heuristics">; - -def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move", - "HasExynosCheapAsMoveHandling", "true", - "Use Exynos specific handling of cheap instructions">; - -def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", - "UsePostRAScheduler", "true", "Schedule again after register allocation">; - -def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store", - "IsMisaligned128StoreSlow", "true", "Misaligned 128 bit stores are slow">; - -def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", - "IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">; - -def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address", - "IsStoreAddressAscend", "true", - "Schedule vector stores by ascending address">; - -def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow", - "true", "STR of Q register with register offset is slow">; - -def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< - "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern", - "true", "Use alternative pattern for sextload convert to f32">; - -def FeatureArithmeticBccFusion : SubtargetFeature< - "arith-bcc-fusion", "HasArithmeticBccFusion", "true", - "CPU fuses arithmetic+bcc operations">; - -def FeatureArithmeticCbzFusion : SubtargetFeature< - "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", - "CPU fuses arithmetic + cbz/cbnz operations">; - -def FeatureCmpBccFusion : SubtargetFeature< - "cmp-bcc-fusion", "HasCmpBccFusion", "true", - "CPU fuses cmp+bcc operations">; - -def FeatureFuseAddress : SubtargetFeature< - "fuse-address", "HasFuseAddress", "true", - "CPU fuses address generation and memory operations">; - -def FeatureFuseAES : SubtargetFeature< - "fuse-aes", 
"HasFuseAES", "true", - "CPU fuses AES crypto operations">; - -def FeatureFuseArithmeticLogic : SubtargetFeature< - "fuse-arith-logic", "HasFuseArithmeticLogic", "true", - "CPU fuses arithmetic and logic operations">; - -def FeatureFuseCCSelect : SubtargetFeature< - "fuse-csel", "HasFuseCCSelect", "true", - "CPU fuses conditional select operations">; - -def FeatureFuseCryptoEOR : SubtargetFeature< - "fuse-crypto-eor", "HasFuseCryptoEOR", "true", - "CPU fuses AES/PMULL and EOR operations">; - -def FeatureFuseAdrpAdd : SubtargetFeature< - "fuse-adrp-add", "HasFuseAdrpAdd", "true", - "CPU fuses adrp+add operations">; - -def FeatureFuseLiterals : SubtargetFeature< - "fuse-literals", "HasFuseLiterals", "true", - "CPU fuses literal generation operations">; - -def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature< - "fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true", - "CPU fuses (a + b + 1) and (a - b - 1)">; - -def FeatureDisableLatencySchedHeuristic : SubtargetFeature< - "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", - "Disable latency scheduling heuristic">; - -def FeatureStorePairSuppress : SubtargetFeature< - "store-pair-suppress", "EnableStorePairSuppress", "true", - "Enable Store Pair Suppression heuristics">; - -def FeatureForce32BitJumpTables - : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true", - "Force jump table entries to be 32-bits wide except at MinSize">; - -def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true", - "Enable support for RCPC extension (FEAT_LRCPC)">; - -def FeatureUseRSqrt : SubtargetFeature< - "use-reciprocal-square-root", "UseRSqrt", "true", - "Use the reciprocal square root approximation">; - -def FeatureDotProd : SubtargetFeature< - "dotprod", "HasDotProd", "true", - "Enable dot product support (FEAT_DotProd)", [FeatureNEON]>; - -def FeaturePAuth : SubtargetFeature< - "pauth", "HasPAuth", "true", - "Enable v8.3-A Pointer Authentication extension (FEAT_PAuth)">; 
- -def FeatureJS : SubtargetFeature< - "jsconv", "HasJS", "true", - "Enable v8.3-A JavaScript FP conversion instructions (FEAT_JSCVT)", - [FeatureFPARMv8]>; - -def FeatureCCIDX : SubtargetFeature< - "ccidx", "HasCCIDX", "true", - "Enable v8.3-A Extend of the CCSIDR number of sets (FEAT_CCIDX)">; - -def FeatureComplxNum : SubtargetFeature< - "complxnum", "HasComplxNum", "true", - "Enable v8.3-A Floating-point complex number support (FEAT_FCMA)", - [FeatureNEON]>; - -def FeatureNV : SubtargetFeature< - "nv", "HasNV", "true", - "Enable v8.4-A Nested Virtualization Enchancement (FEAT_NV, FEAT_NV2)">; - -def FeatureMPAM : SubtargetFeature< - "mpam", "HasMPAM", "true", - "Enable v8.4-A Memory system Partitioning and Monitoring extension (FEAT_MPAM)">; - -def FeatureDIT : SubtargetFeature< - "dit", "HasDIT", "true", - "Enable v8.4-A Data Independent Timing instructions (FEAT_DIT)">; - -def FeatureTRACEV8_4 : SubtargetFeature< - "tracev8.4", "HasTRACEV8_4", "true", - "Enable v8.4-A Trace extension (FEAT_TRF)">; - -def FeatureAM : SubtargetFeature< - "am", "HasAM", "true", - "Enable v8.4-A Activity Monitors extension (FEAT_AMUv1)">; - -def FeatureAMVS : SubtargetFeature< - "amvs", "HasAMVS", "true", - "Enable v8.6-A Activity Monitors Virtualization support (FEAT_AMUv1p1)", - [FeatureAM]>; - -def FeatureSEL2 : SubtargetFeature< - "sel2", "HasSEL2", "true", - "Enable v8.4-A Secure Exception Level 2 extension (FEAT_SEL2)">; - -def FeatureTLB_RMI : SubtargetFeature< - "tlb-rmi", "HasTLB_RMI", "true", - "Enable v8.4-A TLB Range and Maintenance Instructions (FEAT_TLBIOS, FEAT_TLBIRANGE)">; - -def FeatureFlagM : SubtargetFeature< - "flagm", "HasFlagM", "true", - "Enable v8.4-A Flag Manipulation Instructions (FEAT_FlagM)">; - -// 8.4 RCPC enchancements: LDAPR & STLR instructions with Immediate Offset -def FeatureRCPC_IMMO : SubtargetFeature<"rcpc-immo", "HasRCPC_IMMO", "true", - "Enable v8.4-A RCPC instructions with Immediate Offsets (FEAT_LRCPC2)", - [FeatureRCPC]>; - -def 
FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", - "NegativeImmediates", "false", - "Convert immediates and instructions " - "to their negated or complemented " - "equivalent when the immediate does " - "not fit in the encoding.">; - -// Address operands with shift amount 2 or 3 are fast on all Arm chips except -// some old Apple cores (A7-A10?) which handle all shifts slowly. Cortex-A57 -// and derived designs through Cortex-X1 take an extra micro-op for shifts -// of 1 or 4. Other Arm chips handle all shifted operands at the same speed -// as unshifted operands. -// -// We don't try to model the behavior of the old Apple cores because new code -// targeting A7 is very unlikely to actually run on an A7. The Cortex cores -// are modeled by FeatureAddrLSLSlow14. -def FeatureAddrLSLSlow14 : SubtargetFeature< - "addr-lsl-slow-14", "HasAddrLSLSlow14", "true", - "Address operands with shift amount of 1 or 4 are slow">; - -def FeatureALULSLFast : SubtargetFeature< - "alu-lsl-fast", "HasALULSLFast", "true", - "Add/Sub operations with lsl shift <= 4 are cheap">; - -def FeatureAggressiveFMA : - SubtargetFeature<"aggressive-fma", - "HasAggressiveFMA", - "true", - "Enable Aggressive FMA for floating-point.">; - -def FeatureAltFPCmp : SubtargetFeature<"altnzcv", "HasAlternativeNZCV", "true", - "Enable alternative NZCV format for floating point comparisons (FEAT_FlagM2)">; - -def FeatureFRInt3264 : SubtargetFeature<"fptoint", "HasFRInt3264", "true", - "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to " - "an integer (in FP format) forcing it to fit into a 32- or 64-bit int (FEAT_FRINTTS)" >; - -def FeatureSpecRestrict : SubtargetFeature<"specrestrict", "HasSpecRestrict", - "true", "Enable architectural speculation restriction (FEAT_CSV2_2)">; - -def FeatureSB : SubtargetFeature<"sb", "HasSB", - "true", "Enable v8.5 Speculation Barrier (FEAT_SB)" >; - -def FeatureSSBS : SubtargetFeature<"ssbs", "HasSSBS", - "true", "Enable 
Speculative Store Bypass Safe bit (FEAT_SSBS, FEAT_SSBS2)" >; - -def FeaturePredRes : SubtargetFeature<"predres", "HasPredRes", "true", - "Enable v8.5a execution and data prediction invalidation instructions (FEAT_SPECRES)" >; - -def FeatureCacheDeepPersist : SubtargetFeature<"ccdp", "HasCCDP", - "true", "Enable v8.5 Cache Clean to Point of Deep Persistence (FEAT_DPB2)" >; - -def FeatureBranchTargetId : SubtargetFeature<"bti", "HasBTI", - "true", "Enable Branch Target Identification (FEAT_BTI)" >; - -def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen", - "true", "Enable Random Number generation instructions (FEAT_RNG)" >; - -def FeatureMTE : SubtargetFeature<"mte", "HasMTE", - "true", "Enable Memory Tagging Extension (FEAT_MTE, FEAT_MTE2)" >; - -def FeatureTRBE : SubtargetFeature<"trbe", "HasTRBE", - "true", "Enable Trace Buffer Extension (FEAT_TRBE)">; - -def FeatureETE : SubtargetFeature<"ete", "HasETE", - "true", "Enable Embedded Trace Extension (FEAT_ETE)", - [FeatureTRBE]>; - -def FeatureTME : SubtargetFeature<"tme", "HasTME", - "true", "Enable Transactional Memory Extension (FEAT_TME)" >; - -def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals", - "AllowTaggedGlobals", - "true", "Use an instruction sequence for taking the address of a global " - "that allows a memory tag in the upper address bits">; - -def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8", - "true", "Enable Matrix Multiply Int8 Extension (FEAT_I8MM)">; - -def FeatureMatMulFP32 : SubtargetFeature<"f32mm", "HasMatMulFP32", - "true", "Enable Matrix Multiply FP32 Extension (FEAT_F32MM)", [FeatureSVE]>; - -def FeatureMatMulFP64 : SubtargetFeature<"f64mm", "HasMatMulFP64", - "true", "Enable Matrix Multiply FP64 Extension (FEAT_F64MM)", [FeatureSVE]>; - -def FeatureXS : SubtargetFeature<"xs", "HasXS", - "true", "Enable Armv8.7-A limited-TLB-maintenance instruction (FEAT_XS)">; - -def FeatureWFxT : SubtargetFeature<"wfxt", "HasWFxT", - "true", "Enable Armv8.7-A WFET and WFIT 
instruction (FEAT_WFxT)">; - -def FeatureHCX : SubtargetFeature< - "hcx", "HasHCX", "true", "Enable Armv8.7-A HCRX_EL2 system register (FEAT_HCX)">; - -def FeatureLS64 : SubtargetFeature<"ls64", "HasLS64", - "true", "Enable Armv8.7-A LD64B/ST64B Accelerator Extension (FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA)">; - -def FeatureHBC : SubtargetFeature<"hbc", "HasHBC", - "true", "Enable Armv8.8-A Hinted Conditional Branches Extension (FEAT_HBC)">; - -def FeatureMOPS : SubtargetFeature<"mops", "HasMOPS", - "true", "Enable Armv8.8-A memcpy and memset acceleration instructions (FEAT_MOPS)">; - -def FeatureNMI : SubtargetFeature<"nmi", "HasNMI", - "true", "Enable Armv8.8-A Non-maskable Interrupts (FEAT_NMI, FEAT_GICv3_NMI)">; - -def FeatureBRBE : SubtargetFeature<"brbe", "HasBRBE", - "true", "Enable Branch Record Buffer Extension (FEAT_BRBE)">; - -def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF", - "true", "Enable extra register in the Statistical Profiling Extension (FEAT_SPEv1p2)">; - -def FeatureFineGrainedTraps : SubtargetFeature<"fgt", "HasFineGrainedTraps", - "true", "Enable fine grained virtualization traps extension (FEAT_FGT)">; - -def FeatureEnhancedCounterVirtualization : - SubtargetFeature<"ecv", "HasEnhancedCounterVirtualization", - "true", "Enable enhanced counter virtualization extension (FEAT_ECV)">; - -def FeatureRME : SubtargetFeature<"rme", "HasRME", - "true", "Enable Realm Management Extension (FEAT_RME)">; - -def FeatureSME : SubtargetFeature<"sme", "HasSME", "true", - "Enable Scalable Matrix Extension (SME) (FEAT_SME)", [FeatureBF16, FeatureUseScalarIncVL]>; - -def FeatureSMEF64F64 : SubtargetFeature<"sme-f64f64", "HasSMEF64F64", "true", - "Enable Scalable Matrix Extension (SME) F64F64 instructions (FEAT_SME_F64F64)", [FeatureSME]>; - -def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true", - "Enable Scalable Matrix Extension (SME) I16I64 instructions (FEAT_SME_I16I64)", [FeatureSME]>; - -def FeatureSMEF16F16 : 
SubtargetFeature<"sme-f16f16", "HasSMEF16F16", "true", - "Enable SME2.1 non-widening Float16 instructions (FEAT_SME_F16F16)", []>; - -def FeatureSMEFA64 : SubtargetFeature<"sme-fa64", "HasSMEFA64", "true", - "Enable the full A64 instruction set in streaming SVE mode (FEAT_SME_FA64)", [FeatureSME, FeatureSVE2]>; - -def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true", - "Enable Scalable Matrix Extension 2 (SME2) instructions", [FeatureSME]>; - -def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true", - "Enable Scalable Matrix Extension 2.1 (FEAT_SME2p1) instructions", [FeatureSME2]>; - -def FeatureFAMINMAX: SubtargetFeature<"faminmax", "HasFAMINMAX", "true", - "Enable FAMIN and FAMAX instructions (FEAT_FAMINMAX)">; - -def FeatureFP8FMA : SubtargetFeature<"fp8fma", "HasFP8FMA", "true", - "Enable fp8 multiply-add instructions (FEAT_FP8FMA)">; - -def FeatureSSVE_FP8FMA : SubtargetFeature<"ssve-fp8fma", "HasSSVE_FP8FMA", "true", - "Enable SVE2 fp8 multiply-add instructions (FEAT_SSVE_FP8FMA)", [FeatureSME2]>; - -def FeatureFP8DOT2: SubtargetFeature<"fp8dot2", "HasFP8DOT2", "true", - "Enable fp8 2-way dot instructions (FEAT_FP8DOT2)">; - -def FeatureSSVE_FP8DOT2 : SubtargetFeature<"ssve-fp8dot2", "HasSSVE_FP8DOT2", "true", - "Enable SVE2 fp8 2-way dot product instructions (FEAT_SSVE_FP8DOT2)", [FeatureSME2]>; - -def FeatureFP8DOT4: SubtargetFeature<"fp8dot4", "HasFP8DOT4", "true", - "Enable fp8 4-way dot instructions (FEAT_FP8DOT4)">; - -def FeatureSSVE_FP8DOT4 : SubtargetFeature<"ssve-fp8dot4", "HasSSVE_FP8DOT4", "true", - "Enable SVE2 fp8 4-way dot product instructions (FEAT_SSVE_FP8DOT4)", [FeatureSME2]>; -def FeatureLUT: SubtargetFeature<"lut", "HasLUT", "true", - "Enable Lookup Table instructions (FEAT_LUT)">; - -def FeatureSME_LUTv2 : SubtargetFeature<"sme-lutv2", "HasSME_LUTv2", "true", - "Enable Scalable Matrix Extension (SME) LUTv2 instructions (FEAT_SME_LUTv2)">; - -def FeatureSMEF8F16 : SubtargetFeature<"sme-f8f16", "HasSMEF8F16", "true", - 
"Enable Scalable Matrix Extension (SME) F8F16 instructions(FEAT_SME_F8F16)", [FeatureSME2, FeatureFP8]>; - -def FeatureSMEF8F32 : SubtargetFeature<"sme-f8f32", "HasSMEF8F32", "true", - "Enable Scalable Matrix Extension (SME) F8F32 instructions (FEAT_SME_F8F32)", [FeatureSME2, FeatureFP8]>; - -def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true", - "Apple A7 (the CPU formerly known as Cyclone)">; - -def FeatureEL2VMSA : SubtargetFeature<"el2vmsa", "HasEL2VMSA", "true", - "Enable Exception Level 2 Virtual Memory System Architecture">; - -def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true", - "Enable Exception Level 3">; - -def FeatureCSSC : SubtargetFeature<"cssc", "HasCSSC", "true", - "Enable Common Short Sequence Compression (CSSC) instructions (FEAT_CSSC)">; - -def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769", - "FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">; - -def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice", - "NoBTIAtReturnTwice", "true", - "Don't place a BTI instruction " - "after a return-twice">; - -def FeatureCHK : SubtargetFeature<"chk", "HasCHK", - "true", "Enable Armv8.0-A Check Feature Status Extension (FEAT_CHK)">; - -def FeatureGCS : SubtargetFeature<"gcs", "HasGCS", - "true", "Enable Armv9.4-A Guarded Call Stack Extension", [FeatureCHK]>; - -def FeatureCLRBHB : SubtargetFeature<"clrbhb", "HasCLRBHB", - "true", "Enable Clear BHB instruction (FEAT_CLRBHB)">; - -def FeaturePRFM_SLC : SubtargetFeature<"prfm-slc-target", "HasPRFM_SLC", - "true", "Enable SLC target for PRFM instruction">; - -def FeatureSPECRES2 : SubtargetFeature<"specres2", "HasSPECRES2", - "true", "Enable Speculation Restriction Instruction (FEAT_SPECRES2)", - [FeaturePredRes]>; - -def FeatureMEC : SubtargetFeature<"mec", "HasMEC", - "true", "Enable Memory Encryption Contexts Extension", [FeatureRME]>; - -def FeatureITE : SubtargetFeature<"ite", "HasITE", - "true", "Enable 
Armv9.4-A Instrumentation Extension FEAT_ITE", [FeatureETE, - FeatureTRBE]>; - -def FeatureRCPC3 : SubtargetFeature<"rcpc3", "HasRCPC3", - "true", "Enable Armv8.9-A RCPC instructions for A64 and Advanced SIMD and floating-point instruction set (FEAT_LRCPC3)", - [FeatureRCPC_IMMO]>; - -def FeatureTHE : SubtargetFeature<"the", "HasTHE", - "true", "Enable Armv8.9-A Translation Hardening Extension (FEAT_THE)">; - -def FeatureLSE128 : SubtargetFeature<"lse128", "HasLSE128", - "true", "Enable Armv9.4-A 128-bit Atomic Instructions (FEAT_LSE128)", - [FeatureLSE]>; - -// FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, and FEAT_SYSINSTR128 are mutually implicit. -// Therefore group them all under a single feature flag, d128: -def FeatureD128 : SubtargetFeature<"d128", "HasD128", - "true", "Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers " - "and Instructions (FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128)", - [FeatureLSE128]>; - -def FeatureDisableLdp : SubtargetFeature<"disable-ldp", "HasDisableLdp", - "true", "Do not emit ldp">; - -def FeatureDisableStp : SubtargetFeature<"disable-stp", "HasDisableStp", - "true", "Do not emit stp">; - -def FeatureLdpAlignedOnly : SubtargetFeature<"ldp-aligned-only", "HasLdpAlignedOnly", - "true", "In order to emit ldp, first check if the load will be aligned to 2 * element_size">; - -def FeatureStpAlignedOnly : SubtargetFeature<"stp-aligned-only", "HasStpAlignedOnly", - "true", "In order to emit stp, first check if the store will be aligned to 2 * element_size">; - -// AArch64 2023 Architecture Extensions (v9.5-A) - -def FeatureCPA : SubtargetFeature<"cpa", "HasCPA", "true", - "Enable Armv9.5-A Checked Pointer Arithmetic (FEAT_CPA)">; - -def FeaturePAuthLR : SubtargetFeature<"pauth-lr", "HasPAuthLR", - "true", "Enable Armv9.5-A PAC enhancements (FEAT_PAuth_LR)">; - -def FeatureTLBIW : SubtargetFeature<"tlbiw", "HasTLBIW", "true", - "Enable ARMv9.5-A TLBI VMALL for Dirty State (FEAT_TLBIW)">; - +// Subtarget features. 
//===----------------------------------------------------------------------===// -// Architectures. -// -def HasV8_0aOps : SubtargetFeature<"v8a", "HasV8_0aOps", "true", - "Support ARM v8.0a instructions", [FeatureEL2VMSA, FeatureEL3]>; - -def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", - "Support ARM v8.1a instructions", [HasV8_0aOps, FeatureCRC, FeatureLSE, - FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH]>; - -def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", - "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO, - FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>; - -def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", - "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePAuth, - FeatureJS, FeatureCCIDX, FeatureComplxNum]>; - -def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", - "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd, - FeatureNV, FeatureMPAM, FeatureDIT, - FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI, - FeatureFlagM, FeatureRCPC_IMMO, FeatureLSE2]>; - -def HasV8_5aOps : SubtargetFeature< - "v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions", - [HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict, - FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist, - FeatureBranchTargetId]>; - -def HasV8_6aOps : SubtargetFeature< - "v8.6a", "HasV8_6aOps", "true", "Support ARM v8.6a instructions", - [HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps, - FeatureEnhancedCounterVirtualization, FeatureMatMulInt8]>; - -def HasV8_7aOps : SubtargetFeature< - "v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions", - [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>; - -def HasV8_8aOps : SubtargetFeature< - "v8.8a", "HasV8_8aOps", "true", "Support ARM v8.8a instructions", - [HasV8_7aOps, FeatureHBC, FeatureMOPS, FeatureNMI]>; - -def HasV8_9aOps : SubtargetFeature< - "v8.9a", "HasV8_9aOps", "true", 
"Support ARM v8.9a instructions", - [HasV8_8aOps, FeatureCLRBHB, FeaturePRFM_SLC, FeatureSPECRES2, - FeatureCSSC, FeatureRASv2, FeatureCHK]>; - -def HasV9_0aOps : SubtargetFeature< - "v9a", "HasV9_0aOps", "true", "Support ARM v9a instructions", - [HasV8_5aOps, FeatureMEC, FeatureSVE2]>; - -def HasV9_1aOps : SubtargetFeature< - "v9.1a", "HasV9_1aOps", "true", "Support ARM v9.1a instructions", - [HasV8_6aOps, HasV9_0aOps]>; - -def HasV9_2aOps : SubtargetFeature< - "v9.2a", "HasV9_2aOps", "true", "Support ARM v9.2a instructions", - [HasV8_7aOps, HasV9_1aOps]>; - -def HasV9_3aOps : SubtargetFeature< - "v9.3a", "HasV9_3aOps", "true", "Support ARM v9.3a instructions", - [HasV8_8aOps, HasV9_2aOps]>; - -def HasV9_4aOps : SubtargetFeature< - "v9.4a", "HasV9_4aOps", "true", "Support ARM v9.4a instructions", - [HasV8_9aOps, HasV9_3aOps]>; - -def HasV9_5aOps : SubtargetFeature< - "v9.5a", "HasV9_5aOps", "true", "Support ARM v9.5a instructions", - [HasV9_4aOps, FeatureCPA]>; - -def HasV8_0rOps : SubtargetFeature< - "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions", - [//v8.1 - FeatureCRC, FeaturePAN, FeatureLSE, FeatureCONTEXTIDREL2, - //v8.2 - FeatureRAS, FeaturePsUAO, FeatureCCPP, FeaturePAN_RWV, - //v8.3 - FeatureCCIDX, FeaturePAuth, FeatureRCPC, - //v8.4 - FeatureTRACEV8_4, FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2, - FeatureRCPC_IMMO, - // Not mandatory in v8.0-R, but included here on the grounds that it - // only enables names of system registers - FeatureSpecRestrict - ]>; - -// Only intended to be used by disassemblers. 
-def FeatureAll - : SubtargetFeature<"all", "IsAll", "true", "Enable all instructions", []>; - -class AssemblerPredicateWithAll - : AssemblerPredicate<(any_of FeatureAll, cond), name>; +include "AArch64Features.td" //===----------------------------------------------------------------------===// // Register File Description @@ -765,30 +47,6 @@ def AArch64InstrInfo : InstrInfo; include "AArch64SystemOperands.td" -//===----------------------------------------------------------------------===// -// Access to privileged registers -//===----------------------------------------------------------------------===// - -foreach i = 1-3 in -def FeatureUseEL#i#ForTP : SubtargetFeature<"tpidr-el"#i, "UseEL"#i#"ForTP", - "true", "Permit use of TPIDR_EL"#i#" for the TLS base">; -def FeatureUseROEL0ForTP : SubtargetFeature<"tpidrro-el0", "UseROEL0ForTP", - "true", "Permit use of TPIDRRO_EL0 for the TLS base">; - -//===----------------------------------------------------------------------===// -// Control codegen mitigation against Straight Line Speculation vulnerability. -//===----------------------------------------------------------------------===// - -def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr", - "HardenSlsRetBr", "true", - "Harden against straight line speculation across RET and BR instructions">; -def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr", - "HardenSlsBlr", "true", - "Harden against straight line speculation across BLR instructions">; -def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat", - "HardenSlsNoComdat", "true", - "Generate thunk code for SLS mitigation in the normal text section">; - //===----------------------------------------------------------------------===// // AArch64 Processors supported. 
// @@ -852,920 +110,7 @@ include "AArch64SchedNeoverseN2.td" include "AArch64SchedNeoverseV1.td" include "AArch64SchedNeoverseV2.td" -def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", - "Cortex-A35 ARM processors">; - -def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", - "Cortex-A53 ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureBalanceFPOps, - FeaturePostRAScheduler]>; - -def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", - "Cortex-A55 ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeaturePostRAScheduler, - FeatureFuseAddress]>; - -def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510", - "Cortex-A510 ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeaturePostRAScheduler - ]>; - -def TuneA520 : SubtargetFeature<"a520", "ARMProcFamily", "CortexA520", - "Cortex-A520 ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeaturePostRAScheduler]>; - -def TuneA520AE : SubtargetFeature<"a520ae", "ARMProcFamily", "CortexA520", - "Cortex-A520AE ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeaturePostRAScheduler]>; - -def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", - "Cortex-A57 ARM processors", [ - FeatureFuseAES, - FeatureBalanceFPOps, - FeatureFuseAdrpAdd, - FeatureFuseLiterals, - FeatureAddrLSLSlow14, - FeaturePostRAScheduler, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65", - "Cortex-A65 ARM processors", [ - FeatureFuseAES, - FeatureFuseAddress, - FeatureFuseAdrpAdd, - FeatureFuseLiterals, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", - "Cortex-A72 ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureFuseLiterals, - FeatureAddrLSLSlow14, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - 
-def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", - "Cortex-A73 ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureAddrLSLSlow14, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", - "Cortex-A75 ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureAddrLSLSlow14, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", - "Cortex-A76 ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureAddrLSLSlow14, - FeatureALULSLFast, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", - "Cortex-A77 ARM processors", [ - FeatureCmpBccFusion, - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureAddrLSLSlow14, - FeatureALULSLFast, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78", - "Cortex-A78 ARM processors", [ - FeatureCmpBccFusion, - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureAddrLSLSlow14, - FeatureALULSLFast, - FeaturePostRAScheduler, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneA78AE : SubtargetFeature<"a78ae", "ARMProcFamily", - "CortexA78AE", - "Cortex-A78AE ARM processors", [ - FeatureCmpBccFusion, - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureAddrLSLSlow14, - FeatureALULSLFast, - FeaturePostRAScheduler, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily", - "CortexA78C", - "Cortex-A78C ARM processors", [ - FeatureCmpBccFusion, - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureAddrLSLSlow14, - FeatureALULSLFast, - FeaturePostRAScheduler, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneA710 : 
SubtargetFeature<"a710", "ARMProcFamily", "CortexA710", - "Cortex-A710 ARM processors", [ - FeatureCmpBccFusion, - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureALULSLFast, - FeaturePostRAScheduler, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715", - "Cortex-A715 ARM processors", [ - FeatureFuseAES, - FeaturePostRAScheduler, - FeatureCmpBccFusion, - FeatureALULSLFast, - FeatureFuseAdrpAdd, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720", - "Cortex-A720 ARM processors", [ - FeatureFuseAES, - FeaturePostRAScheduler, - FeatureCmpBccFusion, - FeatureALULSLFast, - FeatureFuseAdrpAdd, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneA720AE : SubtargetFeature<"a720ae", "ARMProcFamily", "CortexA720", - "Cortex-A720AE ARM processors", [ - FeatureFuseAES, - FeaturePostRAScheduler, - FeatureCmpBccFusion, - FeatureALULSLFast, - FeatureFuseAdrpAdd, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", - "CortexR82", - "Cortex-R82 ARM processors", [ - FeaturePostRAScheduler]>; - -def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", - "Cortex-X1 ARM processors", [ - FeatureCmpBccFusion, - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureAddrLSLSlow14, - FeatureALULSLFast, - FeaturePostRAScheduler, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", - "Cortex-X2 ARM processors", [ - FeatureCmpBccFusion, - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureALULSLFast, - FeaturePostRAScheduler, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3", - "Cortex-X3 ARM processors", [ - 
FeatureALULSLFast, - FeatureFuseAdrpAdd, - FeatureFuseAES, - FeaturePostRAScheduler, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4", - "Cortex-X4 ARM processors", [ - FeatureALULSLFast, - FeatureFuseAdrpAdd, - FeatureFuseAES, - FeaturePostRAScheduler, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", - "Fujitsu A64FX processors", [ - FeaturePostRAScheduler, - FeatureAggressiveFMA, - FeatureArithmeticBccFusion, - FeatureStorePairSuppress, - FeaturePredictableSelectIsExpensive]>; - -def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel", - "Nvidia Carmel processors">; - -// Note that cyclone does not fuse AES instructions, but newer apple chips do -// perform the fusion and cyclone is used by default when targetting apple OSes. -def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7", - "Apple A7 (the CPU formerly known as Cyclone)", [ - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAES, FeatureFuseCryptoEOR, - FeatureStorePairSuppress, - FeatureZCRegMove, - FeatureZCZeroing, - FeatureZCZeroingFPWorkaround]>; - -def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10", - "Apple A10", [ - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAES, - FeatureFuseCryptoEOR, - FeatureStorePairSuppress, - FeatureZCRegMove, - FeatureZCZeroing]>; - -def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11", - "Apple A11", [ - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAES, - FeatureFuseCryptoEOR, - 
FeatureStorePairSuppress, - FeatureZCRegMove, - FeatureZCZeroing]>; - -def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12", - "Apple A12", [ - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAES, - FeatureFuseCryptoEOR, - FeatureStorePairSuppress, - FeatureZCRegMove, - FeatureZCZeroing]>; - -def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13", - "Apple A13", [ - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAES, - FeatureFuseCryptoEOR, - FeatureStorePairSuppress, - FeatureZCRegMove, - FeatureZCZeroing]>; - -def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", - "Apple A14", [ - FeatureAggressiveFMA, - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAddress, - FeatureFuseAES, - FeatureFuseArithmeticLogic, - FeatureFuseCCSelect, - FeatureFuseCryptoEOR, - FeatureFuseLiterals, - FeatureStorePairSuppress, - FeatureZCRegMove, - FeatureZCZeroing]>; - -def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15", - "Apple A15", [ - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAddress, - FeatureFuseAES, - FeatureFuseArithmeticLogic, - FeatureFuseCCSelect, - FeatureFuseCryptoEOR, - FeatureFuseLiterals, - FeatureStorePairSuppress, - FeatureZCRegMove, - FeatureZCZeroing]>; - -def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16", - "Apple A16", [ - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAddress, - FeatureFuseAdrpAdd, - FeatureFuseAES, - 
FeatureFuseArithmeticLogic, - FeatureFuseCCSelect, - FeatureFuseCryptoEOR, - FeatureFuseLiterals, - FeatureStorePairSuppress, - FeatureZCRegMove, - FeatureZCZeroing]>; - -def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17", - "Apple A17", [ - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAddress, - FeatureFuseAdrpAdd, - FeatureFuseAES, - FeatureFuseArithmeticLogic, - FeatureFuseCCSelect, - FeatureFuseCryptoEOR, - FeatureFuseLiterals, - FeatureStorePairSuppress, - FeatureZCRegMove, - FeatureZCZeroing]>; - -def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", - "Samsung Exynos-M3 processors", - [FeatureExynosCheapAsMoveHandling, - FeatureForce32BitJumpTables, - FeatureFuseAddress, - FeatureFuseAES, - FeatureFuseCCSelect, - FeatureFuseAdrpAdd, - FeatureFuseLiterals, - FeatureStorePairSuppress, - FeatureALULSLFast, - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive]>; - -// Re-uses some scheduling and tunings from the ExynosM3 proc family. 
-def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3", - "Samsung Exynos-M4 processors", - [FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureExynosCheapAsMoveHandling, - FeatureForce32BitJumpTables, - FeatureFuseAddress, - FeatureFuseAES, - FeatureFuseArithmeticLogic, - FeatureFuseCCSelect, - FeatureFuseAdrpAdd, - FeatureFuseLiterals, - FeatureStorePairSuppress, - FeatureALULSLFast, - FeaturePostRAScheduler, - FeatureZCZeroing]>; - -def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", - "Qualcomm Kryo processors", [ - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive, - FeatureZCZeroing, - FeatureALULSLFast, - FeatureStorePairSuppress]>; - -def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", - "Qualcomm Falkor processors", [ - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive, - FeatureZCZeroing, - FeatureStorePairSuppress, - FeatureALULSLFast, - FeatureSlowSTRQro]>; - -def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1", - "Neoverse E1 ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeaturePostRAScheduler]>; - -def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1", - "Neoverse N1 ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureAddrLSLSlow14, - FeatureALULSLFast, - FeaturePostRAScheduler, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2", - "Neoverse N2 ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureALULSLFast, - FeaturePostRAScheduler, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Neoverse512TVB", - "Neoverse 512-TVB ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureALULSLFast, - FeaturePostRAScheduler, - 
FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1", - "Neoverse V1 ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureAddrLSLSlow14, - FeatureALULSLFast, - FeaturePostRAScheduler, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive, - FeatureNoSVEFPLD1R]>; - -def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2", - "Neoverse V2 ARM processors", [ - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureALULSLFast, - FeaturePostRAScheduler, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; - -def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", - "Qualcomm Saphira processors", [ - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive, - FeatureZCZeroing, - FeatureStorePairSuppress, - FeatureALULSLFast]>; - -def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99", - "Cavium ThunderX2 processors", [ - FeatureAggressiveFMA, - FeatureArithmeticBccFusion, - FeaturePostRAScheduler, - FeatureStorePairSuppress, - FeaturePredictableSelectIsExpensive]>; - -def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily", - "ThunderX3T110", - "Marvell ThunderX3 processors", [ - FeatureAggressiveFMA, - FeatureArithmeticBccFusion, - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive, - FeatureBalanceFPOps, - FeatureStorePairSuppress, - FeatureStrictAlign]>; - -def TuneThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", - "Cavium ThunderX processors", [ - FeaturePostRAScheduler, - FeatureStorePairSuppress, - FeaturePredictableSelectIsExpensive]>; - -def TuneThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily", - "ThunderXT88", - "Cavium ThunderX processors", [ - FeaturePostRAScheduler, - FeatureStorePairSuppress, - FeaturePredictableSelectIsExpensive]>; - -def TuneThunderXT81 : 
SubtargetFeature<"thunderxt81", "ARMProcFamily", - "ThunderXT81", - "Cavium ThunderX processors", [ - FeaturePostRAScheduler, - FeatureStorePairSuppress, - FeaturePredictableSelectIsExpensive]>; - -def TuneThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", - "ThunderXT83", - "Cavium ThunderX processors", [ - FeaturePostRAScheduler, - FeatureStorePairSuppress, - FeaturePredictableSelectIsExpensive]>; - -def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", - "HiSilicon TS-V110 processors", [ - FeatureFuseAES, - FeatureStorePairSuppress, - FeaturePostRAScheduler]>; - -def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", - "Ampere Computing Ampere-1 processors", [ - FeaturePostRAScheduler, - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureALULSLFast, - FeatureAggressiveFMA, - FeatureArithmeticBccFusion, - FeatureCmpBccFusion, - FeatureFuseAddress, - FeatureFuseLiterals, - FeatureStorePairSuppress, - FeatureLdpAlignedOnly, - FeatureStpAlignedOnly]>; - -def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A", - "Ampere Computing Ampere-1A processors", [ - FeaturePostRAScheduler, - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureALULSLFast, - FeatureAggressiveFMA, - FeatureArithmeticBccFusion, - FeatureCmpBccFusion, - FeatureFuseAddress, - FeatureFuseLiterals, - FeatureFuseAddSub2RegAndConstOne, - FeatureStorePairSuppress, - FeatureLdpAlignedOnly, - FeatureStpAlignedOnly]>; - -def TuneAmpere1B : SubtargetFeature<"ampere1b", "ARMProcFamily", "Ampere1B", - "Ampere Computing Ampere-1B processors", [ - FeaturePostRAScheduler, - FeatureFuseAES, - FeatureFuseAdrpAdd, - FeatureALULSLFast, - FeatureAggressiveFMA, - FeatureArithmeticBccFusion, - FeatureCmpBccFusion, - FeatureFuseAddress, - FeatureFuseLiterals, - FeatureStorePairSuppress, - FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive, - FeatureLdpAlignedOnly, - FeatureStpAlignedOnly]>; - -def ProcessorFeatures { - list A53 = [HasV8_0aOps, 
FeatureCRC, FeatureCrypto, - FeatureFPARMv8, FeatureNEON, FeaturePerfMon]; - list A55 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC, FeaturePerfMon]; - list A510 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, - FeatureMatMulInt8, FeatureBF16, FeatureAM, - FeatureMTE, FeatureETE, FeatureSVE2BitPerm, - FeatureFP16FML]; - list A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM, - FeatureMTE, FeatureETE, FeatureSVE2BitPerm, - FeatureFP16FML]; - list A520AE = [HasV9_2aOps, FeaturePerfMon, FeatureAM, - FeatureMTE, FeatureETE, FeatureSVE2BitPerm, - FeatureFP16FML]; - list A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC, FeatureSSBS, FeatureRAS, - FeaturePerfMon]; - list A76 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC, FeatureSSBS, FeaturePerfMon]; - list A77 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC, FeaturePerfMon, FeatureSSBS]; - list A78 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC, FeaturePerfMon, FeatureSPE, - FeatureSSBS]; - list A78AE = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC, FeaturePerfMon, FeatureSPE, - FeatureSSBS]; - list A78C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureFlagM, FeaturePAuth, - FeaturePerfMon, FeatureRCPC, FeatureSPE, - FeatureSSBS]; - list A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, - FeatureETE, FeatureMTE, FeatureFP16FML, - FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8]; - list A715 = [HasV9_0aOps, FeatureNEON, FeatureMTE, - FeatureFP16FML, FeatureSVE, FeatureTRBE, - FeatureSVE2BitPerm, FeatureBF16, FeatureETE, - FeaturePerfMon, FeatureMatMulInt8, FeatureSPE]; - list A720 = [HasV9_2aOps, 
FeatureMTE, FeatureFP16FML, - FeatureTRBE, FeatureSVE2BitPerm, FeatureETE, - FeaturePerfMon, FeatureSPE, FeatureSPE_EEF]; - list A720AE = [HasV9_2aOps, FeatureMTE, FeatureFP16FML, - FeatureTRBE, FeatureSVE2BitPerm, FeatureETE, - FeaturePerfMon, FeatureSPE, FeatureSPE_EEF]; - list R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16, - FeatureFP16FML, FeatureSSBS, FeaturePredRes, - FeatureSB, FeatureRDM, FeatureDotProd, - FeatureComplxNum, FeatureJS]; - list X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureRCPC, FeaturePerfMon, - FeatureSPE, FeatureFullFP16, FeatureDotProd, - FeatureSSBS]; - list X1C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureRCPC_IMMO, FeaturePerfMon, - FeatureSPE, FeatureFullFP16, FeatureDotProd, - FeaturePAuth, FeatureSSBS, FeatureFlagM, - FeatureLSE2]; - list X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, - FeatureMatMulInt8, FeatureBF16, FeatureAM, - FeatureMTE, FeatureETE, FeatureSVE2BitPerm, - FeatureFP16FML]; - list X3 = [HasV9_0aOps, FeatureSVE, FeatureNEON, - FeaturePerfMon, FeatureETE, FeatureTRBE, - FeatureSPE, FeatureBF16, FeatureMatMulInt8, - FeatureMTE, FeatureSVE2BitPerm, FeatureFullFP16, - FeatureFP16FML]; - list X4 = [HasV9_2aOps, - FeaturePerfMon, FeatureETE, FeatureTRBE, - FeatureSPE, FeatureMTE, FeatureSVE2BitPerm, - FeatureFP16FML, FeatureSPE_EEF]; - list A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON, - FeatureSHA2, FeaturePerfMon, FeatureFullFP16, - FeatureSVE, FeatureComplxNum]; - list Carmel = [HasV8_2aOps, FeatureNEON, FeatureCrypto, - FeatureFullFP16]; - list AppleA7 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON,FeaturePerfMon, FeatureAppleA7SysReg]; - list AppleA10 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureCRC, - FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH]; - list AppleA11 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureFullFP16]; - list AppleA12 = [HasV8_3aOps, 
FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureFullFP16]; - list AppleA13 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureFullFP16, - FeatureFP16FML, FeatureSHA3]; - list AppleA14 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureFRInt3264, - FeatureSpecRestrict, FeatureSSBS, FeatureSB, - FeaturePredRes, FeatureCacheDeepPersist, - FeatureFullFP16, FeatureFP16FML, FeatureSHA3, - FeatureAltFPCmp]; - list AppleA15 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureSHA3, - FeatureFullFP16, FeatureFP16FML]; - list AppleA16 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureSHA3, - FeatureFullFP16, FeatureFP16FML, - FeatureHCX]; - list AppleA17 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureSHA3, - FeatureFullFP16, FeatureFP16FML, - FeatureHCX]; - list ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, - FeaturePerfMon]; - list ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, - FeatureFullFP16, FeaturePerfMon]; - list Falkor = [HasV8_0aOps, FeatureCRC, FeatureCrypto, - FeatureFPARMv8, FeatureNEON, FeaturePerfMon, - FeatureRDM]; - list NeoverseE1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, - FeatureFPARMv8, FeatureFullFP16, FeatureNEON, - FeatureRCPC, FeatureSSBS, FeaturePerfMon]; - list NeoverseN1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, - FeatureFPARMv8, FeatureFullFP16, FeatureNEON, - FeatureRCPC, FeatureSPE, FeatureSSBS, - FeaturePerfMon]; - list NeoverseN2 = [HasV9_0aOps, FeatureBF16, FeatureETE, FeatureFP16FML, - FeatureMatMulInt8, FeatureMTE, FeatureSVE2, - FeatureSVE2BitPerm, FeatureTRBE, - FeaturePerfMon]; - list Neoverse512TVB = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist, - FeatureCrypto, FeatureFPARMv8, FeatureFP16FML, - FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, - FeaturePerfMon, FeatureRandGen, FeatureSPE, - FeatureSSBS, 
FeatureSVE]; - list NeoverseV1 = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist, - FeatureCrypto, FeatureFPARMv8, FeatureFP16FML, - FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, - FeaturePerfMon, FeatureRandGen, FeatureSPE, - FeatureSSBS, FeatureSVE]; - list NeoverseV2 = [HasV9_0aOps, FeatureBF16, FeatureSPE, - FeaturePerfMon, FeatureETE, FeatureMatMulInt8, - FeatureNEON, FeatureSVE2BitPerm, FeatureFP16FML, - FeatureMTE, FeatureRandGen]; - list Saphira = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureSPE, FeaturePerfMon]; - list ThunderX = [HasV8_0aOps, FeatureCRC, FeatureCrypto, - FeatureFPARMv8, FeaturePerfMon, FeatureNEON]; - list ThunderX2T99 = [HasV8_1aOps, FeatureCRC, FeatureCrypto, - FeatureFPARMv8, FeatureNEON, FeatureLSE]; - list ThunderX3T110 = [HasV8_3aOps, FeatureCRC, FeatureCrypto, - FeatureFPARMv8, FeatureNEON, FeatureLSE, - FeaturePAuth, FeaturePerfMon]; - list TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureSPE, - FeatureFullFP16, FeatureFP16FML, FeatureDotProd, - FeatureJS, FeatureComplxNum]; - list Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, - FeatureSSBS, FeatureRandGen, FeatureSB, - FeatureSHA2, FeatureSHA3, FeatureAES]; - list Ampere1A = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, - FeatureMTE, FeatureSSBS, FeatureRandGen, - FeatureSB, FeatureSM4, FeatureSHA2, - FeatureSHA3, FeatureAES]; - list Ampere1B = [HasV8_7aOps, FeatureNEON, FeaturePerfMon, - FeatureMTE, FeatureSSBS, FeatureRandGen, - FeatureSB, FeatureSM4, FeatureSHA2, - FeatureSHA3, FeatureAES, FeatureCSSC, - FeatureWFxT, FeatureFullFP16]; - - // ETE and TRBE are future architecture extensions. We temporarily enable them - // by default for users targeting generic AArch64. The extensions do not - // affect code generated by the compiler and can be used only by explicitly - // mentioning the new system register names in assembly. 
- list Generic = [FeatureFPARMv8, FeatureNEON, FeatureETE]; -} - -// FeatureFuseAdrpAdd is enabled under Generic to allow linker merging -// optimizations. -def : ProcessorModel<"generic", CortexA510Model, ProcessorFeatures.Generic, - [FeatureFuseAES, FeatureFuseAdrpAdd, FeaturePostRAScheduler, - FeatureEnableSelectOptimize]>; -def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53, - [TuneA35]>; -def : ProcessorModel<"cortex-a34", CortexA53Model, ProcessorFeatures.A53, - [TuneA35]>; -def : ProcessorModel<"cortex-a53", CortexA53Model, ProcessorFeatures.A53, - [TuneA53]>; -def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55, - [TuneA55]>; -def : ProcessorModel<"cortex-a510", CortexA510Model, ProcessorFeatures.A510, - [TuneA510]>; -def : ProcessorModel<"cortex-a520", CortexA510Model, ProcessorFeatures.A520, - [TuneA520]>; -def : ProcessorModel<"cortex-a520ae", CortexA510Model, ProcessorFeatures.A520AE, - [TuneA520AE]>; -def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53, - [TuneA57]>; -def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65, - [TuneA65]>; -def : ProcessorModel<"cortex-a65ae", CortexA53Model, ProcessorFeatures.A65, - [TuneA65]>; -def : ProcessorModel<"cortex-a72", CortexA57Model, ProcessorFeatures.A53, - [TuneA72]>; -def : ProcessorModel<"cortex-a73", CortexA57Model, ProcessorFeatures.A53, - [TuneA73]>; -def : ProcessorModel<"cortex-a75", CortexA57Model, ProcessorFeatures.A55, - [TuneA75]>; -def : ProcessorModel<"cortex-a76", CortexA57Model, ProcessorFeatures.A76, - [TuneA76]>; -def : ProcessorModel<"cortex-a76ae", CortexA57Model, ProcessorFeatures.A76, - [TuneA76]>; -def : ProcessorModel<"cortex-a77", CortexA57Model, ProcessorFeatures.A77, - [TuneA77]>; -def : ProcessorModel<"cortex-a78", CortexA57Model, ProcessorFeatures.A78, - [TuneA78]>; -def : ProcessorModel<"cortex-a78ae", CortexA57Model, ProcessorFeatures.A78AE, - [TuneA78AE]>; -def : ProcessorModel<"cortex-a78c", 
CortexA57Model, ProcessorFeatures.A78C, - [TuneA78C]>; -def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710, - [TuneA710]>; -def : ProcessorModel<"cortex-a715", NeoverseN2Model, ProcessorFeatures.A715, - [TuneA715]>; -def : ProcessorModel<"cortex-a720", NeoverseN2Model, ProcessorFeatures.A720, - [TuneA720]>; -def : ProcessorModel<"cortex-a720ae", NeoverseN2Model, ProcessorFeatures.A720AE, - [TuneA720AE]>; -def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82, - [TuneR82]>; -def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1, - [TuneX1]>; -def : ProcessorModel<"cortex-x1c", CortexA57Model, ProcessorFeatures.X1C, - [TuneX1]>; -def : ProcessorModel<"cortex-x2", NeoverseN2Model, ProcessorFeatures.X2, - [TuneX2]>; -def : ProcessorModel<"cortex-x3", NeoverseN2Model, ProcessorFeatures.X3, - [TuneX3]>; -def : ProcessorModel<"cortex-x4", NeoverseN2Model, ProcessorFeatures.X4, - [TuneX4]>; -def : ProcessorModel<"neoverse-e1", CortexA53Model, - ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>; -def : ProcessorModel<"neoverse-n1", NeoverseN1Model, - ProcessorFeatures.NeoverseN1, [TuneNeoverseN1]>; -def : ProcessorModel<"neoverse-n2", NeoverseN2Model, - ProcessorFeatures.NeoverseN2, [TuneNeoverseN2]>; -def : ProcessorModel<"neoverse-512tvb", NeoverseV1Model, - ProcessorFeatures.Neoverse512TVB, [TuneNeoverse512TVB]>; -def : ProcessorModel<"neoverse-v1", NeoverseV1Model, - ProcessorFeatures.NeoverseV1, [TuneNeoverseV1]>; -def : ProcessorModel<"neoverse-v2", NeoverseV2Model, - ProcessorFeatures.NeoverseV2, [TuneNeoverseV2]>; -def : ProcessorModel<"exynos-m3", ExynosM3Model, ProcessorFeatures.ExynosM3, - [TuneExynosM3]>; -def : ProcessorModel<"exynos-m4", ExynosM4Model, ProcessorFeatures.ExynosM4, - [TuneExynosM4]>; -def : ProcessorModel<"exynos-m5", ExynosM5Model, ProcessorFeatures.ExynosM4, - [TuneExynosM4]>; -def : ProcessorModel<"falkor", FalkorModel, ProcessorFeatures.Falkor, - [TuneFalkor]>; -def : 
ProcessorModel<"saphira", FalkorModel, ProcessorFeatures.Saphira, - [TuneSaphira]>; -def : ProcessorModel<"kryo", KryoModel, ProcessorFeatures.A53, [TuneKryo]>; - -// Cavium ThunderX/ThunderX T8X Processors -def : ProcessorModel<"thunderx", ThunderXT8XModel, ProcessorFeatures.ThunderX, - [TuneThunderX]>; -def : ProcessorModel<"thunderxt88", ThunderXT8XModel, - ProcessorFeatures.ThunderX, [TuneThunderXT88]>; -def : ProcessorModel<"thunderxt81", ThunderXT8XModel, - ProcessorFeatures.ThunderX, [TuneThunderXT81]>; -def : ProcessorModel<"thunderxt83", ThunderXT8XModel, - ProcessorFeatures.ThunderX, [TuneThunderXT83]>; -// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan. -def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, - ProcessorFeatures.ThunderX2T99, [TuneThunderX2T99]>; -// Marvell ThunderX3T110 Processors. -def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, - ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>; -def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110, - [TuneTSV110]>; - -// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode. 
-def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7, - [TuneAppleA7]>; - -// iPhone and iPad CPUs -def : ProcessorModel<"apple-a7", CycloneModel, ProcessorFeatures.AppleA7, - [TuneAppleA7]>; -def : ProcessorModel<"apple-a8", CycloneModel, ProcessorFeatures.AppleA7, - [TuneAppleA7]>; -def : ProcessorModel<"apple-a9", CycloneModel, ProcessorFeatures.AppleA7, - [TuneAppleA7]>; -def : ProcessorModel<"apple-a10", CycloneModel, ProcessorFeatures.AppleA10, - [TuneAppleA10]>; -def : ProcessorModel<"apple-a11", CycloneModel, ProcessorFeatures.AppleA11, - [TuneAppleA11]>; -def : ProcessorModel<"apple-a12", CycloneModel, ProcessorFeatures.AppleA12, - [TuneAppleA12]>; -def : ProcessorModel<"apple-a13", CycloneModel, ProcessorFeatures.AppleA13, - [TuneAppleA13]>; -def : ProcessorModel<"apple-a14", CycloneModel, ProcessorFeatures.AppleA14, - [TuneAppleA14]>; -def : ProcessorModel<"apple-a15", CycloneModel, ProcessorFeatures.AppleA15, - [TuneAppleA15]>; -def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16, - [TuneAppleA16]>; -def : ProcessorModel<"apple-a17", CycloneModel, ProcessorFeatures.AppleA17, - [TuneAppleA17]>; -// Mac CPUs -def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14, - [TuneAppleA14]>; -def : ProcessorModel<"apple-m2", CycloneModel, ProcessorFeatures.AppleA15, - [TuneAppleA15]>; -def : ProcessorModel<"apple-m3", CycloneModel, ProcessorFeatures.AppleA16, - [TuneAppleA16]>; - -// watch CPUs. -def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12, - [TuneAppleA12]>; -def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12, - [TuneAppleA12]>; - -// Alias for the latest Apple processor model supported by LLVM. 
-def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA16, - [TuneAppleA16]>; - -// Fujitsu A64FX -def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX, - [TuneA64FX]>; - -// Nvidia Carmel -def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel, - [TuneCarmel]>; - -// Ampere Computing -def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1, - [TuneAmpere1]>; - -def : ProcessorModel<"ampere1a", Ampere1Model, ProcessorFeatures.Ampere1A, - [TuneAmpere1A]>; - -def : ProcessorModel<"ampere1b", Ampere1BModel, ProcessorFeatures.Ampere1B, - [TuneAmpere1B]>; +include "AArch64Processors.td" //===----------------------------------------------------------------------===// // Assembly parser diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td new file mode 100644 index 00000000000000..efda45a72ef424 --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -0,0 +1,752 @@ +//=- AArch64Features.td - Describe AArch64 SubtargetFeatures -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +// Each SubtargetFeature which corresponds to an Arm Architecture feature should +// be annotated with the respective FEAT_ feature name from the Architecture +// Reference Manual. If a SubtargetFeature enables instructions from multiple +// Arm Architecture Features, it should list all the relevant features. Not all +// FEAT_ features have a corresponding SubtargetFeature. 
+ +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", + "Enable ARMv8 FP (FEAT_FP)">; + +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable Advanced SIMD instructions (FEAT_AdvSIMD)", [FeatureFPARMv8]>; + +def FeatureSM4 : SubtargetFeature< + "sm4", "HasSM4", "true", + "Enable SM3 and SM4 support (FEAT_SM4, FEAT_SM3)", [FeatureNEON]>; + +def FeatureSHA2 : SubtargetFeature< + "sha2", "HasSHA2", "true", + "Enable SHA1 and SHA256 support (FEAT_SHA1, FEAT_SHA256)", [FeatureNEON]>; + +def FeatureSHA3 : SubtargetFeature< + "sha3", "HasSHA3", "true", + "Enable SHA512 and SHA3 support (FEAT_SHA3, FEAT_SHA512)", [FeatureNEON, FeatureSHA2]>; + +def FeatureAES : SubtargetFeature< + "aes", "HasAES", "true", + "Enable AES support (FEAT_AES, FEAT_PMULL)", [FeatureNEON]>; + +// Crypto has been split up and any combination is now valid (see the +// crypto definitions above). Also, crypto is now context sensitive: +// it has a different meaning for e.g. Armv8.4 than it has for Armv8.2. +// Therefore, we rely on Clang, the user interfacing tool, to pass on the +// appropriate crypto options. But here in the backend, crypto has very little +// meaning anymore. We kept the Crypto definition here for backward +// compatibility, and now imply features SHA2 and AES, which was the +// "traditional" meaning of Crypto. 
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable cryptographic instructions", [FeatureNEON, FeatureSHA2, FeatureAES]>; + +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable ARMv8 CRC-32 checksum instructions (FEAT_CRC32)">; + +def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Enable ARMv8 Reliability, Availability and Serviceability Extensions (FEAT_RAS, FEAT_RASv1p1)">; + +def FeatureRASv2 : SubtargetFeature<"rasv2", "HasRASv2", "true", + "Enable ARMv8.9-A Reliability, Availability and Serviceability Extensions (FEAT_RASv2)", + [FeatureRAS]>; + +def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true", + "Enable ARMv8.1 Large System Extension (LSE) atomic instructions (FEAT_LSE)">; + +def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true", + "Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules (FEAT_LSE2)">; + +def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true", + "Enable out of line atomics to support LSE instructions">; + +def FeatureFMV : SubtargetFeature<"fmv", "HasFMV", "true", + "Enable Function Multi Versioning support.">; + +def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true", + "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions (FEAT_RDM)", + [FeatureNEON]>; + +def FeaturePAN : SubtargetFeature< + "pan", "HasPAN", "true", + "Enables ARM v8.1 Privileged Access-Never extension (FEAT_PAN)">; + +def FeatureLOR : SubtargetFeature< + "lor", "HasLOR", "true", + "Enables ARM v8.1 Limited Ordering Regions extension (FEAT_LOR)">; + +def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2", + "true", "Enable RW operand CONTEXTIDR_EL2" >; + +def FeatureVH : SubtargetFeature<"vh", "HasVH", "true", + "Enables ARM v8.1 Virtual Host extension (FEAT_VHE)", [FeatureCONTEXTIDREL2] >; + +// This SubtargetFeature is special. 
It controls only whether codegen will turn +// `llvm.readcyclecounter()` into an access to a PMUv3 System Register. The +// `FEAT_PMUv3*` system registers are always available for assembly/disassembly. +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension (FEAT_PMUv3)">; + +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Full FP16 (FEAT_FP16)", [FeatureFPARMv8]>; + +def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", + "Enable FP16 FML instructions (FEAT_FHM)", [FeatureFullFP16]>; + +def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", + "Enable Statistical Profiling extension (FEAT_SPE)">; + +def FeaturePAN_RWV : SubtargetFeature< + "pan-rwv", "HasPAN_RWV", "true", + "Enable v8.2 PAN s1e1R and s1e1W Variants (FEAT_PAN2)", + [FeaturePAN]>; + +// UAO PState +def FeaturePsUAO : SubtargetFeature< "uaops", "HasPsUAO", "true", + "Enable v8.2 UAO PState (FEAT_UAO)">; + +def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP", + "true", "Enable v8.2 data Cache Clean to Point of Persistence (FEAT_DPB)" >; + +def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", + "Enable Scalable Vector Extension (SVE) instructions (FEAT_SVE)", [FeatureFullFP16]>; + +def FeatureFPMR : SubtargetFeature<"fpmr", "HasFPMR", "true", + "Enable FPMR Register (FEAT_FPMR)">; + +def FeatureFP8 : SubtargetFeature<"fp8", "HasFP8", "true", + "Enable FP8 instructions (FEAT_FP8)">; + +// This flag is currently still labeled as Experimental, but when fully +// implemented this should tell the compiler to use the zeroing pseudos to +// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive +// lanes are known to be zero. The pseudos will then be expanded using the +// MOVPRFX instruction to zero the inactive lanes. This feature should only be +// enabled if MOVPRFX instructions are known to merge with the destructive +// operations they prefix. 
+// +// This feature could similarly be extended to support cheap merging of _any_ +// value into the inactive lanes using the MOVPRFX instruction that uses +// merging-predication. +def FeatureExperimentalZeroingPseudos + : SubtargetFeature<"use-experimental-zeroing-pseudos", + "UseExperimentalZeroingPseudos", "true", + "Hint to the compiler that the MOVPRFX instruction is " + "merged with destructive operations", + []>; + +def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl", + "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">; + +def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", + "true", "Enable BFloat16 Extension (FEAT_BF16)" >; + +def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r", + "NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">; + +def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true", + "Enable Scalable Vector Extension 2 (SVE2) instructions (FEAT_SVE2)", + [FeatureSVE, FeatureUseScalarIncVL]>; + +def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true", + "Enable AES SVE2 instructions (FEAT_SVE_AES, FEAT_SVE_PMULL128)", + [FeatureSVE2, FeatureAES]>; + +def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true", + "Enable SM4 SVE2 instructions (FEAT_SVE_SM4)", [FeatureSVE2, FeatureSM4]>; + +def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true", + "Enable SHA3 SVE2 instructions (FEAT_SVE_SHA3)", [FeatureSVE2, FeatureSHA3]>; + +def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true", + "Enable bit permutation SVE2 instructions (FEAT_SVE_BitPerm)", [FeatureSVE2]>; + +def FeatureSVE2p1: SubtargetFeature<"sve2p1", "HasSVE2p1", "true", + "Enable Scalable Vector Extension 2.1 instructions", [FeatureSVE2]>; + +def FeatureB16B16 : SubtargetFeature<"b16b16", "HasB16B16", "true", + "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", [FeatureBF16]>; + +def FeatureZCRegMove : SubtargetFeature<"zcm", 
"HasZeroCycleRegMove", "true", + "Has zero-cycle register moves">; + +def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true", + "Has zero-cycle zeroing instructions for generic registers">; + +// It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0". +// as movi is more efficient across all cores. Newer cores can eliminate +// fmovs early and there is no difference with movi, but this not true for +// all implementations. +def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false", + "Has no zero-cycle zeroing instructions for FP registers">; + +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions", + [FeatureZCZeroingGP]>; + +/// ... but the floating-point version doesn't quite work in rare cases on older +/// CPUs. +def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround", + "HasZeroCycleZeroingFPWorkaround", "true", + "The zero-cycle floating-point zeroing instruction has a bug">; + +def FeatureStrictAlign : SubtargetFeature<"strict-align", + "RequiresStrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +foreach i = {1-7,9-15,18,20-28} in + def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true", + "Reserve X"#i#", making it unavailable " + "as a GPR">; + +foreach i = {8-15,18} in + def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i, + "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">; + +def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps", + "true", + "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">; + +def FeaturePredictableSelectIsExpensive : SubtargetFeature< + "predictable-select-expensive", "PredictableSelectIsExpensive", "true", + "Prefer likely predicted branches over selects">; + +def FeatureEnableSelectOptimize : SubtargetFeature< + "enable-select-opt", 
"EnableSelectOptimize", "true", + "Enable the select optimize pass for select loop heuristics">; + +def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move", + "HasExynosCheapAsMoveHandling", "true", + "Use Exynos specific handling of cheap instructions">; + +def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", + "UsePostRAScheduler", "true", "Schedule again after register allocation">; + +def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store", + "IsMisaligned128StoreSlow", "true", "Misaligned 128 bit stores are slow">; + +def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", + "IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">; + +def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address", + "IsStoreAddressAscend", "true", + "Schedule vector stores by ascending address">; + +def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow", + "true", "STR of Q register with register offset is slow">; + +def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< + "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern", + "true", "Use alternative pattern for sextload convert to f32">; + +def FeatureArithmeticBccFusion : SubtargetFeature< + "arith-bcc-fusion", "HasArithmeticBccFusion", "true", + "CPU fuses arithmetic+bcc operations">; + +def FeatureArithmeticCbzFusion : SubtargetFeature< + "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", + "CPU fuses arithmetic + cbz/cbnz operations">; + +def FeatureCmpBccFusion : SubtargetFeature< + "cmp-bcc-fusion", "HasCmpBccFusion", "true", + "CPU fuses cmp+bcc operations">; + +def FeatureFuseAddress : SubtargetFeature< + "fuse-address", "HasFuseAddress", "true", + "CPU fuses address generation and memory operations">; + +def FeatureFuseAES : SubtargetFeature< + "fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; + +def FeatureFuseArithmeticLogic : SubtargetFeature< + 
"fuse-arith-logic", "HasFuseArithmeticLogic", "true", + "CPU fuses arithmetic and logic operations">; + +def FeatureFuseCCSelect : SubtargetFeature< + "fuse-csel", "HasFuseCCSelect", "true", + "CPU fuses conditional select operations">; + +def FeatureFuseCryptoEOR : SubtargetFeature< + "fuse-crypto-eor", "HasFuseCryptoEOR", "true", + "CPU fuses AES/PMULL and EOR operations">; + +def FeatureFuseAdrpAdd : SubtargetFeature< + "fuse-adrp-add", "HasFuseAdrpAdd", "true", + "CPU fuses adrp+add operations">; + +def FeatureFuseLiterals : SubtargetFeature< + "fuse-literals", "HasFuseLiterals", "true", + "CPU fuses literal generation operations">; + +def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature< + "fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true", + "CPU fuses (a + b + 1) and (a - b - 1)">; + +def FeatureDisableLatencySchedHeuristic : SubtargetFeature< + "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", + "Disable latency scheduling heuristic">; + +def FeatureStorePairSuppress : SubtargetFeature< + "store-pair-suppress", "EnableStorePairSuppress", "true", + "Enable Store Pair Suppression heuristics">; + +def FeatureForce32BitJumpTables + : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true", + "Force jump table entries to be 32-bits wide except at MinSize">; + +def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true", + "Enable support for RCPC extension (FEAT_LRCPC)">; + +def FeatureUseRSqrt : SubtargetFeature< + "use-reciprocal-square-root", "UseRSqrt", "true", + "Use the reciprocal square root approximation">; + +def FeatureDotProd : SubtargetFeature< + "dotprod", "HasDotProd", "true", + "Enable dot product support (FEAT_DotProd)", [FeatureNEON]>; + +def FeaturePAuth : SubtargetFeature< + "pauth", "HasPAuth", "true", + "Enable v8.3-A Pointer Authentication extension (FEAT_PAuth)">; + +def FeatureJS : SubtargetFeature< + "jsconv", "HasJS", "true", + "Enable v8.3-A JavaScript FP conversion 
instructions (FEAT_JSCVT)", + [FeatureFPARMv8]>; + +def FeatureCCIDX : SubtargetFeature< + "ccidx", "HasCCIDX", "true", + "Enable v8.3-A Extend of the CCSIDR number of sets (FEAT_CCIDX)">; + +def FeatureComplxNum : SubtargetFeature< + "complxnum", "HasComplxNum", "true", + "Enable v8.3-A Floating-point complex number support (FEAT_FCMA)", + [FeatureNEON]>; + +def FeatureNV : SubtargetFeature< + "nv", "HasNV", "true", + "Enable v8.4-A Nested Virtualization Enchancement (FEAT_NV, FEAT_NV2)">; + +def FeatureMPAM : SubtargetFeature< + "mpam", "HasMPAM", "true", + "Enable v8.4-A Memory system Partitioning and Monitoring extension (FEAT_MPAM)">; + +def FeatureDIT : SubtargetFeature< + "dit", "HasDIT", "true", + "Enable v8.4-A Data Independent Timing instructions (FEAT_DIT)">; + +def FeatureTRACEV8_4 : SubtargetFeature< + "tracev8.4", "HasTRACEV8_4", "true", + "Enable v8.4-A Trace extension (FEAT_TRF)">; + +def FeatureAM : SubtargetFeature< + "am", "HasAM", "true", + "Enable v8.4-A Activity Monitors extension (FEAT_AMUv1)">; + +def FeatureAMVS : SubtargetFeature< + "amvs", "HasAMVS", "true", + "Enable v8.6-A Activity Monitors Virtualization support (FEAT_AMUv1p1)", + [FeatureAM]>; + +def FeatureSEL2 : SubtargetFeature< + "sel2", "HasSEL2", "true", + "Enable v8.4-A Secure Exception Level 2 extension (FEAT_SEL2)">; + +def FeatureTLB_RMI : SubtargetFeature< + "tlb-rmi", "HasTLB_RMI", "true", + "Enable v8.4-A TLB Range and Maintenance Instructions (FEAT_TLBIOS, FEAT_TLBIRANGE)">; + +def FeatureFlagM : SubtargetFeature< + "flagm", "HasFlagM", "true", + "Enable v8.4-A Flag Manipulation Instructions (FEAT_FlagM)">; + +// 8.4 RCPC enchancements: LDAPR & STLR instructions with Immediate Offset +def FeatureRCPC_IMMO : SubtargetFeature<"rcpc-immo", "HasRCPC_IMMO", "true", + "Enable v8.4-A RCPC instructions with Immediate Offsets (FEAT_LRCPC2)", + [FeatureRCPC]>; + +def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", + "NegativeImmediates", "false", + "Convert 
immediates and instructions " + "to their negated or complemented " + "equivalent when the immediate does " + "not fit in the encoding.">; + +// Address operands with shift amount 2 or 3 are fast on all Arm chips except +// some old Apple cores (A7-A10?) which handle all shifts slowly. Cortex-A57 +// and derived designs through Cortex-X1 take an extra micro-op for shifts +// of 1 or 4. Other Arm chips handle all shifted operands at the same speed +// as unshifted operands. +// +// We don't try to model the behavior of the old Apple cores because new code +// targeting A7 is very unlikely to actually run on an A7. The Cortex cores +// are modeled by FeatureAddrLSLSlow14. +def FeatureAddrLSLSlow14 : SubtargetFeature< + "addr-lsl-slow-14", "HasAddrLSLSlow14", "true", + "Address operands with shift amount of 1 or 4 are slow">; + +def FeatureALULSLFast : SubtargetFeature< + "alu-lsl-fast", "HasALULSLFast", "true", + "Add/Sub operations with lsl shift <= 4 are cheap">; + +def FeatureAggressiveFMA : + SubtargetFeature<"aggressive-fma", + "HasAggressiveFMA", + "true", + "Enable Aggressive FMA for floating-point.">; + +def FeatureAltFPCmp : SubtargetFeature<"altnzcv", "HasAlternativeNZCV", "true", + "Enable alternative NZCV format for floating point comparisons (FEAT_FlagM2)">; + +def FeatureFRInt3264 : SubtargetFeature<"fptoint", "HasFRInt3264", "true", + "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to " + "an integer (in FP format) forcing it to fit into a 32- or 64-bit int (FEAT_FRINTTS)" >; + +def FeatureSpecRestrict : SubtargetFeature<"specrestrict", "HasSpecRestrict", + "true", "Enable architectural speculation restriction (FEAT_CSV2_2)">; + +def FeatureSB : SubtargetFeature<"sb", "HasSB", + "true", "Enable v8.5 Speculation Barrier (FEAT_SB)" >; + +def FeatureSSBS : SubtargetFeature<"ssbs", "HasSSBS", + "true", "Enable Speculative Store Bypass Safe bit (FEAT_SSBS, FEAT_SSBS2)" >; + +def FeaturePredRes : SubtargetFeature<"predres", 
"HasPredRes", "true", + "Enable v8.5a execution and data prediction invalidation instructions (FEAT_SPECRES)" >; + +def FeatureCacheDeepPersist : SubtargetFeature<"ccdp", "HasCCDP", + "true", "Enable v8.5 Cache Clean to Point of Deep Persistence (FEAT_DPB2)" >; + +def FeatureBranchTargetId : SubtargetFeature<"bti", "HasBTI", + "true", "Enable Branch Target Identification (FEAT_BTI)" >; + +def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen", + "true", "Enable Random Number generation instructions (FEAT_RNG)" >; + +def FeatureMTE : SubtargetFeature<"mte", "HasMTE", + "true", "Enable Memory Tagging Extension (FEAT_MTE, FEAT_MTE2)" >; + +def FeatureTRBE : SubtargetFeature<"trbe", "HasTRBE", + "true", "Enable Trace Buffer Extension (FEAT_TRBE)">; + +def FeatureETE : SubtargetFeature<"ete", "HasETE", + "true", "Enable Embedded Trace Extension (FEAT_ETE)", + [FeatureTRBE]>; + +def FeatureTME : SubtargetFeature<"tme", "HasTME", + "true", "Enable Transactional Memory Extension (FEAT_TME)" >; + +def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals", + "AllowTaggedGlobals", + "true", "Use an instruction sequence for taking the address of a global " + "that allows a memory tag in the upper address bits">; + +def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8", + "true", "Enable Matrix Multiply Int8 Extension (FEAT_I8MM)">; + +def FeatureMatMulFP32 : SubtargetFeature<"f32mm", "HasMatMulFP32", + "true", "Enable Matrix Multiply FP32 Extension (FEAT_F32MM)", [FeatureSVE]>; + +def FeatureMatMulFP64 : SubtargetFeature<"f64mm", "HasMatMulFP64", + "true", "Enable Matrix Multiply FP64 Extension (FEAT_F64MM)", [FeatureSVE]>; + +def FeatureXS : SubtargetFeature<"xs", "HasXS", + "true", "Enable Armv8.7-A limited-TLB-maintenance instruction (FEAT_XS)">; + +def FeatureWFxT : SubtargetFeature<"wfxt", "HasWFxT", + "true", "Enable Armv8.7-A WFET and WFIT instruction (FEAT_WFxT)">; + +def FeatureHCX : SubtargetFeature< + "hcx", "HasHCX", "true", "Enable Armv8.7-A 
HCRX_EL2 system register (FEAT_HCX)">; + +def FeatureLS64 : SubtargetFeature<"ls64", "HasLS64", + "true", "Enable Armv8.7-A LD64B/ST64B Accelerator Extension (FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA)">; + +def FeatureHBC : SubtargetFeature<"hbc", "HasHBC", + "true", "Enable Armv8.8-A Hinted Conditional Branches Extension (FEAT_HBC)">; + +def FeatureMOPS : SubtargetFeature<"mops", "HasMOPS", + "true", "Enable Armv8.8-A memcpy and memset acceleration instructions (FEAT_MOPS)">; + +def FeatureNMI : SubtargetFeature<"nmi", "HasNMI", + "true", "Enable Armv8.8-A Non-maskable Interrupts (FEAT_NMI, FEAT_GICv3_NMI)">; + +def FeatureBRBE : SubtargetFeature<"brbe", "HasBRBE", + "true", "Enable Branch Record Buffer Extension (FEAT_BRBE)">; + +def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF", + "true", "Enable extra register in the Statistical Profiling Extension (FEAT_SPEv1p2)">; + +def FeatureFineGrainedTraps : SubtargetFeature<"fgt", "HasFineGrainedTraps", + "true", "Enable fine grained virtualization traps extension (FEAT_FGT)">; + +def FeatureEnhancedCounterVirtualization : + SubtargetFeature<"ecv", "HasEnhancedCounterVirtualization", + "true", "Enable enhanced counter virtualization extension (FEAT_ECV)">; + +def FeatureRME : SubtargetFeature<"rme", "HasRME", + "true", "Enable Realm Management Extension (FEAT_RME)">; + +def FeatureSME : SubtargetFeature<"sme", "HasSME", "true", + "Enable Scalable Matrix Extension (SME) (FEAT_SME)", [FeatureBF16, FeatureUseScalarIncVL]>; + +def FeatureSMEF64F64 : SubtargetFeature<"sme-f64f64", "HasSMEF64F64", "true", + "Enable Scalable Matrix Extension (SME) F64F64 instructions (FEAT_SME_F64F64)", [FeatureSME]>; + +def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true", + "Enable Scalable Matrix Extension (SME) I16I64 instructions (FEAT_SME_I16I64)", [FeatureSME]>; + +def FeatureSMEF16F16 : SubtargetFeature<"sme-f16f16", "HasSMEF16F16", "true", + "Enable SME2.1 non-widening Float16 instructions 
(FEAT_SME_F16F16)", []>; + +def FeatureSMEFA64 : SubtargetFeature<"sme-fa64", "HasSMEFA64", "true", + "Enable the full A64 instruction set in streaming SVE mode (FEAT_SME_FA64)", [FeatureSME, FeatureSVE2]>; + +def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true", + "Enable Scalable Matrix Extension 2 (SME2) instructions", [FeatureSME]>; + +def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true", + "Enable Scalable Matrix Extension 2.1 (FEAT_SME2p1) instructions", [FeatureSME2]>; + +def FeatureFAMINMAX: SubtargetFeature<"faminmax", "HasFAMINMAX", "true", + "Enable FAMIN and FAMAX instructions (FEAT_FAMINMAX)">; + +def FeatureFP8FMA : SubtargetFeature<"fp8fma", "HasFP8FMA", "true", + "Enable fp8 multiply-add instructions (FEAT_FP8FMA)">; + +def FeatureSSVE_FP8FMA : SubtargetFeature<"ssve-fp8fma", "HasSSVE_FP8FMA", "true", + "Enable SVE2 fp8 multiply-add instructions (FEAT_SSVE_FP8FMA)", [FeatureSME2]>; + +def FeatureFP8DOT2: SubtargetFeature<"fp8dot2", "HasFP8DOT2", "true", + "Enable fp8 2-way dot instructions (FEAT_FP8DOT2)">; + +def FeatureSSVE_FP8DOT2 : SubtargetFeature<"ssve-fp8dot2", "HasSSVE_FP8DOT2", "true", + "Enable SVE2 fp8 2-way dot product instructions (FEAT_SSVE_FP8DOT2)", [FeatureSME2]>; + +def FeatureFP8DOT4: SubtargetFeature<"fp8dot4", "HasFP8DOT4", "true", + "Enable fp8 4-way dot instructions (FEAT_FP8DOT4)">; + +def FeatureSSVE_FP8DOT4 : SubtargetFeature<"ssve-fp8dot4", "HasSSVE_FP8DOT4", "true", + "Enable SVE2 fp8 4-way dot product instructions (FEAT_SSVE_FP8DOT4)", [FeatureSME2]>; +def FeatureLUT: SubtargetFeature<"lut", "HasLUT", "true", + "Enable Lookup Table instructions (FEAT_LUT)">; + +def FeatureSME_LUTv2 : SubtargetFeature<"sme-lutv2", "HasSME_LUTv2", "true", + "Enable Scalable Matrix Extension (SME) LUTv2 instructions (FEAT_SME_LUTv2)">; + +def FeatureSMEF8F16 : SubtargetFeature<"sme-f8f16", "HasSMEF8F16", "true", + "Enable Scalable Matrix Extension (SME) F8F16 instructions(FEAT_SME_F8F16)", [FeatureSME2, FeatureFP8]>; + 
+def FeatureSMEF8F32 : SubtargetFeature<"sme-f8f32", "HasSMEF8F32", "true", + "Enable Scalable Matrix Extension (SME) F8F32 instructions (FEAT_SME_F8F32)", [FeatureSME2, FeatureFP8]>; + +def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true", + "Apple A7 (the CPU formerly known as Cyclone)">; + +def FeatureEL2VMSA : SubtargetFeature<"el2vmsa", "HasEL2VMSA", "true", + "Enable Exception Level 2 Virtual Memory System Architecture">; + +def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true", + "Enable Exception Level 3">; + +def FeatureCSSC : SubtargetFeature<"cssc", "HasCSSC", "true", + "Enable Common Short Sequence Compression (CSSC) instructions (FEAT_CSSC)">; + +def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769", + "FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">; + +def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice", + "NoBTIAtReturnTwice", "true", + "Don't place a BTI instruction " + "after a return-twice">; + +def FeatureCHK : SubtargetFeature<"chk", "HasCHK", + "true", "Enable Armv8.0-A Check Feature Status Extension (FEAT_CHK)">; + +def FeatureGCS : SubtargetFeature<"gcs", "HasGCS", + "true", "Enable Armv9.4-A Guarded Call Stack Extension", [FeatureCHK]>; + +def FeatureCLRBHB : SubtargetFeature<"clrbhb", "HasCLRBHB", + "true", "Enable Clear BHB instruction (FEAT_CLRBHB)">; + +def FeaturePRFM_SLC : SubtargetFeature<"prfm-slc-target", "HasPRFM_SLC", + "true", "Enable SLC target for PRFM instruction">; + +def FeatureSPECRES2 : SubtargetFeature<"specres2", "HasSPECRES2", + "true", "Enable Speculation Restriction Instruction (FEAT_SPECRES2)", + [FeaturePredRes]>; + +def FeatureMEC : SubtargetFeature<"mec", "HasMEC", + "true", "Enable Memory Encryption Contexts Extension", [FeatureRME]>; + +def FeatureITE : SubtargetFeature<"ite", "HasITE", + "true", "Enable Armv9.4-A Instrumentation Extension FEAT_ITE", [FeatureETE, + FeatureTRBE]>; + +def FeatureRCPC3 : 
SubtargetFeature<"rcpc3", "HasRCPC3", + "true", "Enable Armv8.9-A RCPC instructions for A64 and Advanced SIMD and floating-point instruction set (FEAT_LRCPC3)", + [FeatureRCPC_IMMO]>; + +def FeatureTHE : SubtargetFeature<"the", "HasTHE", + "true", "Enable Armv8.9-A Translation Hardening Extension (FEAT_THE)">; + +def FeatureLSE128 : SubtargetFeature<"lse128", "HasLSE128", + "true", "Enable Armv9.4-A 128-bit Atomic Instructions (FEAT_LSE128)", + [FeatureLSE]>; + +// FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, and FEAT_SYSINSTR128 are mutually implicit. +// Therefore group them all under a single feature flag, d128: +def FeatureD128 : SubtargetFeature<"d128", "HasD128", + "true", "Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers " + "and Instructions (FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128)", + [FeatureLSE128]>; + +def FeatureDisableLdp : SubtargetFeature<"disable-ldp", "HasDisableLdp", + "true", "Do not emit ldp">; + +def FeatureDisableStp : SubtargetFeature<"disable-stp", "HasDisableStp", + "true", "Do not emit stp">; + +def FeatureLdpAlignedOnly : SubtargetFeature<"ldp-aligned-only", "HasLdpAlignedOnly", + "true", "In order to emit ldp, first check if the load will be aligned to 2 * element_size">; + +def FeatureStpAlignedOnly : SubtargetFeature<"stp-aligned-only", "HasStpAlignedOnly", + "true", "In order to emit stp, first check if the store will be aligned to 2 * element_size">; + +// AArch64 2023 Architecture Extensions (v9.5-A) + +def FeatureCPA : SubtargetFeature<"cpa", "HasCPA", "true", + "Enable Armv9.5-A Checked Pointer Arithmetic (FEAT_CPA)">; + +def FeaturePAuthLR : SubtargetFeature<"pauth-lr", "HasPAuthLR", + "true", "Enable Armv9.5-A PAC enhancements (FEAT_PAuth_LR)">; + +def FeatureTLBIW : SubtargetFeature<"tlbiw", "HasTLBIW", "true", + "Enable ARMv9.5-A TLBI VMALL for Dirty State (FEAT_TLBIW)">; + + +//===----------------------------------------------------------------------===// +// Architectures. 
+// +def HasV8_0aOps : SubtargetFeature<"v8a", "HasV8_0aOps", "true", + "Support ARM v8.0a instructions", [FeatureEL2VMSA, FeatureEL3]>; + +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", + "Support ARM v8.1a instructions", [HasV8_0aOps, FeatureCRC, FeatureLSE, + FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH]>; + +def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", + "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO, + FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>; + +def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", + "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePAuth, + FeatureJS, FeatureCCIDX, FeatureComplxNum]>; + +def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", + "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd, + FeatureNV, FeatureMPAM, FeatureDIT, + FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI, + FeatureFlagM, FeatureRCPC_IMMO, FeatureLSE2]>; + +def HasV8_5aOps : SubtargetFeature< + "v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions", + [HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict, + FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist, + FeatureBranchTargetId]>; + +def HasV8_6aOps : SubtargetFeature< + "v8.6a", "HasV8_6aOps", "true", "Support ARM v8.6a instructions", + [HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps, + FeatureEnhancedCounterVirtualization, FeatureMatMulInt8]>; + +def HasV8_7aOps : SubtargetFeature< + "v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions", + [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>; + +def HasV8_8aOps : SubtargetFeature< + "v8.8a", "HasV8_8aOps", "true", "Support ARM v8.8a instructions", + [HasV8_7aOps, FeatureHBC, FeatureMOPS, FeatureNMI]>; + +def HasV8_9aOps : SubtargetFeature< + "v8.9a", "HasV8_9aOps", "true", "Support ARM v8.9a instructions", + [HasV8_8aOps, FeatureCLRBHB, FeaturePRFM_SLC, FeatureSPECRES2, + 
FeatureCSSC, FeatureRASv2, FeatureCHK]>; + +def HasV9_0aOps : SubtargetFeature< + "v9a", "HasV9_0aOps", "true", "Support ARM v9a instructions", + [HasV8_5aOps, FeatureMEC, FeatureSVE2]>; + +def HasV9_1aOps : SubtargetFeature< + "v9.1a", "HasV9_1aOps", "true", "Support ARM v9.1a instructions", + [HasV8_6aOps, HasV9_0aOps]>; + +def HasV9_2aOps : SubtargetFeature< + "v9.2a", "HasV9_2aOps", "true", "Support ARM v9.2a instructions", + [HasV8_7aOps, HasV9_1aOps]>; + +def HasV9_3aOps : SubtargetFeature< + "v9.3a", "HasV9_3aOps", "true", "Support ARM v9.3a instructions", + [HasV8_8aOps, HasV9_2aOps]>; + +def HasV9_4aOps : SubtargetFeature< + "v9.4a", "HasV9_4aOps", "true", "Support ARM v9.4a instructions", + [HasV8_9aOps, HasV9_3aOps]>; + +def HasV9_5aOps : SubtargetFeature< + "v9.5a", "HasV9_5aOps", "true", "Support ARM v9.5a instructions", + [HasV9_4aOps, FeatureCPA]>; + +def HasV8_0rOps : SubtargetFeature< + "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions", + [//v8.1 + FeatureCRC, FeaturePAN, FeatureLSE, FeatureCONTEXTIDREL2, + //v8.2 + FeatureRAS, FeaturePsUAO, FeatureCCPP, FeaturePAN_RWV, + //v8.3 + FeatureCCIDX, FeaturePAuth, FeatureRCPC, + //v8.4 + FeatureTRACEV8_4, FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2, + FeatureRCPC_IMMO, + // Not mandatory in v8.0-R, but included here on the grounds that it + // only enables names of system registers + FeatureSpecRestrict + ]>; + +//===----------------------------------------------------------------------===// +// Access to privileged registers +//===----------------------------------------------------------------------===// + +foreach i = 1-3 in +def FeatureUseEL#i#ForTP : SubtargetFeature<"tpidr-el"#i, "UseEL"#i#"ForTP", + "true", "Permit use of TPIDR_EL"#i#" for the TLS base">; +def FeatureUseROEL0ForTP : SubtargetFeature<"tpidrro-el0", "UseROEL0ForTP", + "true", "Permit use of TPIDRRO_EL0 for the TLS base">; + +//===----------------------------------------------------------------------===// +// 
Control codegen mitigation against Straight Line Speculation vulnerability. +//===----------------------------------------------------------------------===// + +def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr", + "HardenSlsRetBr", "true", + "Harden against straight line speculation across RET and BR instructions">; +def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr", + "HardenSlsBlr", "true", + "Harden against straight line speculation across BLR instructions">; +def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat", + "HardenSlsNoComdat", "true", + "Generate thunk code for SLS mitigation in the normal text section">; + + +// Only intended to be used by disassemblers. +def FeatureAll + : SubtargetFeature<"all", "IsAll", "true", "Enable all instructions", []>; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index ebddbefeb94f94..fb18d1c63ae80d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -10,9 +10,14 @@ // //===----------------------------------------------------------------------===// + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // + +class AssemblerPredicateWithAll + : AssemblerPredicate<(any_of FeatureAll, cond), name>; + def HasV8_0a : Predicate<"Subtarget->hasV8_0aOps()">, AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">; def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td new file mode 100644 index 00000000000000..c50a8200dd898d --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -0,0 +1,931 @@ +//=- AArch64Processors.td - Describe AArch64 Processors ------*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// AArch64 Processor subtarget features. +//===----------------------------------------------------------------------===// + + +def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", + "Cortex-A35 ARM processors">; + +def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", + "Cortex-A53 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureBalanceFPOps, + FeaturePostRAScheduler]>; + +def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", + "Cortex-A55 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeaturePostRAScheduler, + FeatureFuseAddress]>; + +def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510", + "Cortex-A510 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeaturePostRAScheduler + ]>; + +def TuneA520 : SubtargetFeature<"a520", "ARMProcFamily", "CortexA520", + "Cortex-A520 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeaturePostRAScheduler]>; + +def TuneA520AE : SubtargetFeature<"a520ae", "ARMProcFamily", "CortexA520", + "Cortex-A520AE ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeaturePostRAScheduler]>; + +def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", + "Cortex-A57 ARM processors", [ + FeatureFuseAES, + FeatureBalanceFPOps, + FeatureFuseAdrpAdd, + FeatureFuseLiterals, + FeatureAddrLSLSlow14, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65", + "Cortex-A65 ARM processors", [ + FeatureFuseAES, + FeatureFuseAddress, + 
FeatureFuseAdrpAdd, + FeatureFuseLiterals, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", + "Cortex-A72 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureFuseLiterals, + FeatureAddrLSLSlow14, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", + "Cortex-A73 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", + "Cortex-A75 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", + "Cortex-A76 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, + FeatureALULSLFast, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", + "Cortex-A77 ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, + FeatureALULSLFast, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78", + "Cortex-A78 ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, + FeatureALULSLFast, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA78AE : SubtargetFeature<"a78ae", "ARMProcFamily", + "CortexA78AE", + "Cortex-A78AE ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, + FeatureALULSLFast, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + 
FeaturePredictableSelectIsExpensive]>; + +def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily", + "CortexA78C", + "Cortex-A78C ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, + FeatureALULSLFast, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710", + "Cortex-A710 ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureALULSLFast, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715", + "Cortex-A715 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureCmpBccFusion, + FeatureALULSLFast, + FeatureFuseAdrpAdd, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720", + "Cortex-A720 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureCmpBccFusion, + FeatureALULSLFast, + FeatureFuseAdrpAdd, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA720AE : SubtargetFeature<"a720ae", "ARMProcFamily", "CortexA720", + "Cortex-A720AE ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureCmpBccFusion, + FeatureALULSLFast, + FeatureFuseAdrpAdd, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", + "CortexR82", + "Cortex-R82 ARM processors", [ + FeaturePostRAScheduler]>; + +def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", + "Cortex-X1 ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, + FeatureALULSLFast, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneX2 : 
SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", + "Cortex-X2 ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureALULSLFast, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3", + "Cortex-X3 ARM processors", [ + FeatureALULSLFast, + FeatureFuseAdrpAdd, + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4", + "Cortex-X4 ARM processors", [ + FeatureALULSLFast, + FeatureFuseAdrpAdd, + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", + "Fujitsu A64FX processors", [ + FeaturePostRAScheduler, + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeatureStorePairSuppress, + FeaturePredictableSelectIsExpensive]>; + +def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel", + "Nvidia Carmel processors">; + +// Note that cyclone does not fuse AES instructions, but newer apple chips do +// perform the fusion and cyclone is used by default when targeting apple OSes. 
+def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7", + "Apple A7 (the CPU formerly known as Cyclone)", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, FeatureFuseCryptoEOR, + FeatureStorePairSuppress, + FeatureZCRegMove, + FeatureZCZeroing, + FeatureZCZeroingFPWorkaround]>; + +def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10", + "Apple A10", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureStorePairSuppress, + FeatureZCRegMove, + FeatureZCZeroing]>; + +def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11", + "Apple A11", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureStorePairSuppress, + FeatureZCRegMove, + FeatureZCZeroing]>; + +def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12", + "Apple A12", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureStorePairSuppress, + FeatureZCRegMove, + FeatureZCZeroing]>; + +def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13", + "Apple A13", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureStorePairSuppress, + FeatureZCRegMove, + FeatureZCZeroing]>; + +def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", + "Apple A14", [ + FeatureAggressiveFMA, + FeatureAlternateSExtLoadCVTF32Pattern, + 
FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseCryptoEOR, + FeatureFuseLiterals, + FeatureStorePairSuppress, + FeatureZCRegMove, + FeatureZCZeroing]>; + +def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15", + "Apple A15", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseCryptoEOR, + FeatureFuseLiterals, + FeatureStorePairSuppress, + FeatureZCRegMove, + FeatureZCZeroing]>; + +def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16", + "Apple A16", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAddress, + FeatureFuseAdrpAdd, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseCryptoEOR, + FeatureFuseLiterals, + FeatureStorePairSuppress, + FeatureZCRegMove, + FeatureZCZeroing]>; + +def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17", + "Apple A17", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAddress, + FeatureFuseAdrpAdd, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseCryptoEOR, + FeatureFuseLiterals, + FeatureStorePairSuppress, + FeatureZCRegMove, + FeatureZCZeroing]>; + +def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", + "Samsung Exynos-M3 processors", + [FeatureExynosCheapAsMoveHandling, + FeatureForce32BitJumpTables, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseCCSelect, + FeatureFuseAdrpAdd, + FeatureFuseLiterals, + 
FeatureStorePairSuppress, + FeatureALULSLFast, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +// Re-uses some scheduling and tunings from the ExynosM3 proc family. +def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3", + "Samsung Exynos-M4 processors", + [FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureExynosCheapAsMoveHandling, + FeatureForce32BitJumpTables, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseAdrpAdd, + FeatureFuseLiterals, + FeatureStorePairSuppress, + FeatureALULSLFast, + FeaturePostRAScheduler, + FeatureZCZeroing]>; + +def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", + "Qualcomm Kryo processors", [ + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureALULSLFast, + FeatureStorePairSuppress]>; + +def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", + "Qualcomm Falkor processors", [ + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureStorePairSuppress, + FeatureALULSLFast, + FeatureSlowSTRQro]>; + +def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1", + "Neoverse E1 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeaturePostRAScheduler]>; + +def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1", + "Neoverse N1 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, + FeatureALULSLFast, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2", + "Neoverse N2 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureALULSLFast, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", 
"ARMProcFamily", "Neoverse512TVB", + "Neoverse 512-TVB ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureALULSLFast, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1", + "Neoverse V1 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, + FeatureALULSLFast, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive, + FeatureNoSVEFPLD1R]>; + +def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2", + "Neoverse V2 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureALULSLFast, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", + "Qualcomm Saphira processors", [ + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureStorePairSuppress, + FeatureALULSLFast]>; + +def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99", + "Cavium ThunderX2 processors", [ + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeaturePostRAScheduler, + FeatureStorePairSuppress, + FeaturePredictableSelectIsExpensive]>; + +def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily", + "ThunderX3T110", + "Marvell ThunderX3 processors", [ + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureBalanceFPOps, + FeatureStorePairSuppress, + FeatureStrictAlign]>; + +def TuneThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", + "Cavium ThunderX processors", [ + FeaturePostRAScheduler, + FeatureStorePairSuppress, + FeaturePredictableSelectIsExpensive]>; + +def TuneThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily", + "ThunderXT88", + "Cavium 
ThunderX processors", [ + FeaturePostRAScheduler, + FeatureStorePairSuppress, + FeaturePredictableSelectIsExpensive]>; + +def TuneThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily", + "ThunderXT81", + "Cavium ThunderX processors", [ + FeaturePostRAScheduler, + FeatureStorePairSuppress, + FeaturePredictableSelectIsExpensive]>; + +def TuneThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", + "ThunderXT83", + "Cavium ThunderX processors", [ + FeaturePostRAScheduler, + FeatureStorePairSuppress, + FeaturePredictableSelectIsExpensive]>; + +def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", + "HiSilicon TS-V110 processors", [ + FeatureFuseAES, + FeatureStorePairSuppress, + FeaturePostRAScheduler]>; + +def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", + "Ampere Computing Ampere-1 processors", [ + FeaturePostRAScheduler, + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureALULSLFast, + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeatureCmpBccFusion, + FeatureFuseAddress, + FeatureFuseLiterals, + FeatureStorePairSuppress, + FeatureLdpAlignedOnly, + FeatureStpAlignedOnly]>; + +def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A", + "Ampere Computing Ampere-1A processors", [ + FeaturePostRAScheduler, + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureALULSLFast, + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeatureCmpBccFusion, + FeatureFuseAddress, + FeatureFuseLiterals, + FeatureFuseAddSub2RegAndConstOne, + FeatureStorePairSuppress, + FeatureLdpAlignedOnly, + FeatureStpAlignedOnly]>; + +def TuneAmpere1B : SubtargetFeature<"ampere1b", "ARMProcFamily", "Ampere1B", + "Ampere Computing Ampere-1B processors", [ + FeaturePostRAScheduler, + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureALULSLFast, + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeatureCmpBccFusion, + FeatureFuseAddress, + FeatureFuseLiterals, + FeatureStorePairSuppress, + FeatureEnableSelectOptimize, + 
FeaturePredictableSelectIsExpensive, + FeatureLdpAlignedOnly, + FeatureStpAlignedOnly]>; + + +def ProcessorFeatures { + list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeatureNEON, FeaturePerfMon]; + list<SubtargetFeature> A55 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeaturePerfMon]; + list<SubtargetFeature> A510 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, + FeatureMatMulInt8, FeatureBF16, FeatureAM, + FeatureMTE, FeatureETE, FeatureSVE2BitPerm, + FeatureFP16FML]; + list<SubtargetFeature> A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM, + FeatureMTE, FeatureETE, FeatureSVE2BitPerm, + FeatureFP16FML]; + list<SubtargetFeature> A520AE = [HasV9_2aOps, FeaturePerfMon, FeatureAM, + FeatureMTE, FeatureETE, FeatureSVE2BitPerm, + FeatureFP16FML]; + list<SubtargetFeature> A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeatureSSBS, FeatureRAS, + FeaturePerfMon]; + list<SubtargetFeature> A76 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeatureSSBS, FeaturePerfMon]; + list<SubtargetFeature> A77 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeaturePerfMon, FeatureSSBS]; + list<SubtargetFeature> A78 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeaturePerfMon, FeatureSPE, + FeatureSSBS]; + list<SubtargetFeature> A78AE = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeaturePerfMon, FeatureSPE, + FeatureSSBS]; + list<SubtargetFeature> A78C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureFlagM, FeaturePAuth, + FeaturePerfMon, FeatureRCPC, FeatureSPE, + FeatureSSBS]; + list<SubtargetFeature> A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, + FeatureETE, FeatureMTE, FeatureFP16FML, + FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8]; + list<SubtargetFeature> A715 = [HasV9_0aOps, FeatureNEON, FeatureMTE, + FeatureFP16FML, FeatureSVE, 
FeatureTRBE, + FeatureSVE2BitPerm, FeatureBF16, FeatureETE, + FeaturePerfMon, FeatureMatMulInt8, FeatureSPE]; + list<SubtargetFeature> A720 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML, + FeatureTRBE, FeatureSVE2BitPerm, FeatureETE, + FeaturePerfMon, FeatureSPE, FeatureSPE_EEF]; + list<SubtargetFeature> A720AE = [HasV9_2aOps, FeatureMTE, FeatureFP16FML, + FeatureTRBE, FeatureSVE2BitPerm, FeatureETE, + FeaturePerfMon, FeatureSPE, FeatureSPE_EEF]; + list<SubtargetFeature> R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16, + FeatureFP16FML, FeatureSSBS, FeaturePredRes, + FeatureSB, FeatureRDM, FeatureDotProd, + FeatureComplxNum, FeatureJS]; + list<SubtargetFeature> X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureRCPC, FeaturePerfMon, + FeatureSPE, FeatureFullFP16, FeatureDotProd, + FeatureSSBS]; + list<SubtargetFeature> X1C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureRCPC_IMMO, FeaturePerfMon, + FeatureSPE, FeatureFullFP16, FeatureDotProd, + FeaturePAuth, FeatureSSBS, FeatureFlagM, + FeatureLSE2]; + list<SubtargetFeature> X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, + FeatureMatMulInt8, FeatureBF16, FeatureAM, + FeatureMTE, FeatureETE, FeatureSVE2BitPerm, + FeatureFP16FML]; + list<SubtargetFeature> X3 = [HasV9_0aOps, FeatureSVE, FeatureNEON, + FeaturePerfMon, FeatureETE, FeatureTRBE, + FeatureSPE, FeatureBF16, FeatureMatMulInt8, + FeatureMTE, FeatureSVE2BitPerm, FeatureFullFP16, + FeatureFP16FML]; + list<SubtargetFeature> X4 = [HasV9_2aOps, + FeaturePerfMon, FeatureETE, FeatureTRBE, + FeatureSPE, FeatureMTE, FeatureSVE2BitPerm, + FeatureFP16FML, FeatureSPE_EEF]; + list<SubtargetFeature> A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON, + FeatureSHA2, FeaturePerfMon, FeatureFullFP16, + FeatureSVE, FeatureComplxNum]; + list<SubtargetFeature> Carmel = [HasV8_2aOps, FeatureNEON, FeatureCrypto, + FeatureFullFP16]; + list<SubtargetFeature> AppleA7 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON,FeaturePerfMon, FeatureAppleA7SysReg]; + list<SubtargetFeature> AppleA10 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureCRC, + FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH]; + list<SubtargetFeature> AppleA11 = 
[HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureFullFP16]; + list<SubtargetFeature> AppleA12 = [HasV8_3aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureFullFP16]; + list<SubtargetFeature> AppleA13 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureFullFP16, + FeatureFP16FML, FeatureSHA3]; + list<SubtargetFeature> AppleA14 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureFRInt3264, + FeatureSpecRestrict, FeatureSSBS, FeatureSB, + FeaturePredRes, FeatureCacheDeepPersist, + FeatureFullFP16, FeatureFP16FML, FeatureSHA3, + FeatureAltFPCmp]; + list<SubtargetFeature> AppleA15 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureSHA3, + FeatureFullFP16, FeatureFP16FML]; + list<SubtargetFeature> AppleA16 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureSHA3, + FeatureFullFP16, FeatureFP16FML, + FeatureHCX]; + list<SubtargetFeature> AppleA17 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureSHA3, + FeatureFullFP16, FeatureFP16FML, + FeatureHCX]; + list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, + FeaturePerfMon]; + list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, + FeatureFullFP16, FeaturePerfMon]; + list<SubtargetFeature> Falkor = [HasV8_0aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeatureNEON, FeaturePerfMon, + FeatureRDM]; + list<SubtargetFeature> NeoverseE1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, + FeatureFPARMv8, FeatureFullFP16, FeatureNEON, + FeatureRCPC, FeatureSSBS, FeaturePerfMon]; + list<SubtargetFeature> NeoverseN1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, + FeatureFPARMv8, FeatureFullFP16, FeatureNEON, + FeatureRCPC, FeatureSPE, FeatureSSBS, + FeaturePerfMon]; + list<SubtargetFeature> NeoverseN2 = [HasV9_0aOps, FeatureBF16, FeatureETE, FeatureFP16FML, + FeatureMatMulInt8, FeatureMTE, FeatureSVE2, + FeatureSVE2BitPerm, FeatureTRBE, + FeaturePerfMon]; + list<SubtargetFeature> Neoverse512TVB = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist, + FeatureCrypto, FeatureFPARMv8, 
FeatureFP16FML, + FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, + FeaturePerfMon, FeatureRandGen, FeatureSPE, + FeatureSSBS, FeatureSVE]; + list<SubtargetFeature> NeoverseV1 = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist, + FeatureCrypto, FeatureFPARMv8, FeatureFP16FML, + FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, + FeaturePerfMon, FeatureRandGen, FeatureSPE, + FeatureSSBS, FeatureSVE]; + list<SubtargetFeature> NeoverseV2 = [HasV9_0aOps, FeatureBF16, FeatureSPE, + FeaturePerfMon, FeatureETE, FeatureMatMulInt8, + FeatureNEON, FeatureSVE2BitPerm, FeatureFP16FML, + FeatureMTE, FeatureRandGen]; + list<SubtargetFeature> Saphira = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureSPE, FeaturePerfMon]; + list<SubtargetFeature> ThunderX = [HasV8_0aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeaturePerfMon, FeatureNEON]; + list<SubtargetFeature> ThunderX2T99 = [HasV8_1aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeatureNEON, FeatureLSE]; + list<SubtargetFeature> ThunderX3T110 = [HasV8_3aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeatureNEON, FeatureLSE, + FeaturePAuth, FeaturePerfMon]; + list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureSPE, + FeatureFullFP16, FeatureFP16FML, FeatureDotProd, + FeatureJS, FeatureComplxNum]; + list<SubtargetFeature> Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, + FeatureSSBS, FeatureRandGen, FeatureSB, + FeatureSHA2, FeatureSHA3, FeatureAES]; + list<SubtargetFeature> Ampere1A = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, + FeatureMTE, FeatureSSBS, FeatureRandGen, + FeatureSB, FeatureSM4, FeatureSHA2, + FeatureSHA3, FeatureAES]; + list<SubtargetFeature> Ampere1B = [HasV8_7aOps, FeatureNEON, FeaturePerfMon, + FeatureMTE, FeatureSSBS, FeatureRandGen, + FeatureSB, FeatureSM4, FeatureSHA2, + FeatureSHA3, FeatureAES, FeatureCSSC, + FeatureWFxT, FeatureFullFP16]; + + // ETE and TRBE are future architecture extensions. We temporarily enable them + // by default for users targeting generic AArch64. 
The extensions do not + // affect code generated by the compiler and can be used only by explicitly + // mentioning the new system register names in assembly. + list<SubtargetFeature> Generic = [FeatureFPARMv8, FeatureNEON, FeatureETE]; +} + +// FeatureFuseAdrpAdd is enabled under Generic to allow linker merging +// optimizations. +def : ProcessorModel<"generic", CortexA510Model, ProcessorFeatures.Generic, + [FeatureFuseAES, FeatureFuseAdrpAdd, FeaturePostRAScheduler, + FeatureEnableSelectOptimize]>; +def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53, + [TuneA35]>; +def : ProcessorModel<"cortex-a34", CortexA53Model, ProcessorFeatures.A53, + [TuneA35]>; +def : ProcessorModel<"cortex-a53", CortexA53Model, ProcessorFeatures.A53, + [TuneA53]>; +def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55, + [TuneA55]>; +def : ProcessorModel<"cortex-a510", CortexA510Model, ProcessorFeatures.A510, + [TuneA510]>; +def : ProcessorModel<"cortex-a520", CortexA510Model, ProcessorFeatures.A520, + [TuneA520]>; +def : ProcessorModel<"cortex-a520ae", CortexA510Model, ProcessorFeatures.A520AE, + [TuneA520AE]>; +def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53, + [TuneA57]>; +def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65, + [TuneA65]>; +def : ProcessorModel<"cortex-a65ae", CortexA53Model, ProcessorFeatures.A65, + [TuneA65]>; +def : ProcessorModel<"cortex-a72", CortexA57Model, ProcessorFeatures.A53, + [TuneA72]>; +def : ProcessorModel<"cortex-a73", CortexA57Model, ProcessorFeatures.A53, + [TuneA73]>; +def : ProcessorModel<"cortex-a75", CortexA57Model, ProcessorFeatures.A55, + [TuneA75]>; +def : ProcessorModel<"cortex-a76", CortexA57Model, ProcessorFeatures.A76, + [TuneA76]>; +def : ProcessorModel<"cortex-a76ae", CortexA57Model, ProcessorFeatures.A76, + [TuneA76]>; +def : ProcessorModel<"cortex-a77", CortexA57Model, ProcessorFeatures.A77, + [TuneA77]>; +def : ProcessorModel<"cortex-a78", CortexA57Model, 
ProcessorFeatures.A78, + [TuneA78]>; +def : ProcessorModel<"cortex-a78ae", CortexA57Model, ProcessorFeatures.A78AE, + [TuneA78AE]>; +def : ProcessorModel<"cortex-a78c", CortexA57Model, ProcessorFeatures.A78C, + [TuneA78C]>; +def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710, + [TuneA710]>; +def : ProcessorModel<"cortex-a715", NeoverseN2Model, ProcessorFeatures.A715, + [TuneA715]>; +def : ProcessorModel<"cortex-a720", NeoverseN2Model, ProcessorFeatures.A720, + [TuneA720]>; +def : ProcessorModel<"cortex-a720ae", NeoverseN2Model, ProcessorFeatures.A720AE, + [TuneA720AE]>; +def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82, + [TuneR82]>; +def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1, + [TuneX1]>; +def : ProcessorModel<"cortex-x1c", CortexA57Model, ProcessorFeatures.X1C, + [TuneX1]>; +def : ProcessorModel<"cortex-x2", NeoverseN2Model, ProcessorFeatures.X2, + [TuneX2]>; +def : ProcessorModel<"cortex-x3", NeoverseN2Model, ProcessorFeatures.X3, + [TuneX3]>; +def : ProcessorModel<"cortex-x4", NeoverseN2Model, ProcessorFeatures.X4, + [TuneX4]>; +def : ProcessorModel<"neoverse-e1", CortexA53Model, + ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>; +def : ProcessorModel<"neoverse-n1", NeoverseN1Model, + ProcessorFeatures.NeoverseN1, [TuneNeoverseN1]>; +def : ProcessorModel<"neoverse-n2", NeoverseN2Model, + ProcessorFeatures.NeoverseN2, [TuneNeoverseN2]>; +def : ProcessorModel<"neoverse-512tvb", NeoverseV1Model, + ProcessorFeatures.Neoverse512TVB, [TuneNeoverse512TVB]>; +def : ProcessorModel<"neoverse-v1", NeoverseV1Model, + ProcessorFeatures.NeoverseV1, [TuneNeoverseV1]>; +def : ProcessorModel<"neoverse-v2", NeoverseV2Model, + ProcessorFeatures.NeoverseV2, [TuneNeoverseV2]>; +def : ProcessorModel<"exynos-m3", ExynosM3Model, ProcessorFeatures.ExynosM3, + [TuneExynosM3]>; +def : ProcessorModel<"exynos-m4", ExynosM4Model, ProcessorFeatures.ExynosM4, + [TuneExynosM4]>; +def : ProcessorModel<"exynos-m5", 
ExynosM5Model, ProcessorFeatures.ExynosM4, + [TuneExynosM4]>; +def : ProcessorModel<"falkor", FalkorModel, ProcessorFeatures.Falkor, + [TuneFalkor]>; +def : ProcessorModel<"saphira", FalkorModel, ProcessorFeatures.Saphira, + [TuneSaphira]>; +def : ProcessorModel<"kryo", KryoModel, ProcessorFeatures.A53, [TuneKryo]>; + +// Cavium ThunderX/ThunderX T8X Processors +def : ProcessorModel<"thunderx", ThunderXT8XModel, ProcessorFeatures.ThunderX, + [TuneThunderX]>; +def : ProcessorModel<"thunderxt88", ThunderXT8XModel, + ProcessorFeatures.ThunderX, [TuneThunderXT88]>; +def : ProcessorModel<"thunderxt81", ThunderXT8XModel, + ProcessorFeatures.ThunderX, [TuneThunderXT81]>; +def : ProcessorModel<"thunderxt83", ThunderXT8XModel, + ProcessorFeatures.ThunderX, [TuneThunderXT83]>; +// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan. +def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, + ProcessorFeatures.ThunderX2T99, [TuneThunderX2T99]>; +// Marvell ThunderX3T110 Processors. +def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, + ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>; +def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110, + [TuneTSV110]>; + +// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode. 
+def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7, + [TuneAppleA7]>; + +// iPhone and iPad CPUs +def : ProcessorModel<"apple-a7", CycloneModel, ProcessorFeatures.AppleA7, + [TuneAppleA7]>; +def : ProcessorModel<"apple-a8", CycloneModel, ProcessorFeatures.AppleA7, + [TuneAppleA7]>; +def : ProcessorModel<"apple-a9", CycloneModel, ProcessorFeatures.AppleA7, + [TuneAppleA7]>; +def : ProcessorModel<"apple-a10", CycloneModel, ProcessorFeatures.AppleA10, + [TuneAppleA10]>; +def : ProcessorModel<"apple-a11", CycloneModel, ProcessorFeatures.AppleA11, + [TuneAppleA11]>; +def : ProcessorModel<"apple-a12", CycloneModel, ProcessorFeatures.AppleA12, + [TuneAppleA12]>; +def : ProcessorModel<"apple-a13", CycloneModel, ProcessorFeatures.AppleA13, + [TuneAppleA13]>; +def : ProcessorModel<"apple-a14", CycloneModel, ProcessorFeatures.AppleA14, + [TuneAppleA14]>; +def : ProcessorModel<"apple-a15", CycloneModel, ProcessorFeatures.AppleA15, + [TuneAppleA15]>; +def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16, + [TuneAppleA16]>; +def : ProcessorModel<"apple-a17", CycloneModel, ProcessorFeatures.AppleA17, + [TuneAppleA17]>; +// Mac CPUs +def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14, + [TuneAppleA14]>; +def : ProcessorModel<"apple-m2", CycloneModel, ProcessorFeatures.AppleA15, + [TuneAppleA15]>; +def : ProcessorModel<"apple-m3", CycloneModel, ProcessorFeatures.AppleA16, + [TuneAppleA16]>; + +// watch CPUs. +def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12, + [TuneAppleA12]>; +def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12, + [TuneAppleA12]>; + +// Alias for the latest Apple processor model supported by LLVM. 
+def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA16, + [TuneAppleA16]>; + +// Fujitsu A64FX +def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX, + [TuneA64FX]>; + +// Nvidia Carmel +def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel, + [TuneCarmel]>; + +// Ampere Computing +def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1, + [TuneAmpere1]>; + +def : ProcessorModel<"ampere1a", Ampere1Model, ProcessorFeatures.Ampere1A, + [TuneAmpere1A]>; + +def : ProcessorModel<"ampere1b", Ampere1BModel, ProcessorFeatures.Ampere1B, + [TuneAmpere1B]>; diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 66596dbda83c95..570aae9b3c7a76 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -15,1203 +15,8 @@ include "llvm/Target/Target.td" -//===----------------------------------------------------------------------===// -// ARM Subtarget state. -// - -// True if compiling for Thumb, false for ARM. -def ModeThumb : SubtargetFeature<"thumb-mode", "IsThumb", - "true", "Thumb mode">; - -// True if we're using software floating point features. -def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat", - "true", "Use software floating " - "point features.">; - - -//===----------------------------------------------------------------------===// -// ARM Subtarget features. -// - -// Floating Point, HW Division and Neon Support - -// FP loads/stores/moves, shared between VFP and MVE (even in the integer-only -// version). -def FeatureFPRegs : SubtargetFeature<"fpregs", "HasFPRegs", "true", - "Enable FP registers">; - -// 16-bit FP loads/stores/moves, shared between VFP (with the v8.2A FP16 -// extension) and MVE (even in the integer-only version). 
-def FeatureFPRegs16 : SubtargetFeature<"fpregs16", "HasFPRegs16", "true", - "Enable 16-bit FP registers", - [FeatureFPRegs]>; - -def FeatureFPRegs64 : SubtargetFeature<"fpregs64", "HasFPRegs64", "true", - "Enable 64-bit FP registers", - [FeatureFPRegs]>; - -// True if the floating point unit supports double precision. -def FeatureFP64 : SubtargetFeature<"fp64", "HasFP64", "true", - "Floating point unit supports " - "double precision", - [FeatureFPRegs64]>; - -// True if subtarget has the full 32 double precision FP registers for VFPv3. -def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true", - "Extend FP to 32 double registers">; - -/// Versions of the VFP flags restricted to single precision, or to -/// 16 d-registers, or both. -multiclass VFPver<string name, string query, string description, - list<SubtargetFeature> prev, - list<SubtargetFeature> otherimplies, - list<SubtargetFeature> vfp2prev = []> { - def _D16_SP: SubtargetFeature< - name#"d16sp", query#"D16SP", "true", - description#" with only 16 d-registers and no double precision", - !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16_SP")) # - !foreach(v, vfp2prev, !cast<SubtargetFeature>(v # "_SP")) # - otherimplies>; - def _SP: SubtargetFeature< - name#"sp", query#"SP", "true", - description#" with no double precision", - !foreach(v, prev, !cast<SubtargetFeature>(v # "_SP")) # - otherimplies # [FeatureD32, !cast<SubtargetFeature>(NAME # "_D16_SP")]>; - def _D16: SubtargetFeature< - name#"d16", query#"D16", "true", - description#" with only 16 d-registers", - !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16")) # - vfp2prev # - otherimplies # [FeatureFP64, !cast<SubtargetFeature>(NAME # "_D16_SP")]>; - def "": SubtargetFeature< - name, query, "true", description, - prev # otherimplies # [ - !cast<SubtargetFeature>(NAME # "_D16"), - !cast<SubtargetFeature>(NAME # "_SP")]>; -} - -def FeatureVFP2_SP : SubtargetFeature<"vfp2sp", "HasVFPv2SP", "true", - "Enable VFP2 instructions with " - "no double precision", - [FeatureFPRegs]>; - -def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", - "Enable VFP2 instructions", - [FeatureFP64, FeatureVFP2_SP]>; - -defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions", - [], [],
[FeatureVFP2]>; - -def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", - "Enable NEON instructions", - [FeatureVFP3]>; - -// True if subtarget supports half-precision FP conversions. -def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", - "Enable half-precision " - "floating point">; - -defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions", - [FeatureVFP3], [FeatureFP16]>; - -defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP", - [FeatureVFP4], []>; - -// True if subtarget supports half-precision FP operations. -def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", - "Enable full half-precision " - "floating point", - [FeatureFPARMv8_D16_SP, FeatureFPRegs16]>; - -// True if subtarget supports half-precision FP fml operations. -def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", - "Enable full half-precision " - "floating point fml instructions", - [FeatureFullFP16]>; - -// True if subtarget supports [su]div in Thumb mode. -def FeatureHWDivThumb : SubtargetFeature<"hwdiv", - "HasDivideInThumbMode", "true", - "Enable divide instructions in Thumb">; - -// True if subtarget supports [su]div in ARM mode. -def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", - "HasDivideInARMMode", "true", - "Enable divide instructions in ARM mode">; - -// Atomic Support - -// True if the subtarget supports DMB / DSB data barrier instructions. -def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", - "Has data barrier (dmb/dsb) instructions">; - -// True if the subtarget supports CLREX instructions. -def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", - "Has v7 clrex instruction">; - -// True if the subtarget supports DFB data barrier instruction. -def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true", - "Has full data barrier (dfb) instruction">; - -// True if the subtarget supports v8 atomics (LDA/LDAEX etc) instructions. 
-def FeatureAcquireRelease : SubtargetFeature<"acquire-release", - "HasAcquireRelease", "true", - "Has v8 acquire/release (lda/ldaex " - " etc) instructions">; - - -// True if floating point compare + branch is slow. -def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "IsFPBrccSlow", "true", - "FP compare + branch is slow">; - -// True if the processor supports the Performance Monitor Extensions. These -// include a generic cycle-counter as well as more fine-grained (often -// implementation-specific) events. -def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", - "Enable support for Performance " - "Monitor extensions">; - - -// TrustZone Security Extensions - -// True if processor supports TrustZone security extensions. -def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", - "Enable support for TrustZone " - "security extensions">; - -// True if processor supports ARMv8-M Security Extensions. -def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", - "Enable support for ARMv8-M " - "Security Extensions">; - -// True if processor supports SHA1 and SHA256. -def FeatureSHA2 : SubtargetFeature<"sha2", "HasSHA2", "true", - "Enable SHA1 and SHA256 support", [FeatureNEON]>; - -def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", - "Enable AES support", [FeatureNEON]>; - -// True if processor supports Cryptography extensions. -def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", - "Enable support for " - "Cryptography extensions", - [FeatureNEON, FeatureSHA2, FeatureAES]>; - -// True if processor supports CRC instructions. -def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", - "Enable support for CRC instructions">; - -// True if the ARMv8.2A dot product instructions are supported. -def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true", - "Enable support for dot product instructions", - [FeatureNEON]>; - -// True if the processor supports RAS extensions. 
-// Not to be confused with FeatureHasRetAddrStack (return address stack). -def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", - "Enable Reliability, Availability " - "and Serviceability extensions">; - -// Fast computation of non-negative address offsets. -// True if processor does positive address offset computation faster. -def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", - "Enable fast computation of " - "positive address offsets">; - -// Fast execution of AES crypto operations. -// True if processor executes back to back AES instruction pairs faster. -def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", - "CPU fuses AES crypto operations">; - -// Fast execution of bottom and top halves of literal generation. -// True if processor executes back to back bottom and top halves of literal generation faster. -def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true", - "CPU fuses literal generation operations">; - -// Choice of hardware register to use as the thread pointer, if any. -def FeatureReadTpTPIDRURW : SubtargetFeature<"read-tp-tpidrurw", "IsReadTPTPIDRURW", "true", - "Reading thread pointer from TPIDRURW register">; -def FeatureReadTpTPIDRURO : SubtargetFeature<"read-tp-tpidruro", "IsReadTPTPIDRURO", "true", - "Reading thread pointer from TPIDRURO register">; -def FeatureReadTpTPIDRPRW : SubtargetFeature<"read-tp-tpidrprw", "IsReadTPTPIDRPRW", "true", - "Reading thread pointer from TPIDRPRW register">; - -// Cyclone can zero VFP registers in 0 cycles. -// True if the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are -// particularly effective at zeroing a VFP register. -def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", - "Has zero-cycle zeroing instructions">; - -// Whether it is profitable to unpredicate certain instructions during if-conversion. -// True if if conversion may decide to leave some instructions unpredicated. 
-def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr", - "IsProfitableToUnpredicate", "true", - "Is profitable to unpredicate">; - -// Some targets (e.g. Swift) have microcoded VGETLNi32. -// True if VMOV will be favored over VGETLNi32. -def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", - "HasSlowVGETLNi32", "true", - "Has slow VGETLNi32 - prefer VMOV">; - -// Some targets (e.g. Swift) have microcoded VDUP32. -// True if VMOV will be favored over VDUP. -def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", - "true", - "Has slow VDUP32 - prefer VMOV">; - -// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON -// for scalar FP, as this allows more effective execution domain optimization. -// True if VMOVSR will be favored over VMOVDRR. -def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", - "true", "Prefer VMOVSR">; - -// Swift has ISHST barriers compatible with Atomic Release semantics but weaker -// than ISH. -// True if ISHST barriers will be used for Release semantics. -def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHSTBarriers", - "true", "Prefer ISHST barriers">; - -// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU. -// True if the AGU and NEON/FPU units are multiplexed. -def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", - "true", - "Has muxed AGU and NEON/FPU">; - -// Whether VLDM/VSTM starting with odd register number need more microops -// than single VLDRS. -// True if a VLDM/VSTM starting with an odd register number is considered to -// take more microops than single VLDRS/VSTRS. -def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "HasSlowOddRegister", - "true", "VLDM/VSTM starting " - "with an odd register is slow">; - -// Some targets have a renaming dependency when loading into D subregisters. -// True if loading into a D subregister will be penalized. 
-def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", - "HasSlowLoadDSubregister", "true", - "Loading into D subregs is slow">; - -// True if use a wider stride when allocating VFP registers. -def FeatureUseWideStrideVFP : SubtargetFeature<"wide-stride-vfp", - "UseWideStrideVFP", "true", - "Use a wide stride when allocating VFP registers">; - -// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. -// True if VMOVS will never be widened to VMOVD. -def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", - "DontWidenVMOVS", "true", - "Don't widen VMOVS to VMOVD">; - -// Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different -// VFP register widths. -// True if splat a register between VFP and NEON instructions. -def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon", - "UseSplatVFPToNeon", "true", - "Splat register from VFP to NEON", - [FeatureDontWidenVMOVS]>; - -// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions. -// True if run the MLx expansion pass. -def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", - "ExpandMLx", "true", - "Expand VFP/NEON MLA/MLS instructions">; - -// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS. -// True if VFP/NEON VMLA/VMLS have special RAW hazards. -def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", - "true", "Has VMLx hazards">; - -// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from -// VFP to NEON, as an execution domain optimization. -// True if VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON. -def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", - "UseNEONForFPMovs", "true", - "Convert VMOVSR, VMOVRS, " - "VMOVS to NEON">; - -// Some processors benefit from using NEON instructions for scalar -// single-precision FP operations. This affects instruction selection and should -// only be enabled if the handling of denormals is not important. 
-// Use the method useNEONForSinglePrecisionFP() to determine if NEON should actually be used. -def FeatureNEONForFP : SubtargetFeature<"neonfp", - "HasNEONForFP", - "true", - "Use NEON for single precision FP">; - -// On some processors, VLDn instructions that access unaligned data take one -// extra cycle. Take that into account when computing operand latencies. -// True if VLDn instructions take an extra cycle for unaligned accesses. -def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAccessAlignment", - "true", - "Check for VLDn unaligned access">; - -// Some processors have a nonpipelined VFP coprocessor. -// True if VFP instructions are not pipelined. -def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", - "NonpipelinedVFP", "true", - "VFP instructions are not pipelined">; - -// Some processors have FP multiply-accumulate instructions that don't -// play nicely with other VFP / NEON instructions, and it's generally better -// to just not use them. -// If the VFP2 / NEON instructions are available, indicates -// whether the FP VML[AS] instructions are slow (if so, don't use them). -def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", - "Disable VFP / NEON MAC instructions">; - -// VFPv4 added VFMA instructions that can similarly be fast or slow. -// If the VFP4 / NEON instructions are available, indicates -// whether the FP VFM[AS] instructions are slow (if so, don't use them). -def FeatureHasSlowFPVFMx : SubtargetFeature<"slowfpvfmx", "SlowFPVFMx", "true", - "Disable VFP / NEON FMA instructions">; - -// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. -/// True if NEON has special multiplier accumulator -/// forwarding to allow mul + mla being issued back to back. -def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", - "HasVMLxForwarding", "true", - "Has multiplier accumulator forwarding">; - -// Disable 32-bit to 16-bit narrowing for experimentation. 
-// True if codegen would prefer 32-bit Thumb instructions over 16-bit ones. -def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Prefers32BitThumb", "true", - "Prefer 32-bit Thumb instrs">; - -def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2", - "Prefer 32-bit alignment for loops">; - -def FeatureMVEVectorCostFactor1 : SubtargetFeature<"mve1beat", "MVEVectorCostFactor", "4", - "Model MVE instructions as a 1 beat per tick architecture">; - -def FeatureMVEVectorCostFactor2 : SubtargetFeature<"mve2beat", "MVEVectorCostFactor", "2", - "Model MVE instructions as a 2 beats per tick architecture">; - -def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFactor", "1", - "Model MVE instructions as a 4 beats per tick architecture">; - -/// Some instructions update CPSR partially, which can add false dependency for -/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is -/// mapped to a separate physical register. Avoid partial CPSR update for these -/// processors. -/// True if codegen would avoid using instructions -/// that partially update CPSR and add false dependency on the previous -/// CPSR setting instruction. -def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", - "AvoidCPSRPartialUpdate", "true", - "Avoid CPSR partial update for OOO execution">; - -/// Disable +1 predication cost for instructions updating CPSR. -/// Enabled for Cortex-A57. -/// True if disable +1 predication cost for instructions updating CPSR. Enabled for Cortex-A57. -def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr", - "CheapPredicableCPSRDef", - "true", - "Disable +1 predication cost for instructions updating CPSR">; - -// True if codegen should avoid using flag setting movs with shifter operand (i.e. asr, lsl, lsr). 
-def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", - "AvoidMOVsShifterOperand", "true", - "Avoid movs instructions with " - "shifter operand">; - -// Some processors perform return stack prediction. CodeGen should avoid issue -// "normal" call instructions to callees which do not return. -def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", - "HasRetAddrStack", "true", - "Has return address stack">; - -// Some processors have no branch predictor, which changes the expected cost of -// taking a branch which affects the choice of whether to use predicated -// instructions. -// True if the subtarget has a branch predictor. Having -// a branch predictor or not changes the expected cost of taking a branch -// which affects the choice of whether to use predicated instructions. -def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor", - "HasBranchPredictor", "false", - "Has no branch predictor">; - -/// DSP extension. -/// True if the subtarget supports the DSP (saturating arith and such) instructions. -def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", - "Supports DSP instructions in " - "ARM and/or Thumb2">; - -// True if the subtarget supports Multiprocessing extension (ARMv7 only). -def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", - "Supports Multiprocessing extension">; - -// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8). -def FeatureVirtualization : SubtargetFeature<"virtualization", - "HasVirtualization", "true", - "Supports Virtualization extension", - [FeatureHWDivThumb, FeatureHWDivARM]>; - -// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. -// See ARMInstrInfo.td for details. -// True if NaCl TRAP instruction is generated instead of the regular TRAP. -def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", - "NaCl trap">; - -// True if the subtarget disallows unaligned memory -// accesses for some types. 
For details, see -// ARMTargetLowering::allowsMisalignedMemoryAccesses(). -def FeatureStrictAlign : SubtargetFeature<"strict-align", - "StrictAlign", "true", - "Disallow all unaligned memory " - "access">; - -// Generate calls via indirect call instructions. -def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", - "Generate calls via indirect call " - "instructions">; - -// Generate code that does not contain data access to code sections. -def FeatureExecuteOnly : SubtargetFeature<"execute-only", - "GenExecuteOnly", "true", - "Enable the generation of " - "execute only code.">; - -// True if R9 is not available as a general purpose register. -def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", - "Reserve R9, making it unavailable" - " as GPR">; - -// True if MOVT / MOVW pairs are not used for materialization of -// 32-bit imms (including global addresses). -def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", - "Don't use movt/movw pairs for " - "32-bit imms">; - -/// Implicitly convert an instruction to a different one if its immediates -/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1. -def FeatureNoNegativeImmediates - : SubtargetFeature<"no-neg-immediates", - "NegativeImmediates", "false", - "Convert immediates and instructions " - "to their negated or complemented " - "equivalent when the immediate does " - "not fit in the encoding.">; - -// Use the MachineScheduler for instruction scheduling for the subtarget. -def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true", - "Use the MachineScheduler">; - -// Use the MachinePipeliner for instruction scheduling for the subtarget. -def FeatureUseMIPipeliner: SubtargetFeature<"use-mipipeliner", "UseMIPipeliner", "true", - "Use the MachinePipeliner">; - -// False if scheduling should happen again after register allocation. 
-def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler", - "DisablePostRAScheduler", "true", - "Don't schedule again after register allocation">; - -// Armv8.5-A extensions - -// Has speculation barrier. -def FeatureSB : SubtargetFeature<"sb", "HasSB", "true", - "Enable v8.5a Speculation Barrier" >; - -// Armv8.6-A extensions - -// True if subtarget supports BFloat16 floating point operations. -def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true", - "Enable support for BFloat16 instructions", [FeatureNEON]>; - -// True if subtarget supports 8-bit integer matrix multiply. -def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8", - "true", "Enable Matrix Multiply Int8 Extension", [FeatureNEON]>; - -// Armv8.1-M extensions - -// True if the processor supports the Low Overhead Branch extension. -def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true", - "Enable Low Overhead Branch " - "extensions">; - -// Mitigate against the cve-2021-35465 security vulnurability. -def FeatureFixCMSE_CVE_2021_35465 : SubtargetFeature<"fix-cmse-cve-2021-35465", - "FixCMSE_CVE_2021_35465", "true", - "Mitigate against the cve-2021-35465 " - "security vulnurability">; - -def FeaturePACBTI : SubtargetFeature<"pacbti", "HasPACBTI", "true", - "Enable Pointer Authentication and Branch " - "Target Identification">; - -/// Don't place a BTI instruction after return-twice constructs (setjmp). 
-def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice", - "NoBTIAtReturnTwice", "true", - "Don't place a BTI instruction " - "after a return-twice">; - -// Armv8.9-A/Armv9.4-A 2022 Architecture Extensions -def FeatureCLRBHB : SubtargetFeature<"clrbhb", "HasCLRBHB", "true", - "Enable Clear BHB instruction">; - - -def FeatureFixCortexA57AES1742098 : SubtargetFeature<"fix-cortex-a57-aes-1742098", - "FixCortexA57AES1742098", "true", - "Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">; - -def FeatureAAPCSFrameChain : SubtargetFeature<"aapcs-frame-chain", - "CreateAAPCSFrameChain", "true", - "Create an AAPCS compliant frame chain">; - -def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf", - "CreateAAPCSFrameChainLeaf", "true", - "Create an AAPCS compliant frame chain " - "for leaf functions", - [FeatureAAPCSFrameChain]>; - -// Assume that lock-free 32-bit atomics are available, even if the target -// and operating system combination would not usually provide them. The user -// is responsible for providing any necessary __sync implementations. Code -// built with this feature is not ABI-compatible with code built without this -// feature, if atomic variables are exposed across the ABI boundary. -def FeatureAtomics32 : SubtargetFeature< - "atomics-32", "HasForced32BitAtomics", "true", - "Assume that lock-free 32-bit atomics are available">; - -//===----------------------------------------------------------------------===// -// ARM architecture class -// - -// A-series ISA -def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", - "Is application profile ('A' series)">; - -// R-series ISA -def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", - "Is realtime profile ('R' series)">; - -// M-series ISA -def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", - "Is microcontroller profile ('M' series)">; - -// True if Thumb2 instructions are supported. 
-def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", - "Enable Thumb2 instructions">; - -// True if subtarget does not support ARM mode execution. -def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", - "Does not support ARM mode execution">; - -//===----------------------------------------------------------------------===// -// ARM ISAa. -// -// Specify whether target support specific ARM ISA variants. - -def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", - "Support ARM v4T instructions">; - -def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true", - "Support ARM v5T instructions", - [HasV4TOps]>; - -def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true", - "Support ARM v5TE, v5TEj, and " - "v5TExp instructions", - [HasV5TOps]>; - -def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", - "Support ARM v6 instructions", - [HasV5TEOps]>; - -def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", - "Support ARM v6M instructions", - [HasV6Ops]>; - -def HasV8MBaselineOps : SubtargetFeature<"v8m", "HasV8MBaselineOps", "true", - "Support ARM v8M Baseline instructions", - [HasV6MOps]>; - -def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true", - "Support ARM v6k instructions", - [HasV6Ops]>; - -def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", - "Support ARM v6t2 instructions", - [HasV8MBaselineOps, HasV6KOps, FeatureThumb2]>; - -def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", - "Support ARM v7 instructions", - [HasV6T2Ops, FeatureV7Clrex]>; - -def HasV8MMainlineOps : - SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true", - "Support ARM v8M Mainline instructions", - [HasV7Ops]>; - -def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", - "Support ARM v8 instructions", - [HasV7Ops, FeaturePerfMon, FeatureAcquireRelease]>; - -def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", - "Support ARM v8.1a instructions", - [HasV8Ops]>; - -def HasV8_2aOps : 
SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", - "Support ARM v8.2a instructions", - [HasV8_1aOps]>; - -def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", - "Support ARM v8.3a instructions", - [HasV8_2aOps]>; - -def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", - "Support ARM v8.4a instructions", - [HasV8_3aOps, FeatureDotProd]>; - -def HasV8_5aOps : SubtargetFeature<"v8.5a", "HasV8_5aOps", "true", - "Support ARM v8.5a instructions", - [HasV8_4aOps, FeatureSB]>; - -def HasV8_6aOps : SubtargetFeature<"v8.6a", "HasV8_6aOps", "true", - "Support ARM v8.6a instructions", - [HasV8_5aOps, FeatureBF16, - FeatureMatMulInt8]>; - -def HasV8_7aOps : SubtargetFeature<"v8.7a", "HasV8_7aOps", "true", - "Support ARM v8.7a instructions", - [HasV8_6aOps]>; - -def HasV8_8aOps : SubtargetFeature<"v8.8a", "HasV8_8aOps", "true", - "Support ARM v8.8a instructions", - [HasV8_7aOps]>; - -def HasV8_9aOps : SubtargetFeature<"v8.9a", "HasV8_9aOps", "true", - "Support ARM v8.9a instructions", - [HasV8_8aOps, FeatureCLRBHB]>; - -def HasV9_0aOps : SubtargetFeature<"v9a", "HasV9_0aOps", "true", - "Support ARM v9a instructions", - [HasV8_5aOps]>; - -def HasV9_1aOps : SubtargetFeature<"v9.1a", "HasV9_1aOps", "true", - "Support ARM v9.1a instructions", - [HasV8_6aOps, HasV9_0aOps]>; - -def HasV9_2aOps : SubtargetFeature<"v9.2a", "HasV9_2aOps", "true", - "Support ARM v9.2a instructions", - [HasV8_7aOps, HasV9_1aOps]>; - -def HasV9_3aOps : SubtargetFeature<"v9.3a", "HasV9_3aOps", "true", - "Support ARM v9.3a instructions", - [HasV8_8aOps, HasV9_2aOps]>; - -def HasV9_4aOps : SubtargetFeature<"v9.4a", "HasV9_4aOps", "true", - "Support ARM v9.4a instructions", - [HasV8_9aOps, HasV9_3aOps]>; - -// Armv9.5-A is a v9-only architecture. From v9.5-A onwards there's no mapping -// to an equivalent v8.x version. 
-def HasV9_5aOps : SubtargetFeature<"v9.5a", "HasV9_5aOps", "true", - "Support ARM v9.5a instructions", - [HasV9_4aOps]>; - -def HasV8_1MMainlineOps : SubtargetFeature< - "v8.1m.main", "HasV8_1MMainlineOps", "true", - "Support ARM v8-1M Mainline instructions", - [HasV8MMainlineOps]>; -def HasMVEIntegerOps : SubtargetFeature< - "mve", "HasMVEIntegerOps", "true", - "Support M-Class Vector Extension with integer ops", - [HasV8_1MMainlineOps, FeatureDSP, FeatureFPRegs16, FeatureFPRegs64]>; -def HasMVEFloatOps : SubtargetFeature< - "mve.fp", "HasMVEFloatOps", "true", - "Support M-Class Vector Extension with integer and floating ops", - [HasMVEIntegerOps, FeatureFPARMv8_D16_SP, FeatureFullFP16]>; - -def HasCDEOps : SubtargetFeature<"cde", "HasCDEOps", "true", - "Support CDE instructions", - [HasV8MMainlineOps]>; - -foreach i = {0-7} in - def FeatureCoprocCDE#i : SubtargetFeature<"cdecp"#i, - "CoprocCDE["#i#"]", "true", - "Coprocessor "#i#" ISA is CDEv1", - [HasCDEOps]>; - -//===----------------------------------------------------------------------===// -// Control codegen mitigation against Straight Line Speculation vulnerability. -//===----------------------------------------------------------------------===// - -/// Harden against Straight Line Speculation for Returns and Indirect Branches. -def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr", - "HardenSlsRetBr", "true", - "Harden against straight line speculation across RETurn and BranchRegister " - "instructions">; -/// Harden against Straight Line Speculation for indirect calls. -def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr", - "HardenSlsBlr", "true", - "Harden against straight line speculation across indirect calls">; -/// Generate thunk code for SLS mitigation in the normal text section. 
-def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat", - "HardenSlsNoComdat", "true", - "Generate thunk code for SLS mitigation in the normal text section">; - -//===----------------------------------------------------------------------===// -// Endianness of instruction encodings in memory. -// -// In the current Arm architecture, this is usually little-endian regardless of -// data endianness. But before Armv7 it was typical for instruction endianness -// to match data endianness, so that a big-endian system was consistently big- -// endian. And Armv7-R can be configured to use big-endian instructions. -// -// Additionally, even when targeting Armv7-A, big-endian instructions can be -// found in relocatable object files, because the Arm ABI specifies that the -// linker byte-reverses them depending on the target architecture. -// -// So we have a feature here to indicate that instructions are stored big- -// endian, which you can set when instantiating an MCDisassembler. -def ModeBigEndianInstructions : SubtargetFeature<"big-endian-instructions", - "BigEndianInstructions", "true", - "Expect instructions to be stored big-endian.">; - -//===----------------------------------------------------------------------===// -// ARM Processor subtarget features. 
-// - -def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", - "Cortex-A5 ARM processors", []>; -def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7", - "Cortex-A7 ARM processors", []>; -def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", - "Cortex-A8 ARM processors", []>; -def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", - "Cortex-A9 ARM processors", []>; -def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12", - "Cortex-A12 ARM processors", []>; -def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", - "Cortex-A15 ARM processors", []>; -def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17", - "Cortex-A17 ARM processors", []>; -def ProcA32 : SubtargetFeature<"a32", "ARMProcFamily", "CortexA32", - "Cortex-A32 ARM processors", []>; -def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", - "Cortex-A35 ARM processors", []>; -def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", - "Cortex-A53 ARM processors", []>; -def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", - "Cortex-A55 ARM processors", []>; -def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", - "Cortex-A57 ARM processors", []>; -def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", - "Cortex-A72 ARM processors", []>; -def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", - "Cortex-A73 ARM processors", []>; -def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", - "Cortex-A75 ARM processors", []>; -def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", - "Cortex-A76 ARM processors", []>; -def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", - "Cortex-A77 ARM processors", []>; -def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", "CortexA78", - "Cortex-A78 ARM processors", []>; -def ProcA78AE : SubtargetFeature<"cortex-a78ae", "ARMProcFamily", "CortexA78AE", - "Cortex-A78AE ARM 
processors", []>; -def ProcA78C : SubtargetFeature<"a78c", "ARMProcFamily", "CortexA78C", - "Cortex-A78C ARM processors", []>; -def ProcA710 : SubtargetFeature<"cortex-a710", "ARMProcFamily", - "CortexA710", "Cortex-A710 ARM processors", []>; -def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", - "Cortex-X1 ARM processors", []>; -def ProcX1C : SubtargetFeature<"cortex-x1c", "ARMProcFamily", "CortexX1C", - "Cortex-X1C ARM processors", []>; - -def ProcV1 : SubtargetFeature<"neoverse-v1", "ARMProcFamily", - "NeoverseV1", "Neoverse-V1 ARM processors", []>; - -def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", - "Qualcomm Krait processors", []>; -def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", - "Qualcomm Kryo processors", []>; -def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", - "Swift ARM processors", []>; - -def ProcExynos : SubtargetFeature<"exynos", "ARMProcFamily", "Exynos", - "Samsung Exynos processors", - [FeatureZCZeroing, - FeatureUseWideStrideVFP, - FeatureSplatVFPToNeon, - FeatureSlowVGETLNi32, - FeatureSlowVDUP32, - FeatureSlowFPBrcc, - FeatureProfUnpredicate, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureHasSlowFPVMLx, - FeatureHasSlowFPVFMx, - FeatureHasRetAddrStack, - FeatureFuseLiterals, - FeatureFuseAES, - FeatureExpandMLx, - FeatureCrypto, - FeatureCRC]>; - -def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", - "Cortex-R4 ARM processors", []>; -def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", - "Cortex-R5 ARM processors", []>; -def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7", - "Cortex-R7 ARM processors", []>; -def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52", - "Cortex-R52 ARM processors", []>; - -def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3", - "Cortex-M3 ARM processors", []>; -def ProcM7 : SubtargetFeature<"m7", "ARMProcFamily", "CortexM7", - "Cortex-M7 ARM processors", []>; - 
-//===----------------------------------------------------------------------===// -// ARM Helper classes. -// - -class Architecture features> - : SubtargetFeature; - -class ProcNoItin Features> - : Processor; - - -//===----------------------------------------------------------------------===// -// ARM architectures -// - -def ARMv4 : Architecture<"armv4", "ARMv4", []>; - -def ARMv4t : Architecture<"armv4t", "ARMv4t", [HasV4TOps]>; - -def ARMv5t : Architecture<"armv5t", "ARMv5t", [HasV5TOps]>; - -def ARMv5te : Architecture<"armv5te", "ARMv5te", [HasV5TEOps]>; - -def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>; - -def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops, - FeatureDSP]>; - -def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops, - FeatureDSP]>; - -def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>; - -def ARMv6kz : Architecture<"armv6kz", "ARMv6kz", [HasV6KOps, - FeatureTrustZone]>; - -def ARMv6m : Architecture<"armv6-m", "ARMv6m", [HasV6MOps, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureMClass, - FeatureStrictAlign]>; - -def ARMv6sm : Architecture<"armv6s-m", "ARMv6sm", [HasV6MOps, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureMClass, - FeatureStrictAlign]>; - -def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops, - FeatureNEON, - FeatureDB, - FeatureDSP, - FeatureAClass, - FeaturePerfMon]>; - -def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops, - FeatureNEON, - FeatureDB, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureAClass, - FeaturePerfMon]>; - -def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, - FeatureDB, - FeatureDSP, - FeatureHWDivThumb, - FeatureRClass, - FeaturePerfMon]>; - -def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, - FeatureThumb2, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureHWDivThumb, - FeatureMClass]>; - -def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, - FeatureThumb2, - FeatureNoARM, - ModeThumb, - FeatureDB, 
- FeatureHWDivThumb, - FeatureMClass, - FeatureDSP]>; - -def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC]>; - -def ARMv81a : Architecture<"armv8.1-a", "ARMv81a", [HasV8_1aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC]>; - -def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS]>; - -def ARMv83a : Architecture<"armv8.3-a", "ARMv83a", [HasV8_3aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS]>; - -def ARMv84a : Architecture<"armv8.4-a", "ARMv84a", [HasV8_4aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; - -def ARMv85a : Architecture<"armv8.5-a", "ARMv85a", [HasV8_5aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; -def ARMv86a : Architecture<"armv8.6-a", "ARMv86a", [HasV8_6aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; -def ARMv87a : Architecture<"armv8.7-a", "ARMv87a", [HasV8_7aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - 
FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; -def ARMv88a : Architecture<"armv8.8-a", "ARMv88a", [HasV8_8aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; -def ARMv89a : Architecture<"armv8.9-a", "ARMv89a", [HasV8_9aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; - -def ARMv9a : Architecture<"armv9-a", "ARMv9a", [HasV9_0aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; -def ARMv91a : Architecture<"armv9.1-a", "ARMv91a", [HasV9_1aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; -def ARMv92a : Architecture<"armv9.2-a", "ARMv92a", [HasV9_2aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; -def ARMv93a : Architecture<"armv9.3-a", "ARMv93a", [HasV9_3aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; -def ARMv94a : Architecture<"armv9.4-a", "ARMv94a", [HasV9_4aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; -def ARMv95a : Architecture<"armv9.5-a", "ARMv95a", [HasV9_5aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - 
FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; - -def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, - FeatureRClass, - FeatureDB, - FeatureDFB, - FeatureDSP, - FeatureCRC, - FeatureMP, - FeatureVirtualization, - FeatureFPARMv8, - FeatureNEON]>; - -def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline", - [HasV8MBaselineOps, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureHWDivThumb, - FeatureV7Clrex, - Feature8MSecExt, - FeatureAcquireRelease, - FeatureMClass, - FeatureStrictAlign]>; - -def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline", - [HasV8MMainlineOps, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureHWDivThumb, - Feature8MSecExt, - FeatureAcquireRelease, - FeatureMClass]>; - -def ARMv81mMainline : Architecture<"armv8.1-m.main", "ARMv81mMainline", - [HasV8_1MMainlineOps, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureHWDivThumb, - Feature8MSecExt, - FeatureAcquireRelease, - FeatureMClass, - FeatureRAS, - FeatureLOB]>; - -// Aliases -def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>; -def IWMMXT2 : Architecture<"iwmmxt2", "ARMv5te", [ARMv5te]>; -def XScale : Architecture<"xscale", "ARMv5te", [ARMv5te]>; -def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>; -def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>; -def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; +include "ARMFeatures.td" +include "ARMArchitectures.td" //===----------------------------------------------------------------------===// // Register File Description @@ -1249,485 +54,7 @@ include "ARMScheduleM55.td" include "ARMScheduleM7.td" include "ARMScheduleM85.td" -//===----------------------------------------------------------------------===// -// ARM processors -// -// Dummy CPU, used to target architectures -def : ProcessorModel<"generic", CortexA8Model, []>; - -// FIXME: Several processors below are not using their own scheduler -// model, but 
one of similar/previous processor. These should be fixed. - -def : ProcNoItin<"arm8", [ARMv4]>; -def : ProcNoItin<"arm810", [ARMv4]>; -def : ProcNoItin<"strongarm", [ARMv4]>; -def : ProcNoItin<"strongarm110", [ARMv4]>; -def : ProcNoItin<"strongarm1100", [ARMv4]>; -def : ProcNoItin<"strongarm1110", [ARMv4]>; - -def : ProcNoItin<"arm7tdmi", [ARMv4t]>; -def : ProcNoItin<"arm7tdmi-s", [ARMv4t]>; -def : ProcNoItin<"arm710t", [ARMv4t]>; -def : ProcNoItin<"arm720t", [ARMv4t]>; -def : ProcNoItin<"arm9", [ARMv4t]>; -def : ProcNoItin<"arm9tdmi", [ARMv4t]>; -def : ProcNoItin<"arm920", [ARMv4t]>; -def : ProcNoItin<"arm920t", [ARMv4t]>; -def : ProcNoItin<"arm922t", [ARMv4t]>; -def : ProcNoItin<"arm940t", [ARMv4t]>; -def : ProcNoItin<"ep9312", [ARMv4t]>; - -def : ProcNoItin<"arm10tdmi", [ARMv5t]>; -def : ProcNoItin<"arm1020t", [ARMv5t]>; - -def : ProcNoItin<"arm9e", [ARMv5te]>; -def : ProcNoItin<"arm926ej-s", [ARMv5te]>; -def : ProcNoItin<"arm946e-s", [ARMv5te]>; -def : ProcNoItin<"arm966e-s", [ARMv5te]>; -def : ProcNoItin<"arm968e-s", [ARMv5te]>; -def : ProcNoItin<"arm10e", [ARMv5te]>; -def : ProcNoItin<"arm1020e", [ARMv5te]>; -def : ProcNoItin<"arm1022e", [ARMv5te]>; -def : ProcNoItin<"xscale", [ARMv5te]>; -def : ProcNoItin<"iwmmxt", [ARMv5te]>; - -def : Processor<"arm1136j-s", ARMV6Itineraries, [ARMv6]>; -def : Processor<"arm1136jf-s", ARMV6Itineraries, [ARMv6, - FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -def : Processor<"cortex-m0", ARMV6Itineraries, [ARMv6m, - FeatureHasNoBranchPredictor]>; -def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m, - FeatureHasNoBranchPredictor]>; -def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m, - FeatureHasNoBranchPredictor]>; -def : Processor<"sc000", ARMV6Itineraries, [ARMv6m, - FeatureHasNoBranchPredictor]>; - -def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>; -def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz, - FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -def : Processor<"mpcorenovfp", ARMV6Itineraries, 
[ARMv6k]>; -def : Processor<"mpcore", ARMV6Itineraries, [ARMv6k, - FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -def : Processor<"arm1156t2-s", ARMV6Itineraries, [ARMv6t2]>; -def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2, - FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureSlowFPBrcc, - FeatureHasSlowFPVMLx, - FeatureHasSlowFPVFMx, - FeatureVMLxForwarding, - FeatureMP, - FeatureVFP4]>; - -def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureSlowFPBrcc, - FeatureHasVMLxHazards, - FeatureHasSlowFPVMLx, - FeatureHasSlowFPVFMx, - FeatureVMLxForwarding, - FeatureMP, - FeatureVFP4, - FeatureVirtualization]>; - -def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8, - FeatureHasRetAddrStack, - FeatureNonpipelinedVFP, - FeatureTrustZone, - FeatureSlowFPBrcc, - FeatureHasVMLxHazards, - FeatureHasSlowFPVMLx, - FeatureHasSlowFPVFMx, - FeatureVMLxForwarding]>; - -def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureHasVMLxHazards, - FeatureVMLxForwarding, - FeatureFP16, - FeatureAvoidPartialCPSR, - FeatureExpandMLx, - FeaturePreferVMOVSR, - FeatureMuxedUnits, - FeatureNEONForFPMovs, - FeatureCheckVLDnAlign, - FeatureMP]>; - -def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureVMLxForwarding, - FeatureVFP4, - FeatureAvoidPartialCPSR, - FeatureVirtualization, - FeatureMP]>; - -def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, - FeatureDontWidenVMOVS, - FeatureSplatVFPToNeon, - FeatureHasRetAddrStack, - FeatureMuxedUnits, - FeatureTrustZone, - FeatureVFP4, - FeatureMP, - FeatureCheckVLDnAlign, - FeatureAvoidPartialCPSR, - FeatureVirtualization]>; - -def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, - 
FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureMP, - FeatureVMLxForwarding, - FeatureVFP4, - FeatureAvoidPartialCPSR, - FeatureVirtualization]>; - -// FIXME: krait has currently the same features as A9 plus VFP4 and HWDiv -def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, - FeatureHasRetAddrStack, - FeatureMuxedUnits, - FeatureCheckVLDnAlign, - FeatureVMLxForwarding, - FeatureFP16, - FeatureAvoidPartialCPSR, - FeatureVFP4, - FeatureHWDivThumb, - FeatureHWDivARM]>; - -def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, - FeatureHasRetAddrStack, - FeatureNEONForFP, - FeatureVFP4, - FeatureUseWideStrideVFP, - FeatureMP, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureAvoidPartialCPSR, - FeatureAvoidMOVsShOp, - FeatureHasSlowFPVMLx, - FeatureHasSlowFPVFMx, - FeatureHasVMLxHazards, - FeatureProfUnpredicate, - FeaturePrefISHSTBarrier, - FeatureSlowOddRegister, - FeatureSlowLoadDSubreg, - FeatureSlowVGETLNi32, - FeatureSlowVDUP32, - FeatureUseMISched, - FeatureNoPostRASched]>; - -def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4, - FeatureHasRetAddrStack, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, - FeatureHasRetAddrStack, - FeatureSlowFPBrcc, - FeatureHasSlowFPVMLx, - FeatureHasSlowFPVFMx, - FeatureVFP3_D16, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, - FeatureHasRetAddrStack, - FeatureVFP3_D16, - FeatureSlowFPBrcc, - FeatureHWDivARM, - FeatureHasSlowFPVMLx, - FeatureHasSlowFPVFMx, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, - FeatureHasRetAddrStack, - FeatureVFP3_D16, - FeatureFP16, - FeatureMP, - FeatureSlowFPBrcc, - FeatureHWDivARM, - FeatureHasSlowFPVMLx, - FeatureHasSlowFPVFMx, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r, - FeatureHasRetAddrStack, - FeatureVFP3_D16, - FeatureFP16, - FeatureMP, - FeatureSlowFPBrcc, 
- FeatureHWDivARM, - FeatureHasSlowFPVMLx, - FeatureHasSlowFPVFMx, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-m3", CortexM4Model, [ARMv7m, - ProcM3, - FeaturePrefLoopAlign32, - FeatureUseMISched, - FeatureHasNoBranchPredictor]>; - -def : ProcessorModel<"sc300", CortexM4Model, [ARMv7m, - ProcM3, - FeatureUseMISched, - FeatureHasNoBranchPredictor]>; - -def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em, - FeatureVFP4_D16_SP, - FeaturePrefLoopAlign32, - FeatureHasSlowFPVMLx, - FeatureHasSlowFPVFMx, - FeatureUseMISched, - FeatureHasNoBranchPredictor]>; - -def : ProcessorModel<"cortex-m7", CortexM7Model, [ARMv7em, - ProcM7, - FeatureFPARMv8_D16, - FeatureUseMIPipeliner, - FeatureUseMISched]>; - -def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, - FeatureNoMovt, - FeatureHasNoBranchPredictor]>; - -def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline, - FeatureDSP, - FeatureFPARMv8_D16_SP, - FeaturePrefLoopAlign32, - FeatureHasSlowFPVMLx, - FeatureHasSlowFPVFMx, - FeatureUseMISched, - FeatureHasNoBranchPredictor, - FeatureFixCMSE_CVE_2021_35465]>; - -def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline, - FeatureDSP, - FeatureFPARMv8_D16_SP, - FeaturePrefLoopAlign32, - FeatureHasSlowFPVMLx, - FeatureHasSlowFPVFMx, - FeatureUseMISched, - FeatureHasNoBranchPredictor, - FeatureFixCMSE_CVE_2021_35465]>; - -def : ProcessorModel<"cortex-m55", CortexM55Model, [ARMv81mMainline, - FeatureDSP, - FeatureFPARMv8_D16, - FeatureUseMISched, - FeatureHasNoBranchPredictor, - FeaturePrefLoopAlign32, - FeatureHasSlowFPVMLx, - HasMVEFloatOps, - FeatureFixCMSE_CVE_2021_35465]>; - -def : ProcessorModel<"cortex-m85", CortexM85Model, [ARMv81mMainline, - FeatureDSP, - FeatureFPARMv8_D16, - FeaturePACBTI, - FeatureUseMISched, - HasMVEFloatOps]>; - -def : ProcessorModel<"cortex-m52", CortexM55Model, [ARMv81mMainline, - FeatureDSP, - FeatureFPARMv8_D16, - FeatureHasNoBranchPredictor, - FeaturePACBTI, - FeatureUseMISched, - FeaturePrefLoopAlign32, - 
FeatureHasSlowFPVMLx, - FeatureMVEVectorCostFactor1, - HasMVEFloatOps]>; - -def : ProcNoItin<"cortex-a32", [ARMv8a, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFPAO]>; - -def : ProcNoItin<"cortex-a55", [ARMv82a, ProcA55, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureDotProd]>; - -def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFPAO, - FeatureAvoidPartialCPSR, - FeatureCheapPredicableCPSR, - FeatureFixCortexA57AES1742098]>; - -def : ProcessorModel<"cortex-a72", CortexA57Model, [ARMv8a, ProcA72, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFixCortexA57AES1742098]>; - -def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"cortex-a75", [ARMv82a, ProcA75, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureDotProd]>; - -def : ProcNoItin<"cortex-a76", [ARMv82a, ProcA76, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFullFP16, - FeatureDotProd]>; - -def : ProcNoItin<"cortex-a76ae", [ARMv82a, ProcA76, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFullFP16, - FeatureDotProd]>; - -def : ProcNoItin<"cortex-a77", [ARMv82a, ProcA77, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFullFP16, - FeatureDotProd]>; - -def : ProcNoItin<"cortex-a78", [ARMv82a, ProcA78, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFullFP16, - FeatureDotProd]>; - -def : ProcNoItin<"cortex-a78ae", [ARMv82a, ProcA78AE, - FeatureHWDivThumb, - FeatureHWDivARM, - 
FeatureCrypto, - FeatureCRC, - FeatureFullFP16, - FeatureDotProd]>; - -def : ProcNoItin<"cortex-a78c", [ARMv82a, ProcA78C, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureDotProd, - FeatureFullFP16]>; - -def : ProcNoItin<"cortex-a710", [ARMv9a, ProcA710, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureFP16FML, - FeatureBF16, - FeatureMatMulInt8, - FeatureSB]>; - -def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFullFP16, - FeatureDotProd]>; - -def : ProcNoItin<"cortex-x1c", [ARMv82a, ProcX1C, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFullFP16, - FeatureDotProd]>; - -def : ProcNoItin<"neoverse-v1", [ARMv84a, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFullFP16, - FeatureBF16, - FeatureMatMulInt8]>; - -def : ProcNoItin<"neoverse-n1", [ARMv82a, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureDotProd]>; - -def : ProcNoItin<"neoverse-n2", [ARMv9a, - FeatureBF16, - FeatureFP16FML, - FeatureMatMulInt8]>; - -def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, - FeatureHasRetAddrStack, - FeatureNEONForFP, - FeatureVFP4, - FeatureMP, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureAvoidPartialCPSR, - FeatureAvoidMOVsShOp, - FeatureHasSlowFPVMLx, - FeatureHasSlowFPVFMx, - FeatureCrypto, - FeatureUseMISched, - FeatureZCZeroing, - FeatureNoPostRASched]>; - -def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynos]>; -def : ProcNoItin<"exynos-m4", [ARMv82a, ProcExynos, - FeatureFullFP16, - FeatureDotProd]>; -def : ProcNoItin<"exynos-m5", [ARMv82a, ProcExynos, - FeatureFullFP16, - FeatureDotProd]>; - -def : ProcNoItin<"kryo", [ARMv8a, ProcKryo, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52, - FeatureUseMISched, - FeatureFPAO]>; +include "ARMProcessors.td" 
//===----------------------------------------------------------------------===// // Declare the target which we are implementing diff --git a/llvm/lib/Target/ARM/ARMArchitectures.td b/llvm/lib/Target/ARM/ARMArchitectures.td new file mode 100644 index 00000000000000..daf54f457b3be8 --- /dev/null +++ b/llvm/lib/Target/ARM/ARMArchitectures.td @@ -0,0 +1,340 @@ +class Architecture features> + : SubtargetFeature; + +//===----------------------------------------------------------------------===// +// ARM architectures +// + +def ARMv4 : Architecture<"armv4", "ARMv4", []>; + +def ARMv4t : Architecture<"armv4t", "ARMv4t", [HasV4TOps]>; + +def ARMv5t : Architecture<"armv5t", "ARMv5t", [HasV5TOps]>; + +def ARMv5te : Architecture<"armv5te", "ARMv5te", [HasV5TEOps]>; + +def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>; + +def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops, + FeatureDSP]>; + +def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops, + FeatureDSP]>; + +def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>; + +def ARMv6kz : Architecture<"armv6kz", "ARMv6kz", [HasV6KOps, + FeatureTrustZone]>; + +def ARMv6m : Architecture<"armv6-m", "ARMv6m", [HasV6MOps, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureMClass, + FeatureStrictAlign]>; + +def ARMv6sm : Architecture<"armv6s-m", "ARMv6sm", [HasV6MOps, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureMClass, + FeatureStrictAlign]>; + +def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops, + FeatureNEON, + FeatureDB, + FeatureDSP, + FeatureAClass, + FeaturePerfMon]>; + +def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops, + FeatureNEON, + FeatureDB, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureAClass, + FeaturePerfMon]>; + +def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, + FeatureDB, + FeatureDSP, + FeatureHWDivThumb, + FeatureRClass, + FeaturePerfMon]>; + +def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, + FeatureThumb2, + 
FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureHWDivThumb, + FeatureMClass]>; + +def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, + FeatureThumb2, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureHWDivThumb, + FeatureMClass, + FeatureDSP]>; + +def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC]>; + +def ARMv81a : Architecture<"armv8.1-a", "ARMv81a", [HasV8_1aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC]>; + +def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS]>; + +def ARMv83a : Architecture<"armv8.3-a", "ARMv83a", [HasV8_3aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS]>; + +def ARMv84a : Architecture<"armv8.4-a", "ARMv84a", [HasV8_4aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; + +def ARMv85a : Architecture<"armv8.5-a", "ARMv85a", [HasV8_5aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; +def ARMv86a : Architecture<"armv8.6-a", "ARMv86a", [HasV8_6aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + 
FeatureRAS, + FeatureDotProd]>; +def ARMv87a : Architecture<"armv8.7-a", "ARMv87a", [HasV8_7aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; +def ARMv88a : Architecture<"armv8.8-a", "ARMv88a", [HasV8_8aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; +def ARMv89a : Architecture<"armv8.9-a", "ARMv89a", [HasV8_9aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; + +def ARMv9a : Architecture<"armv9-a", "ARMv9a", [HasV9_0aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; +def ARMv91a : Architecture<"armv9.1-a", "ARMv91a", [HasV9_1aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; +def ARMv92a : Architecture<"armv9.2-a", "ARMv92a", [HasV9_2aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; +def ARMv93a : Architecture<"armv9.3-a", "ARMv93a", [HasV9_3aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; +def ARMv94a : Architecture<"armv9.4-a", "ARMv94a", [HasV9_4aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + 
FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; +def ARMv95a : Architecture<"armv9.5-a", "ARMv95a", [HasV9_5aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; + +def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, + FeatureRClass, + FeatureDB, + FeatureDFB, + FeatureDSP, + FeatureCRC, + FeatureMP, + FeatureVirtualization, + FeatureFPARMv8, + FeatureNEON]>; + +def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline", + [HasV8MBaselineOps, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureHWDivThumb, + FeatureV7Clrex, + Feature8MSecExt, + FeatureAcquireRelease, + FeatureMClass, + FeatureStrictAlign]>; + +def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline", + [HasV8MMainlineOps, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureHWDivThumb, + Feature8MSecExt, + FeatureAcquireRelease, + FeatureMClass]>; + +def ARMv81mMainline : Architecture<"armv8.1-m.main", "ARMv81mMainline", + [HasV8_1MMainlineOps, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureHWDivThumb, + Feature8MSecExt, + FeatureAcquireRelease, + FeatureMClass, + FeatureRAS, + FeatureLOB]>; + +// Aliases +def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>; +def IWMMXT2 : Architecture<"iwmmxt2", "ARMv5te", [ARMv5te]>; +def XScale : Architecture<"xscale", "ARMv5te", [ARMv5te]>; +def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>; +def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>; +def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; diff --git a/llvm/lib/Target/ARM/ARMFeatures.td b/llvm/lib/Target/ARM/ARMFeatures.td new file mode 100644 index 00000000000000..111c87838291f6 --- /dev/null +++ b/llvm/lib/Target/ARM/ARMFeatures.td @@ -0,0 +1,753 @@ +//===----------------------------------------------------------------------===// +// ARM Subtarget state. 
+//
+
+// True if compiling for Thumb, false for ARM.
+def ModeThumb : SubtargetFeature<"thumb-mode", "IsThumb",
+                                 "true", "Thumb mode">;
+
+// True if we're using software floating point features.
+def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat",
+                                     "true", "Use software floating "
+                                     "point features.">;
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget features.
+//
+
+// Floating Point, HW Division and Neon Support
+
+// FP loads/stores/moves, shared between VFP and MVE (even in the integer-only
+// version).
+def FeatureFPRegs : SubtargetFeature<"fpregs", "HasFPRegs", "true",
+                                     "Enable FP registers">;
+
+// 16-bit FP loads/stores/moves, shared between VFP (with the v8.2A FP16
+// extension) and MVE (even in the integer-only version).
+def FeatureFPRegs16 : SubtargetFeature<"fpregs16", "HasFPRegs16", "true",
+                                       "Enable 16-bit FP registers",
+                                       [FeatureFPRegs]>; // implies fpregs
+
+def FeatureFPRegs64 : SubtargetFeature<"fpregs64", "HasFPRegs64", "true",
+                                       "Enable 64-bit FP registers",
+                                       [FeatureFPRegs]>; // implies fpregs
+
+// True if the floating point unit supports double precision.
+def FeatureFP64 : SubtargetFeature<"fp64", "HasFP64", "true",
+                                   "Floating point unit supports "
+                                   "double precision",
+                                   [FeatureFPRegs64]>; // implies fpregs64
+
+// True if subtarget has the full 32 double precision FP registers for VFPv3.
+def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true",
+                                  "Extend FP to 32 double registers">;
+
+/// Versions of the VFP flags restricted to single precision, or to
+/// 16 d-registers, or both.
+multiclass VFPver<string name, string query, string description,
+                  list<SubtargetFeature> prev,
+                  list<SubtargetFeature> otherimplies,
+                  list<SubtargetFeature> vfp2prev = []> {
+  def _D16_SP: SubtargetFeature< // 16 d-registers, no double precision
+    name#"d16sp", query#"D16SP", "true",
+    description#" with only 16 d-registers and no double precision",
+    !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16_SP")) #
+      !foreach(v, vfp2prev, !cast<SubtargetFeature>(v # "_SP")) #
+      otherimplies>;
+  def _SP: SubtargetFeature< // _D16_SP plus FeatureD32
+    name#"sp", query#"SP", "true",
+    description#" with no double precision",
+    !foreach(v, prev, !cast<SubtargetFeature>(v # "_SP")) #
+      otherimplies # [FeatureD32, !cast<SubtargetFeature>(NAME # "_D16_SP")]>;
+  def _D16: SubtargetFeature< // _D16_SP plus FeatureFP64
+    name#"d16", query#"D16", "true",
+    description#" with only 16 d-registers",
+    !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16")) #
+      vfp2prev #
+      otherimplies # [FeatureFP64, !cast<SubtargetFeature>(NAME # "_D16_SP")]>;
+  def "": SubtargetFeature< // unrestricted version; implies _D16 and _SP
+    name, query, "true", description,
+    prev # otherimplies # [
+        !cast<SubtargetFeature>(NAME # "_D16"),
+        !cast<SubtargetFeature>(NAME # "_SP")]>;
+}
+
+def FeatureVFP2_SP : SubtargetFeature<"vfp2sp", "HasVFPv2SP", "true",
+                                      "Enable VFP2 instructions with "
+                                      "no double precision",
+                                      [FeatureFPRegs]>;
+
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
+                                   "Enable VFP2 instructions",
+                                   [FeatureFP64, FeatureVFP2_SP]>;
+
+defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions",
+                         [], [], [FeatureVFP2]>; // VFP2 is a plain def: vfp2prev
+
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+                                   "Enable NEON instructions",
+                                   [FeatureVFP3]>;
+
+// True if subtarget supports half-precision FP conversions.
+def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
+                                   "Enable half-precision "
+                                   "floating point">;
+
+defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions",
+                         [FeatureVFP3], [FeatureFP16]>; // fp16 via otherimplies
+
+defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP",
+                            [FeatureVFP4], []>;
+
+// True if subtarget supports half-precision FP operations.
+def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Enable full half-precision " + "floating point", + [FeatureFPARMv8_D16_SP, FeatureFPRegs16]>; + +// True if subtarget supports half-precision FP fml operations. +def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", + "Enable full half-precision " + "floating point fml instructions", + [FeatureFullFP16]>; + +// True if subtarget supports [su]div in Thumb mode. +def FeatureHWDivThumb : SubtargetFeature<"hwdiv", + "HasDivideInThumbMode", "true", + "Enable divide instructions in Thumb">; + +// True if subtarget supports [su]div in ARM mode. +def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", + "HasDivideInARMMode", "true", + "Enable divide instructions in ARM mode">; + +// Atomic Support + +// True if the subtarget supports DMB / DSB data barrier instructions. +def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", + "Has data barrier (dmb/dsb) instructions">; + +// True if the subtarget supports CLREX instructions. +def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", + "Has v7 clrex instruction">; + +// True if the subtarget supports DFB data barrier instruction. +def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true", + "Has full data barrier (dfb) instruction">; + +// True if the subtarget supports v8 atomics (LDA/LDAEX etc) instructions. +def FeatureAcquireRelease : SubtargetFeature<"acquire-release", + "HasAcquireRelease", "true", + "Has v8 acquire/release (lda/ldaex " + " etc) instructions">; + + +// True if floating point compare + branch is slow. +def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "IsFPBrccSlow", "true", + "FP compare + branch is slow">; + +// True if the processor supports the Performance Monitor Extensions. These +// include a generic cycle-counter as well as more fine-grained (often +// implementation-specific) events. 
+def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable support for Performance " + "Monitor extensions">; + + +// TrustZone Security Extensions + +// True if processor supports TrustZone security extensions. +def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", + "Enable support for TrustZone " + "security extensions">; + +// True if processor supports ARMv8-M Security Extensions. +def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", + "Enable support for ARMv8-M " + "Security Extensions">; + +// True if processor supports SHA1 and SHA256. +def FeatureSHA2 : SubtargetFeature<"sha2", "HasSHA2", "true", + "Enable SHA1 and SHA256 support", [FeatureNEON]>; + +def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", + "Enable AES support", [FeatureNEON]>; + +// True if processor supports Cryptography extensions. +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable support for " + "Cryptography extensions", + [FeatureNEON, FeatureSHA2, FeatureAES]>; + +// True if processor supports CRC instructions. +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable support for CRC instructions">; + +// True if the ARMv8.2A dot product instructions are supported. +def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true", + "Enable support for dot product instructions", + [FeatureNEON]>; + +// True if the processor supports RAS extensions. +// Not to be confused with FeatureHasRetAddrStack (return address stack). +def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Enable Reliability, Availability " + "and Serviceability extensions">; + +// Fast computation of non-negative address offsets. +// True if processor does positive address offset computation faster. +def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", + "Enable fast computation of " + "positive address offsets">; + +// Fast execution of AES crypto operations. 
+// True if processor executes back to back AES instruction pairs faster. +def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; + +// Fast execution of bottom and top halves of literal generation. +// True if processor executes back to back bottom and top halves of literal generation faster. +def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true", + "CPU fuses literal generation operations">; + +// Choice of hardware register to use as the thread pointer, if any. +def FeatureReadTpTPIDRURW : SubtargetFeature<"read-tp-tpidrurw", "IsReadTPTPIDRURW", "true", + "Reading thread pointer from TPIDRURW register">; +def FeatureReadTpTPIDRURO : SubtargetFeature<"read-tp-tpidruro", "IsReadTPTPIDRURO", "true", + "Reading thread pointer from TPIDRURO register">; +def FeatureReadTpTPIDRPRW : SubtargetFeature<"read-tp-tpidrprw", "IsReadTPTPIDRPRW", "true", + "Reading thread pointer from TPIDRPRW register">; + +// Cyclone can zero VFP registers in 0 cycles. +// True if the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are +// particularly effective at zeroing a VFP register. +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions">; + +// Whether it is profitable to unpredicate certain instructions during if-conversion. +// True if if conversion may decide to leave some instructions unpredicated. +def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr", + "IsProfitableToUnpredicate", "true", + "Is profitable to unpredicate">; + +// Some targets (e.g. Swift) have microcoded VGETLNi32. +// True if VMOV will be favored over VGETLNi32. +def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", + "HasSlowVGETLNi32", "true", + "Has slow VGETLNi32 - prefer VMOV">; + +// Some targets (e.g. Swift) have microcoded VDUP32. +// True if VMOV will be favored over VDUP. 
+def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", + "true", + "Has slow VDUP32 - prefer VMOV">; + +// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON +// for scalar FP, as this allows more effective execution domain optimization. +// True if VMOVSR will be favored over VMOVDRR. +def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", + "true", "Prefer VMOVSR">; + +// Swift has ISHST barriers compatible with Atomic Release semantics but weaker +// than ISH. +// True if ISHST barriers will be used for Release semantics. +def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHSTBarriers", + "true", "Prefer ISHST barriers">; + +// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU. +// True if the AGU and NEON/FPU units are multiplexed. +def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", + "true", + "Has muxed AGU and NEON/FPU">; + +// Whether VLDM/VSTM starting with odd register number need more microops +// than single VLDRS. +// True if a VLDM/VSTM starting with an odd register number is considered to +// take more microops than single VLDRS/VSTRS. +def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "HasSlowOddRegister", + "true", "VLDM/VSTM starting " + "with an odd register is slow">; + +// Some targets have a renaming dependency when loading into D subregisters. +// True if loading into a D subregister will be penalized. +def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", + "HasSlowLoadDSubregister", "true", + "Loading into D subregs is slow">; + +// True if a wider stride is used when allocating VFP registers. +def FeatureUseWideStrideVFP : SubtargetFeature<"wide-stride-vfp", + "UseWideStrideVFP", "true", + "Use a wide stride when allocating VFP registers">; + +// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. +// True if VMOVS will never be widened to VMOVD. 
+def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", + "DontWidenVMOVS", "true", + "Don't widen VMOVS to VMOVD">; + +// Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different +// VFP register widths. +// True if a register is splatted between VFP and NEON instructions. +def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon", + "UseSplatVFPToNeon", "true", + "Splat register from VFP to NEON", + [FeatureDontWidenVMOVS]>; + +// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions. +// True if the MLx expansion pass is run. +def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", + "ExpandMLx", "true", + "Expand VFP/NEON MLA/MLS instructions">; + +// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS. +// True if VFP/NEON VMLA/VMLS have special RAW hazards. +def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", + "true", "Has VMLx hazards">; + +// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from +// VFP to NEON, as an execution domain optimization. +// True if VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON. +def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", + "UseNEONForFPMovs", "true", + "Convert VMOVSR, VMOVRS, " + "VMOVS to NEON">; + +// Some processors benefit from using NEON instructions for scalar +// single-precision FP operations. This affects instruction selection and should +// only be enabled if the handling of denormals is not important. +// Use the method useNEONForSinglePrecisionFP() to determine if NEON should actually be used. +def FeatureNEONForFP : SubtargetFeature<"neonfp", + "HasNEONForFP", + "true", + "Use NEON for single precision FP">; + +// On some processors, VLDn instructions that access unaligned data take one +// extra cycle. Take that into account when computing operand latencies. +// True if VLDn instructions take an extra cycle for unaligned accesses. 
+def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAccessAlignment", + "true", + "Check for VLDn unaligned access">; + +// Some processors have a nonpipelined VFP coprocessor. +// True if VFP instructions are not pipelined. +def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", + "NonpipelinedVFP", "true", + "VFP instructions are not pipelined">; + +// Some processors have FP multiply-accumulate instructions that don't +// play nicely with other VFP / NEON instructions, and it's generally better +// to just not use them. +// If the VFP2 / NEON instructions are available, indicates +// whether the FP VML[AS] instructions are slow (if so, don't use them). +def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", + "Disable VFP / NEON MAC instructions">; + +// VFPv4 added VFMA instructions that can similarly be fast or slow. +// If the VFP4 / NEON instructions are available, indicates +// whether the FP VFM[AS] instructions are slow (if so, don't use them). +def FeatureHasSlowFPVFMx : SubtargetFeature<"slowfpvfmx", "SlowFPVFMx", "true", + "Disable VFP / NEON FMA instructions">; + +// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. +/// True if NEON has special multiplier accumulator +/// forwarding to allow mul + mla being issued back to back. +def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", + "HasVMLxForwarding", "true", + "Has multiplier accumulator forwarding">; + +// Disable 32-bit to 16-bit narrowing for experimentation. +// True if codegen would prefer 32-bit Thumb instructions over 16-bit ones. 
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Prefers32BitThumb", "true", + "Prefer 32-bit Thumb instrs">; + +def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2", + "Prefer 32-bit alignment for loops">; + +def FeatureMVEVectorCostFactor1 : SubtargetFeature<"mve1beat", "MVEVectorCostFactor", "4", + "Model MVE instructions as a 1 beat per tick architecture">; + +def FeatureMVEVectorCostFactor2 : SubtargetFeature<"mve2beat", "MVEVectorCostFactor", "2", + "Model MVE instructions as a 2 beats per tick architecture">; + +def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFactor", "1", + "Model MVE instructions as a 4 beats per tick architecture">; + +/// Some instructions update CPSR partially, which can add false dependency for +/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is +/// mapped to a separate physical register. Avoid partial CPSR update for these +/// processors. +/// True if codegen would avoid using instructions +/// that partially update CPSR and add false dependency on the previous +/// CPSR setting instruction. +def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", + "AvoidCPSRPartialUpdate", "true", + "Avoid CPSR partial update for OOO execution">; + +/// Disable +1 predication cost for instructions updating CPSR. +/// Enabled for Cortex-A57. +/// True if the +1 predication cost is disabled for instructions updating CPSR. Enabled for Cortex-A57. +def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr", + "CheapPredicableCPSRDef", + "true", + "Disable +1 predication cost for instructions updating CPSR">; + +// True if codegen should avoid using flag setting movs with shifter operand (i.e. asr, lsl, lsr). +def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", + "AvoidMOVsShifterOperand", "true", + "Avoid movs instructions with " + "shifter operand">; + +// Some processors perform return stack prediction. 
CodeGen should avoid issuing +// "normal" call instructions to callees which do not return. +def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", + "HasRetAddrStack", "true", + "Has return address stack">; + +// Some processors have no branch predictor, which changes the expected cost of +// taking a branch which affects the choice of whether to use predicated +// instructions. +// True if the subtarget has a branch predictor. Having +// a branch predictor or not changes the expected cost of taking a branch +// which affects the choice of whether to use predicated instructions. +def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor", + "HasBranchPredictor", "false", + "Has no branch predictor">; + +/// DSP extension. +/// True if the subtarget supports the DSP (saturating arith and such) instructions. +def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", + "Supports DSP instructions in " + "ARM and/or Thumb2">; + +// True if the subtarget supports Multiprocessing extension (ARMv7 only). +def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", + "Supports Multiprocessing extension">; + +// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8). +def FeatureVirtualization : SubtargetFeature<"virtualization", + "HasVirtualization", "true", + "Supports Virtualization extension", + [FeatureHWDivThumb, FeatureHWDivARM]>; + +// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. +// See ARMInstrInfo.td for details. +// True if NaCl TRAP instruction is generated instead of the regular TRAP. +def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", + "NaCl trap">; + +// True if the subtarget disallows unaligned memory +// accesses for some types. For details, see +// ARMTargetLowering::allowsMisalignedMemoryAccesses(). 
+def FeatureStrictAlign : SubtargetFeature<"strict-align", + "StrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +// Generate calls via indirect call instructions. +def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", + "Generate calls via indirect call " + "instructions">; + +// Generate code that does not contain data access to code sections. +def FeatureExecuteOnly : SubtargetFeature<"execute-only", + "GenExecuteOnly", "true", + "Enable the generation of " + "execute only code.">; + +// True if R9 is not available as a general purpose register. +def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", + "Reserve R9, making it unavailable" + " as GPR">; + +// True if MOVT / MOVW pairs are not used for materialization of +// 32-bit imms (including global addresses). +def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", + "Don't use movt/movw pairs for " + "32-bit imms">; + +/// Implicitly convert an instruction to a different one if its immediates +/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1. +def FeatureNoNegativeImmediates + : SubtargetFeature<"no-neg-immediates", + "NegativeImmediates", "false", + "Convert immediates and instructions " + "to their negated or complemented " + "equivalent when the immediate does " + "not fit in the encoding.">; + +// Use the MachineScheduler for instruction scheduling for the subtarget. +def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true", + "Use the MachineScheduler">; + +// Use the MachinePipeliner for instruction scheduling for the subtarget. +def FeatureUseMIPipeliner: SubtargetFeature<"use-mipipeliner", "UseMIPipeliner", "true", + "Use the MachinePipeliner">; + +// False if scheduling should happen again after register allocation. 
+def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler", + "DisablePostRAScheduler", "true", + "Don't schedule again after register allocation">; + +// Armv8.5-A extensions + +// Has speculation barrier. +def FeatureSB : SubtargetFeature<"sb", "HasSB", "true", + "Enable v8.5a Speculation Barrier" >; + +// Armv8.6-A extensions + +// True if subtarget supports BFloat16 floating point operations. +def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true", + "Enable support for BFloat16 instructions", [FeatureNEON]>; + +// True if subtarget supports 8-bit integer matrix multiply. +def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8", + "true", "Enable Matrix Multiply Int8 Extension", [FeatureNEON]>; + +// Armv8.1-M extensions + +// True if the processor supports the Low Overhead Branch extension. +def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true", + "Enable Low Overhead Branch " + "extensions">; + +// Mitigate against the CVE-2021-35465 security vulnerability. +def FeatureFixCMSE_CVE_2021_35465 : SubtargetFeature<"fix-cmse-cve-2021-35465", + "FixCMSE_CVE_2021_35465", "true", + "Mitigate against the cve-2021-35465 " + "security vulnurability">; + +def FeaturePACBTI : SubtargetFeature<"pacbti", "HasPACBTI", "true", + "Enable Pointer Authentication and Branch " + "Target Identification">; + +/// Don't place a BTI instruction after return-twice constructs (setjmp). 
+def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice", + "NoBTIAtReturnTwice", "true", + "Don't place a BTI instruction " + "after a return-twice">; + +// Armv8.9-A/Armv9.4-A 2022 Architecture Extensions +def FeatureCLRBHB : SubtargetFeature<"clrbhb", "HasCLRBHB", "true", + "Enable Clear BHB instruction">; + + +def FeatureFixCortexA57AES1742098 : SubtargetFeature<"fix-cortex-a57-aes-1742098", + "FixCortexA57AES1742098", "true", + "Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">; + +def FeatureAAPCSFrameChain : SubtargetFeature<"aapcs-frame-chain", + "CreateAAPCSFrameChain", "true", + "Create an AAPCS compliant frame chain">; + +def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf", + "CreateAAPCSFrameChainLeaf", "true", + "Create an AAPCS compliant frame chain " + "for leaf functions", + [FeatureAAPCSFrameChain]>; + +// Assume that lock-free 32-bit atomics are available, even if the target +// and operating system combination would not usually provide them. The user +// is responsible for providing any necessary __sync implementations. Code +// built with this feature is not ABI-compatible with code built without this +// feature, if atomic variables are exposed across the ABI boundary. +def FeatureAtomics32 : SubtargetFeature< + "atomics-32", "HasForced32BitAtomics", "true", + "Assume that lock-free 32-bit atomics are available">; + +//===----------------------------------------------------------------------===// +// ARM architecture class +// + +// A-series ISA +def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", + "Is application profile ('A' series)">; + +// R-series ISA +def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", + "Is realtime profile ('R' series)">; + +// M-series ISA +def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", + "Is microcontroller profile ('M' series)">; + +// True if Thumb2 instructions are supported. 
+def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", + "Enable Thumb2 instructions">; + +// True if subtarget does not support ARM mode execution. +def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", + "Does not support ARM mode execution">; + +//===----------------------------------------------------------------------===// +// ARM ISAs. +// +// Specify whether the target supports specific ARM ISA variants. + +def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", + "Support ARM v4T instructions">; + +def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true", + "Support ARM v5T instructions", + [HasV4TOps]>; + +def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true", + "Support ARM v5TE, v5TEj, and " + "v5TExp instructions", + [HasV5TOps]>; + +def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", + "Support ARM v6 instructions", + [HasV5TEOps]>; + +def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", + "Support ARM v6M instructions", + [HasV6Ops]>; + +def HasV8MBaselineOps : SubtargetFeature<"v8m", "HasV8MBaselineOps", "true", + "Support ARM v8M Baseline instructions", + [HasV6MOps]>; + +def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true", + "Support ARM v6k instructions", + [HasV6Ops]>; + +def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", + "Support ARM v6t2 instructions", + [HasV8MBaselineOps, HasV6KOps, FeatureThumb2]>; + +def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", + "Support ARM v7 instructions", + [HasV6T2Ops, FeatureV7Clrex]>; + +def HasV8MMainlineOps : + SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true", + "Support ARM v8M Mainline instructions", + [HasV7Ops]>; + +def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", + "Support ARM v8 instructions", + [HasV7Ops, FeaturePerfMon, FeatureAcquireRelease]>; + +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", + "Support ARM v8.1a instructions", + [HasV8Ops]>; + +def HasV8_2aOps : 
SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", + "Support ARM v8.2a instructions", + [HasV8_1aOps]>; + +def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", + "Support ARM v8.3a instructions", + [HasV8_2aOps]>; + +def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", + "Support ARM v8.4a instructions", + [HasV8_3aOps, FeatureDotProd]>; + +def HasV8_5aOps : SubtargetFeature<"v8.5a", "HasV8_5aOps", "true", + "Support ARM v8.5a instructions", + [HasV8_4aOps, FeatureSB]>; + +def HasV8_6aOps : SubtargetFeature<"v8.6a", "HasV8_6aOps", "true", + "Support ARM v8.6a instructions", + [HasV8_5aOps, FeatureBF16, + FeatureMatMulInt8]>; + +def HasV8_7aOps : SubtargetFeature<"v8.7a", "HasV8_7aOps", "true", + "Support ARM v8.7a instructions", + [HasV8_6aOps]>; + +def HasV8_8aOps : SubtargetFeature<"v8.8a", "HasV8_8aOps", "true", + "Support ARM v8.8a instructions", + [HasV8_7aOps]>; + +def HasV8_9aOps : SubtargetFeature<"v8.9a", "HasV8_9aOps", "true", + "Support ARM v8.9a instructions", + [HasV8_8aOps, FeatureCLRBHB]>; + +def HasV9_0aOps : SubtargetFeature<"v9a", "HasV9_0aOps", "true", + "Support ARM v9a instructions", + [HasV8_5aOps]>; + +def HasV9_1aOps : SubtargetFeature<"v9.1a", "HasV9_1aOps", "true", + "Support ARM v9.1a instructions", + [HasV8_6aOps, HasV9_0aOps]>; + +def HasV9_2aOps : SubtargetFeature<"v9.2a", "HasV9_2aOps", "true", + "Support ARM v9.2a instructions", + [HasV8_7aOps, HasV9_1aOps]>; + +def HasV9_3aOps : SubtargetFeature<"v9.3a", "HasV9_3aOps", "true", + "Support ARM v9.3a instructions", + [HasV8_8aOps, HasV9_2aOps]>; + +def HasV9_4aOps : SubtargetFeature<"v9.4a", "HasV9_4aOps", "true", + "Support ARM v9.4a instructions", + [HasV8_9aOps, HasV9_3aOps]>; + +// Armv9.5-A is a v9-only architecture. From v9.5-A onwards there's no mapping +// to an equivalent v8.x version. 
+def HasV9_5aOps : SubtargetFeature<"v9.5a", "HasV9_5aOps", "true", + "Support ARM v9.5a instructions", + [HasV9_4aOps]>; + +def HasV8_1MMainlineOps : SubtargetFeature< + "v8.1m.main", "HasV8_1MMainlineOps", "true", + "Support ARM v8-1M Mainline instructions", + [HasV8MMainlineOps]>; +def HasMVEIntegerOps : SubtargetFeature< + "mve", "HasMVEIntegerOps", "true", + "Support M-Class Vector Extension with integer ops", + [HasV8_1MMainlineOps, FeatureDSP, FeatureFPRegs16, FeatureFPRegs64]>; +def HasMVEFloatOps : SubtargetFeature< + "mve.fp", "HasMVEFloatOps", "true", + "Support M-Class Vector Extension with integer and floating ops", + [HasMVEIntegerOps, FeatureFPARMv8_D16_SP, FeatureFullFP16]>; + +def HasCDEOps : SubtargetFeature<"cde", "HasCDEOps", "true", + "Support CDE instructions", + [HasV8MMainlineOps]>; + +foreach i = {0-7} in + def FeatureCoprocCDE#i : SubtargetFeature<"cdecp"#i, + "CoprocCDE["#i#"]", "true", + "Coprocessor "#i#" ISA is CDEv1", + [HasCDEOps]>; + +//===----------------------------------------------------------------------===// +// Control codegen mitigation against Straight Line Speculation vulnerability. +//===----------------------------------------------------------------------===// + +/// Harden against Straight Line Speculation for Returns and Indirect Branches. +def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr", + "HardenSlsRetBr", "true", + "Harden against straight line speculation across RETurn and BranchRegister " + "instructions">; +/// Harden against Straight Line Speculation for indirect calls. +def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr", + "HardenSlsBlr", "true", + "Harden against straight line speculation across indirect calls">; +/// Generate thunk code for SLS mitigation in the normal text section. 
+def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat", + "HardenSlsNoComdat", "true", + "Generate thunk code for SLS mitigation in the normal text section">; + +//===----------------------------------------------------------------------===// +// Endianness of instruction encodings in memory. +// +// In the current Arm architecture, this is usually little-endian regardless of +// data endianness. But before Armv7 it was typical for instruction endianness +// to match data endianness, so that a big-endian system was consistently big- +// endian. And Armv7-R can be configured to use big-endian instructions. +// +// Additionally, even when targeting Armv7-A, big-endian instructions can be +// found in relocatable object files, because the Arm ABI specifies that the +// linker byte-reverses them depending on the target architecture. +// +// So we have a feature here to indicate that instructions are stored big- +// endian, which you can set when instantiating an MCDisassembler. +def ModeBigEndianInstructions : SubtargetFeature<"big-endian-instructions", + "BigEndianInstructions", "true", + "Expect instructions to be stored big-endian.">; + diff --git a/llvm/lib/Target/ARM/ARMProcessors.td b/llvm/lib/Target/ARM/ARMProcessors.td new file mode 100644 index 00000000000000..2c559497640037 --- /dev/null +++ b/llvm/lib/Target/ARM/ARMProcessors.td @@ -0,0 +1,577 @@ +class ProcNoItin<string Name, list<SubtargetFeature> Features> + : Processor<Name, NoItineraries, Features>; + +//===----------------------------------------------------------------------===// +// ARM Processor subtarget features. 
+// + +def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", + "Cortex-A5 ARM processors", []>; +def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7", + "Cortex-A7 ARM processors", []>; +def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", + "Cortex-A8 ARM processors", []>; +def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", + "Cortex-A9 ARM processors", []>; +def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12", + "Cortex-A12 ARM processors", []>; +def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", + "Cortex-A15 ARM processors", []>; +def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17", + "Cortex-A17 ARM processors", []>; +def ProcA32 : SubtargetFeature<"a32", "ARMProcFamily", "CortexA32", + "Cortex-A32 ARM processors", []>; +def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", + "Cortex-A35 ARM processors", []>; +def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", + "Cortex-A53 ARM processors", []>; +def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", + "Cortex-A55 ARM processors", []>; +def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", + "Cortex-A57 ARM processors", []>; +def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", + "Cortex-A72 ARM processors", []>; +def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", + "Cortex-A73 ARM processors", []>; +def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", + "Cortex-A75 ARM processors", []>; +def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", + "Cortex-A76 ARM processors", []>; +def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", + "Cortex-A77 ARM processors", []>; +def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", "CortexA78", + "Cortex-A78 ARM processors", []>; +def ProcA78AE : SubtargetFeature<"cortex-a78ae", "ARMProcFamily", "CortexA78AE", + "Cortex-A78AE ARM 
processors", []>; +def ProcA78C : SubtargetFeature<"a78c", "ARMProcFamily", "CortexA78C", + "Cortex-A78C ARM processors", []>; +def ProcA710 : SubtargetFeature<"cortex-a710", "ARMProcFamily", + "CortexA710", "Cortex-A710 ARM processors", []>; +def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", + "Cortex-X1 ARM processors", []>; +def ProcX1C : SubtargetFeature<"cortex-x1c", "ARMProcFamily", "CortexX1C", + "Cortex-X1C ARM processors", []>; + +def ProcV1 : SubtargetFeature<"neoverse-v1", "ARMProcFamily", + "NeoverseV1", "Neoverse-V1 ARM processors", []>; + +def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", + "Qualcomm Krait processors", []>; +def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", + "Qualcomm Kryo processors", []>; +def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", + "Swift ARM processors", []>; + +def ProcExynos : SubtargetFeature<"exynos", "ARMProcFamily", "Exynos", + "Samsung Exynos processors", + [FeatureZCZeroing, + FeatureUseWideStrideVFP, + FeatureSplatVFPToNeon, + FeatureSlowVGETLNi32, + FeatureSlowVDUP32, + FeatureSlowFPBrcc, + FeatureProfUnpredicate, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureHasSlowFPVFMx, + FeatureHasRetAddrStack, + FeatureFuseLiterals, + FeatureFuseAES, + FeatureExpandMLx, + FeatureCrypto, + FeatureCRC]>; + +def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", + "Cortex-R4 ARM processors", []>; +def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", + "Cortex-R5 ARM processors", []>; +def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7", + "Cortex-R7 ARM processors", []>; +def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52", + "Cortex-R52 ARM processors", []>; + +def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3", + "Cortex-M3 ARM processors", []>; +def ProcM7 : SubtargetFeature<"m7", "ARMProcFamily", "CortexM7", + "Cortex-M7 ARM processors", []>; + 
+//===----------------------------------------------------------------------===// +// ARM processors +// +// Dummy CPU, used to target architectures +def : ProcessorModel<"generic", CortexA8Model, []>; + +// FIXME: Several processors below are not using their own scheduler +// model, but one of similar/previous processor. These should be fixed. + +def : ProcNoItin<"arm8", [ARMv4]>; +def : ProcNoItin<"arm810", [ARMv4]>; +def : ProcNoItin<"strongarm", [ARMv4]>; +def : ProcNoItin<"strongarm110", [ARMv4]>; +def : ProcNoItin<"strongarm1100", [ARMv4]>; +def : ProcNoItin<"strongarm1110", [ARMv4]>; + +def : ProcNoItin<"arm7tdmi", [ARMv4t]>; +def : ProcNoItin<"arm7tdmi-s", [ARMv4t]>; +def : ProcNoItin<"arm710t", [ARMv4t]>; +def : ProcNoItin<"arm720t", [ARMv4t]>; +def : ProcNoItin<"arm9", [ARMv4t]>; +def : ProcNoItin<"arm9tdmi", [ARMv4t]>; +def : ProcNoItin<"arm920", [ARMv4t]>; +def : ProcNoItin<"arm920t", [ARMv4t]>; +def : ProcNoItin<"arm922t", [ARMv4t]>; +def : ProcNoItin<"arm940t", [ARMv4t]>; +def : ProcNoItin<"ep9312", [ARMv4t]>; + +def : ProcNoItin<"arm10tdmi", [ARMv5t]>; +def : ProcNoItin<"arm1020t", [ARMv5t]>; + +def : ProcNoItin<"arm9e", [ARMv5te]>; +def : ProcNoItin<"arm926ej-s", [ARMv5te]>; +def : ProcNoItin<"arm946e-s", [ARMv5te]>; +def : ProcNoItin<"arm966e-s", [ARMv5te]>; +def : ProcNoItin<"arm968e-s", [ARMv5te]>; +def : ProcNoItin<"arm10e", [ARMv5te]>; +def : ProcNoItin<"arm1020e", [ARMv5te]>; +def : ProcNoItin<"arm1022e", [ARMv5te]>; +def : ProcNoItin<"xscale", [ARMv5te]>; +def : ProcNoItin<"iwmmxt", [ARMv5te]>; + +def : Processor<"arm1136j-s", ARMV6Itineraries, [ARMv6]>; +def : Processor<"arm1136jf-s", ARMV6Itineraries, [ARMv6, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; + +def : Processor<"cortex-m0", ARMV6Itineraries, [ARMv6m, + FeatureHasNoBranchPredictor]>; +def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m, + FeatureHasNoBranchPredictor]>; +def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m, + FeatureHasNoBranchPredictor]>; +def : 
Processor<"sc000", ARMV6Itineraries, [ARMv6m, + FeatureHasNoBranchPredictor]>; + +def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>; +def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; + +def : Processor<"mpcorenovfp", ARMV6Itineraries, [ARMv6k]>; +def : Processor<"mpcore", ARMV6Itineraries, [ARMv6k, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; + +def : Processor<"arm1156t2-s", ARMV6Itineraries, [ARMv6t2]>; +def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; + +def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureHasSlowFPVFMx, + FeatureVMLxForwarding, + FeatureMP, + FeatureVFP4]>; + +def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasVMLxHazards, + FeatureHasSlowFPVMLx, + FeatureHasSlowFPVFMx, + FeatureVMLxForwarding, + FeatureMP, + FeatureVFP4, + FeatureVirtualization]>; + +def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8, + FeatureHasRetAddrStack, + FeatureNonpipelinedVFP, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasVMLxHazards, + FeatureHasSlowFPVMLx, + FeatureHasSlowFPVFMx, + FeatureVMLxForwarding]>; + +def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureHasVMLxHazards, + FeatureVMLxForwarding, + FeatureFP16, + FeatureAvoidPartialCPSR, + FeatureExpandMLx, + FeaturePreferVMOVSR, + FeatureMuxedUnits, + FeatureNEONForFPMovs, + FeatureCheckVLDnAlign, + FeatureMP]>; + +def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureVMLxForwarding, + FeatureVFP4, + FeatureAvoidPartialCPSR, + FeatureVirtualization, + FeatureMP]>; + +def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, + 
FeatureDontWidenVMOVS, + FeatureSplatVFPToNeon, + FeatureHasRetAddrStack, + FeatureMuxedUnits, + FeatureTrustZone, + FeatureVFP4, + FeatureMP, + FeatureCheckVLDnAlign, + FeatureAvoidPartialCPSR, + FeatureVirtualization]>; + +def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureMP, + FeatureVMLxForwarding, + FeatureVFP4, + FeatureAvoidPartialCPSR, + FeatureVirtualization]>; + +// FIXME: krait has currently the same features as A9 plus VFP4 and HWDiv +def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, + FeatureHasRetAddrStack, + FeatureMuxedUnits, + FeatureCheckVLDnAlign, + FeatureVMLxForwarding, + FeatureFP16, + FeatureAvoidPartialCPSR, + FeatureVFP4, + FeatureHWDivThumb, + FeatureHWDivARM]>; + +def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, + FeatureHasRetAddrStack, + FeatureNEONForFP, + FeatureVFP4, + FeatureUseWideStrideVFP, + FeatureMP, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureAvoidMOVsShOp, + FeatureHasSlowFPVMLx, + FeatureHasSlowFPVFMx, + FeatureHasVMLxHazards, + FeatureProfUnpredicate, + FeaturePrefISHSTBarrier, + FeatureSlowOddRegister, + FeatureSlowLoadDSubreg, + FeatureSlowVGETLNi32, + FeatureSlowVDUP32, + FeatureUseMISched, + FeatureNoPostRASched]>; + +def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4, + FeatureHasRetAddrStack, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, + FeatureHasRetAddrStack, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureHasSlowFPVFMx, + FeatureVFP3_D16, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, + FeatureHasRetAddrStack, + FeatureVFP3_D16, + FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureHasSlowFPVFMx, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, + FeatureHasRetAddrStack, + FeatureVFP3_D16, + 
FeatureFP16, + FeatureMP, + FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureHasSlowFPVFMx, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r, + FeatureHasRetAddrStack, + FeatureVFP3_D16, + FeatureFP16, + FeatureMP, + FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureHasSlowFPVFMx, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-m3", CortexM4Model, [ARMv7m, + ProcM3, + FeaturePrefLoopAlign32, + FeatureUseMISched, + FeatureHasNoBranchPredictor]>; + +def : ProcessorModel<"sc300", CortexM4Model, [ARMv7m, + ProcM3, + FeatureUseMISched, + FeatureHasNoBranchPredictor]>; + +def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em, + FeatureVFP4_D16_SP, + FeaturePrefLoopAlign32, + FeatureHasSlowFPVMLx, + FeatureHasSlowFPVFMx, + FeatureUseMISched, + FeatureHasNoBranchPredictor]>; + +def : ProcessorModel<"cortex-m7", CortexM7Model, [ARMv7em, + ProcM7, + FeatureFPARMv8_D16, + FeatureUseMIPipeliner, + FeatureUseMISched]>; + +def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, + FeatureNoMovt, + FeatureHasNoBranchPredictor]>; + +def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline, + FeatureDSP, + FeatureFPARMv8_D16_SP, + FeaturePrefLoopAlign32, + FeatureHasSlowFPVMLx, + FeatureHasSlowFPVFMx, + FeatureUseMISched, + FeatureHasNoBranchPredictor, + FeatureFixCMSE_CVE_2021_35465]>; + +def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline, + FeatureDSP, + FeatureFPARMv8_D16_SP, + FeaturePrefLoopAlign32, + FeatureHasSlowFPVMLx, + FeatureHasSlowFPVFMx, + FeatureUseMISched, + FeatureHasNoBranchPredictor, + FeatureFixCMSE_CVE_2021_35465]>; + +def : ProcessorModel<"cortex-m55", CortexM55Model, [ARMv81mMainline, + FeatureDSP, + FeatureFPARMv8_D16, + FeatureUseMISched, + FeatureHasNoBranchPredictor, + FeaturePrefLoopAlign32, + FeatureHasSlowFPVMLx, + HasMVEFloatOps, + FeatureFixCMSE_CVE_2021_35465]>; + +def : ProcessorModel<"cortex-m85", CortexM85Model, [ARMv81mMainline, + 
FeatureDSP, + FeatureFPARMv8_D16, + FeaturePACBTI, + FeatureUseMISched, + HasMVEFloatOps]>; + +def : ProcessorModel<"cortex-m52", CortexM55Model, [ARMv81mMainline, + FeatureDSP, + FeatureFPARMv8_D16, + FeatureHasNoBranchPredictor, + FeaturePACBTI, + FeatureUseMISched, + FeaturePrefLoopAlign32, + FeatureHasSlowFPVMLx, + FeatureMVEVectorCostFactor1, + HasMVEFloatOps]>; + +def : ProcNoItin<"cortex-a32", [ARMv8a, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFPAO]>; + +def : ProcNoItin<"cortex-a55", [ARMv82a, ProcA55, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureDotProd]>; + +def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFPAO, + FeatureAvoidPartialCPSR, + FeatureCheapPredicableCPSR, + FeatureFixCortexA57AES1742098]>; + +def : ProcessorModel<"cortex-a72", CortexA57Model, [ARMv8a, ProcA72, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFixCortexA57AES1742098]>; + +def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a75", [ARMv82a, ProcA75, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureDotProd]>; + +def : ProcNoItin<"cortex-a76", [ARMv82a, ProcA76, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFullFP16, + FeatureDotProd]>; + +def : ProcNoItin<"cortex-a76ae", [ARMv82a, ProcA76, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFullFP16, + FeatureDotProd]>; + +def : ProcNoItin<"cortex-a77", [ARMv82a, ProcA77, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + 
FeatureFullFP16, + FeatureDotProd]>; + +def : ProcNoItin<"cortex-a78", [ARMv82a, ProcA78, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFullFP16, + FeatureDotProd]>; + +def : ProcNoItin<"cortex-a78ae", [ARMv82a, ProcA78AE, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFullFP16, + FeatureDotProd]>; + +def : ProcNoItin<"cortex-a78c", [ARMv82a, ProcA78C, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureDotProd, + FeatureFullFP16]>; + +def : ProcNoItin<"cortex-a710", [ARMv9a, ProcA710, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureFP16FML, + FeatureBF16, + FeatureMatMulInt8, + FeatureSB]>; + +def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFullFP16, + FeatureDotProd]>; + +def : ProcNoItin<"cortex-x1c", [ARMv82a, ProcX1C, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFullFP16, + FeatureDotProd]>; + +def : ProcNoItin<"neoverse-v1", [ARMv84a, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFullFP16, + FeatureBF16, + FeatureMatMulInt8]>; + +def : ProcNoItin<"neoverse-n1", [ARMv82a, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureDotProd]>; + +def : ProcNoItin<"neoverse-n2", [ARMv9a, + FeatureBF16, + FeatureFP16FML, + FeatureMatMulInt8]>; + +def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, + FeatureHasRetAddrStack, + FeatureNEONForFP, + FeatureVFP4, + FeatureMP, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureAvoidMOVsShOp, + FeatureHasSlowFPVMLx, + FeatureHasSlowFPVFMx, + FeatureCrypto, + FeatureUseMISched, + FeatureZCZeroing, + FeatureNoPostRASched]>; + +def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynos]>; +def : ProcNoItin<"exynos-m4", [ARMv82a, ProcExynos, + FeatureFullFP16, + FeatureDotProd]>; +def : ProcNoItin<"exynos-m5", [ARMv82a, ProcExynos, + 
FeatureFullFP16, + FeatureDotProd]>; + +def : ProcNoItin<"kryo", [ARMv8a, ProcKryo, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52, + FeatureUseMISched, + FeatureFPAO]>;