From 74f7f9184a5e94fa3f8d234a8a39aabf996f168f Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 19 Jun 2024 08:41:01 -0700 Subject: [PATCH] JIT: fix permute node type in AVX simd sum (#103680) In `gtNewSimdSumNode` we need to type permute nodes as `simd16`, not the sum's type. If we split the tree at the permute we spill to the wrong typed temp. Also provide the ability to halt stress tree splitting after some number of splits. Fixes #102335. --- src/coreclr/jit/compiler.cpp | 14 +++++++++++++- src/coreclr/jit/gentree.cpp | 6 +++--- src/coreclr/jit/jitconfigvalues.h | 3 +++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 659f727e4f293..9b035278d66d5 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5701,12 +5701,20 @@ void Compiler::SplitTreesRandomly() rng.Init(info.compMethodHash() ^ 0x077cc4d4); // Splitting creates a lot of new locals. Set a limit on how many we end up creating here. - unsigned maxLvaCount = max(lvaCount * 2, 50000u); + unsigned maxLvaCount = max(lvaCount * 2, 50000u); + int numSplit = 0; + const int splitLimit = JitConfig.JitStressSplitTreeLimit(); for (BasicBlock* block : Blocks()) { for (Statement* stmt : block->NonPhiStatements()) { + if ((splitLimit >= 0) && (numSplit >= splitLimit)) + { + JITDUMP("Reached split limit (%d) -- stopping\n", splitLimit); + return; + } + int numTrees = 0; for (GenTree* tree : stmt->TreeList()) { @@ -5739,6 +5747,7 @@ void Compiler::SplitTreesRandomly() fgMorphStmtBlockOps(block, stmt); gtUpdateStmtSideEffects(stmt); + numSplit++; } break; @@ -5754,6 +5763,9 @@ void Compiler::SplitTreesRandomly() } } } + + JITDUMP("Split %d trees\n", numSplit); + #endif } diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index caaeb7116ee0a..f8757b26f356e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -26319,13 +26319,13 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); // The permute below gives us [0, 1, 2, 3] -> [1, 0, 3, 2] - op1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode((int)0b10110001, TYP_INT), NI_AVX_Permute, + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode((int)0b10110001, TYP_INT), NI_AVX_Permute, simdBaseJitType, simdSize); // The add below now results in [0 + 1, 1 + 0, 2 + 3, 3 + 2] op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD16, op1, op1Shuffled, simdBaseJitType, simdSize); op1Shuffled = fgMakeMultiUse(&op1); // The permute below gives us [0 + 1, 1 + 0, 2 + 3, 3 + 2] -> [2 + 3, 3 + 2, 0 + 1, 1 + 0] - op1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode((int)0b01001110, TYP_INT), NI_AVX_Permute, + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode((int)0b01001110, TYP_INT), NI_AVX_Permute, simdBaseJitType, simdSize); } else @@ -26357,7 +26357,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); // The permute below gives us [0, 1] -> [1, 0] - op1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode((int)0b0001, TYP_INT), NI_AVX_Permute, + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode((int)0b0001, TYP_INT), NI_AVX_Permute, simdBaseJitType, simdSize); } else diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index dbd56791d88d8..97322e200a3fe 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -190,6 +190,9 @@ CONFIG_INTEGER(JitStressProcedureSplitting, W("JitStressProcedureSplitting"), 0) CONFIG_INTEGER(JitStressRegs, W("JitStressRegs"), 0) CONFIG_STRING(JitStressRegsRange, W("JitStressRegsRange")) // Only apply JitStressRegs to methods in this hash range +// If non-negative value N, only stress split the first N trees. +CONFIG_INTEGER(JitStressSplitTreeLimit, W("JitStressSplitTreeLimit"), -1) + // If non-zero, assert if # of VNF_MapSelect applications considered reaches this. CONFIG_INTEGER(JitVNMapSelLimit, W("JitVNMapSelLimit"), 0)