From 65b4ffd546e182071689f356f69b24201bac16e7 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 18 Jun 2024 19:35:54 -0700 Subject: [PATCH] JIT: fix permute node type in AVX simd sum In `gtNewSimdSumNode` we need to type permute nodes as `simd16`, not the sum's type. If we split the tree at the permute we spill to the wrong typed temp. Also provide the ability to halt stress tree splitting after some number of splits. Fixes #102335. --- src/coreclr/jit/compiler.cpp | 14 +++++++++++++- src/coreclr/jit/gentree.cpp | 6 +++--- src/coreclr/jit/jitconfigvalues.h | 3 +++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 659f727e4f293..9b035278d66d5 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5701,12 +5701,20 @@ void Compiler::SplitTreesRandomly() rng.Init(info.compMethodHash() ^ 0x077cc4d4); // Splitting creates a lot of new locals. Set a limit on how many we end up creating here. - unsigned maxLvaCount = max(lvaCount * 2, 50000u); + unsigned maxLvaCount = max(lvaCount * 2, 50000u); + int numSplit = 0; + const int splitLimit = JitConfig.JitStressSplitTreeLimit(); for (BasicBlock* block : Blocks()) { for (Statement* stmt : block->NonPhiStatements()) { + if ((splitLimit >= 0) && (numSplit >= splitLimit)) + { + JITDUMP("Reached split limit (%d) -- stopping\n", splitLimit); + return; + } + int numTrees = 0; for (GenTree* tree : stmt->TreeList()) { @@ -5739,6 +5747,7 @@ void Compiler::SplitTreesRandomly() fgMorphStmtBlockOps(block, stmt); gtUpdateStmtSideEffects(stmt); + numSplit++; } break; @@ -5754,6 +5763,9 @@ void Compiler::SplitTreesRandomly() } } } + + JITDUMP("Split %d trees\n", numSplit); + #endif } diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 335eb6e60d8ef..cf8e72996242a 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -26307,13 +26307,13 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); // The permute below gives us [0, 1, 2, 3] -> [1, 0, 3, 2] - op1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode((int)0b10110001, TYP_INT), NI_AVX_Permute, + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode((int)0b10110001, TYP_INT), NI_AVX_Permute, simdBaseJitType, simdSize); // The add below now results in [0 + 1, 1 + 0, 2 + 3, 3 + 2] op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD16, op1, op1Shuffled, simdBaseJitType, simdSize); op1Shuffled = fgMakeMultiUse(&op1); // The permute below gives us [0 + 1, 1 + 0, 2 + 3, 3 + 2] -> [2 + 3, 3 + 2, 0 + 1, 1 + 0] - op1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode((int)0b01001110, TYP_INT), NI_AVX_Permute, + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode((int)0b01001110, TYP_INT), NI_AVX_Permute, simdBaseJitType, simdSize); } else @@ -26345,7 +26345,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); // The permute below gives us [0, 1] -> [1, 0] - op1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode((int)0b0001, TYP_INT), NI_AVX_Permute, + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode((int)0b0001, TYP_INT), NI_AVX_Permute, simdBaseJitType, simdSize); } else diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index dbd56791d88d8..97322e200a3fe 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -190,6 +190,9 @@ CONFIG_INTEGER(JitStressProcedureSplitting, W("JitStressProcedureSplitting"), 0) CONFIG_INTEGER(JitStressRegs, W("JitStressRegs"), 0) CONFIG_STRING(JitStressRegsRange, W("JitStressRegsRange")) // Only apply JitStressRegs to methods in this hash range +// If non-negative value N, only stress split the first N trees. +CONFIG_INTEGER(JitStressSplitTreeLimit, W("JitStressSplitTreeLimit"), -1) + // If non-zero, assert if # of VNF_MapSelect applications considered reaches this. CONFIG_INTEGER(JitVNMapSelLimit, W("JitVNMapSelLimit"), 0)