Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve loop cloning, with debugging improvements #55299

Merged
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/coreclr/jit/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "jithashtable.h"

/*****************************************************************************/
typedef BitVec EXPSET_TP;
typedef BitVec EXPSET_TP;
typedef BitVec_ValArg_T EXPSET_VALARG_TP;
typedef BitVec_ValRet_T EXPSET_VALRET_TP;

#if LARGE_EXPSET
#define EXPSET_SZ 64
#else
Expand Down
51 changes: 51 additions & 0 deletions src/coreclr/jit/clrjit.natvis
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ Licensed to the .NET Foundation under one or more agreements.
The .NET Foundation licenses this file to you under the MIT license.
-->

<!--
Visual Studio debugger visualizers for RyuJIT.

Documentation for VS natvis format: https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects?view=vs-2019

Documentation for VS debugger format specifiers: https://docs.microsoft.com/en-us/visualstudio/debugger/format-specifiers-in-cpp?view=vs-2019
-->

<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">

Expand Down Expand Up @@ -183,4 +190,48 @@ The .NET Foundation licenses this file to you under the MIT license.
</Expand>
</Type>

<Type Name="jitstd::vector&lt;*&gt;">
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Glad to see that this file is getting longer :)

<DisplayString Condition="m_nSize > 0">size={m_nSize,d} capacity={m_nCapacity,d}</DisplayString>
<DisplayString Condition="m_nSize == 0">Empty</DisplayString>
<Expand>
<ArrayItems>
<Size>m_nSize</Size>
<ValuePointer>m_pArray</ValuePointer>
</ArrayItems>
</Expand>
</Type>

<!-- JitExpandArray<T>: the summary shows m_size in decimal, or "Empty" when m_size is zero.
     Expanding the object lists m_size elements starting at m_members. -->
<Type Name="JitExpandArray&lt;*&gt;">
<DisplayString Condition="m_size > 0">size={m_size,d}</DisplayString>
<DisplayString Condition="m_size == 0">Empty</DisplayString>
<Expand>
<ArrayItems>
<Size>m_size</Size>
<ValuePointer>m_members</ValuePointer>
</ArrayItems>
</Expand>
</Type>

<!-- JitExpandArrayStack<T>: the summary shows both m_size and m_used in decimal, or "Empty" when m_size is zero.
     Note that the "Empty" test is on m_size, not m_used. Expanding the object lists only the first m_used
     elements starting at m_members. -->
<Type Name="JitExpandArrayStack&lt;*&gt;">
<DisplayString Condition="m_size > 0">size={m_size,d} used={m_used,d}</DisplayString>
<DisplayString Condition="m_size == 0">Empty</DisplayString>
<Expand>
<ArrayItems>
<Size>m_used</Size>
<ValuePointer>m_members</ValuePointer>
</ArrayItems>
</Expand>
</Type>

<!-- Loop cloning -->

<!-- LcOptInfo is really one of its derived types, so figure out which one. Set Inheritable=false to prevent recursion.
     The summary is the optType enumerator (formatted with the "en" specifier). The expansion casts "this" to
     LcJaggedArrayOptInfo* or LcMdArrayOptInfo* depending on optType; any other optType value produces no
     expanded items. The "nd" specifier on the casts is documented by Visual Studio as showing only the stated
     type's information, ignoring derived classes. -->
<Type Name="LcOptInfo" Inheritable="false">
<DisplayString>{optType,en}</DisplayString>
<Expand>
<ExpandedItem Condition="optType == LcOptInfo::OptType::LcJaggedArray">(LcJaggedArrayOptInfo*)this,nd</ExpandedItem>
<ExpandedItem Condition="optType == LcOptInfo::OptType::LcMdArray">(LcMdArrayOptInfo*)this,nd</ExpandedItem>
</Expand>
</Type>

</AutoVisualizer>
5 changes: 1 addition & 4 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ class CodeGen final : public CodeGenInterface
unsigned genCurDispOffset;

static const char* genInsName(instruction ins);
const char* genInsDisplayName(emitter::instrDesc* id);
#endif // DEBUG

//-------------------------------------------------------------------------
Expand Down Expand Up @@ -1503,10 +1504,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

void instGen_Store_Reg_Into_Lcl(var_types dstType, regNumber srcReg, int varNum, int offs);

#ifdef DEBUG
void __cdecl instDisp(instruction ins, bool noNL, const char* fmt, ...);
#endif

#ifdef TARGET_XARCH
instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue);
#endif // TARGET_XARCH
Expand Down
7 changes: 3 additions & 4 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1070,7 +1070,7 @@ void CodeGen::genDefineTempLabel(BasicBlock* label)
{
genLogLabel(label);
label->bbEmitCookie = GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
gcInfo.gcRegByrefSetCur, false DEBUG_ARG(label->bbNum));
gcInfo.gcRegByrefSetCur, false DEBUG_ARG(label));
}

// genDefineInlineTempLabel: Define an inline label that does not affect the GC
Expand Down Expand Up @@ -2064,9 +2064,8 @@ void CodeGen::genInsertNopForUnwinder(BasicBlock* block)
// block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
// would be executed, which we would prefer not to do.

block->bbUnwindNopEmitCookie =
GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
false DEBUG_ARG(block->bbNum));
block->bbUnwindNopEmitCookie = GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
gcInfo.gcRegByrefSetCur, false DEBUG_ARG(block));

instGen(INS_nop);
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ void CodeGen::genCodeForBBlist()
// Mark a label and update the current set of live GC refs

block->bbEmitCookie = GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
gcInfo.gcRegByrefSetCur, false DEBUG_ARG(block->bbNum));
gcInfo.gcRegByrefSetCur, false DEBUG_ARG(block));
}

if (block == compiler->fgFirstColdBlock)
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2988,6 +2988,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
opts.disAsmSpilled = false;
opts.disDiffable = false;
opts.disAddr = false;
opts.disAlignment = false;
opts.dspCode = false;
opts.dspEHTable = false;
opts.dspDebugInfo = false;
Expand Down Expand Up @@ -3136,6 +3137,11 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
opts.disAddr = true;
}

if (JitConfig.JitDasmWithAlignmentBoundaries() != 0)
{
opts.disAlignment = true;
}

if (JitConfig.JitLongAddress() != 0)
{
opts.compLongAddress = true;
Expand Down
65 changes: 50 additions & 15 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6209,9 +6209,9 @@ class Compiler
public:
void optInit();

GenTree* Compiler::optRemoveRangeCheck(GenTreeBoundsChk* check, GenTree* comma, Statement* stmt);
GenTree* Compiler::optRemoveStandaloneRangeCheck(GenTreeBoundsChk* check, Statement* stmt);
void Compiler::optRemoveCommaBasedRangeCheck(GenTree* comma, Statement* stmt);
GenTree* optRemoveRangeCheck(GenTreeBoundsChk* check, GenTree* comma, Statement* stmt);
GenTree* optRemoveStandaloneRangeCheck(GenTreeBoundsChk* check, Statement* stmt);
void optRemoveCommaBasedRangeCheck(GenTree* comma, Statement* stmt);
bool optIsRangeCheckRemovable(GenTree* tree);

protected:
Expand Down Expand Up @@ -6700,7 +6700,7 @@ class Compiler
// BitVec trait information for computing CSE availability using the CSE_DataFlow algorithm.
// Two bits are allocated per CSE candidate to compute CSE availability
// plus an extra bit to handle the initial unvisited case.
// (See CSE_DataFlow::EndMerge for an explaination of why this is necessary)
// (See CSE_DataFlow::EndMerge for an explanation of why this is necessary.)
//
// The two bits per CSE candidate have the following meanings:
// 11 - The CSE is available, and is also available when considering calls as killing availability.
Expand All @@ -6710,6 +6710,37 @@ class Compiler
//
BitVecTraits* cseLivenessTraits;

//-----------------------------------------------------------------------------------------------------------------
// genCSEnum2bit: Return the normalized index to use in the EXPSET_TP for the CSE with the given CSE index.
// Each GenTree has a `gtCSEnum` field. Zero is reserved to mean this node is not a CSE, positive values indicate
// CSE uses, and negative values indicate CSE defs. The caller must pass a non-zero positive value, as from
// GET_CSE_INDEX().
//
// Arguments:
//    CSEnum - the 1-based CSE index; asserted to be in the range [1, MAX_CSE_CNT].
//
// Return Value:
//    The zero-based index, `CSEnum - 1`.
//
static unsigned genCSEnum2bit(unsigned CSEnum)
{
assert((CSEnum > 0) && (CSEnum <= MAX_CSE_CNT));
return CSEnum - 1;
}

//-----------------------------------------------------------------------------------------------------------------
// getCSEAvailBit: Return the bit used by CSE dataflow sets (bbCseGen, etc.) for the availability bit for a CSE.
//
// Arguments:
//    CSEnum - the 1-based CSE index; validated by genCSEnum2bit().
//
// Return Value:
//    `genCSEnum2bit(CSEnum) * 2`, i.e. the first (even-numbered) bit of the pair of bits used for this CSE.
//
static unsigned getCSEAvailBit(unsigned CSEnum)
{
return genCSEnum2bit(CSEnum) * 2;
}

//-----------------------------------------------------------------------------------------------------------------
// getCSEAvailCrossCallBit: Return the bit used by CSE dataflow sets (bbCseGen, etc.) for the availability of a
// CSE when calls are considered as killing availability (see the description of the two bits per CSE above).
//
// Arguments:
//    CSEnum - the 1-based CSE index; validated by genCSEnum2bit() via getCSEAvailBit().
//
// Return Value:
//    `getCSEAvailBit(CSEnum) + 1`, i.e. the bit immediately following the CSE's availability bit.
//
static unsigned getCSEAvailCrossCallBit(unsigned CSEnum)
{
return getCSEAvailBit(CSEnum) + 1;
}

void optPrintCSEDataFlowSet(EXPSET_VALARG_TP cseDataFlowSet, bool includeBits = true);

EXPSET_TP cseCallKillsMask; // Computed once - A mask that is used to kill available CSEs at callsites

/* Generic list of nodes - used by the CSE logic */
Expand Down Expand Up @@ -6844,26 +6875,28 @@ class Compiler
return (enckey & ~TARGET_SIGN_BIT) << CSE_CONST_SHARED_LOW_BITS;
}

/**************************************************************************
* Value Number based CSEs
*************************************************************************/
/**************************************************************************
* Value Number based CSEs
*************************************************************************/

// String to use for formatting CSE numbers. Note that this is the positive number, e.g., from GET_CSE_INDEX().
#define FMT_CSE "CSE #%02u"

public:
void optOptimizeValnumCSEs();

protected:
void optValnumCSE_Init();
unsigned optValnumCSE_Index(GenTree* tree, Statement* stmt);
unsigned optValnumCSE_Locate();
void optValnumCSE_InitDataFlow();
void optValnumCSE_DataFlow();
void optValnumCSE_Availablity();
void optValnumCSE_Heuristic();
bool optValnumCSE_Locate();
void optValnumCSE_InitDataFlow();
void optValnumCSE_DataFlow();
void optValnumCSE_Availablity();
void optValnumCSE_Heuristic();

bool optDoCSE; // True when we have found a duplicate CSE tree
bool optValnumCSE_phase; // True when we are executing the optValnumCSE_phase
unsigned optCSECandidateTotal; // Grand total of CSE candidates for both Lexical and ValNum
unsigned optCSECandidateCount; // Count of CSE's candidates, reset for Lexical and ValNum CSE's
bool optValnumCSE_phase; // True when we are executing the optOptimizeValnumCSEs() phase
unsigned optCSECandidateCount; // Count of CSE's candidates
unsigned optCSEstart; // The first local variable number that is a CSE
unsigned optCSEcount; // The total count of CSE's introduced.
BasicBlock::weight_t optCSEweight; // The weight of the current block when we are doing PerformCSE
Expand All @@ -6888,6 +6921,7 @@ class Compiler
bool optConfigDisableCSE();
bool optConfigDisableCSE2();
#endif

void optOptimizeCSEs();

struct isVarAssgDsc
Expand Down Expand Up @@ -9304,6 +9338,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
bool disasmWithGC; // Display GC info interleaved with disassembly.
bool disDiffable; // Makes the Disassembly code 'diff-able'
bool disAddr; // Display process address next to each instruction in disassembly code
bool disAlignment; // Display alignment boundaries in disassembly code
bool disAsm2; // Display native code after it is generated using external disassembler
bool dspOrder; // Display names of each of the methods that we ngen/jit
bool dspUnwind; // Display the unwind info output
Expand Down
19 changes: 0 additions & 19 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -763,25 +763,6 @@ inline double getR8LittleEndian(const BYTE* ptr)
return *(double*)&val;
}

/*****************************************************************************
*
* Return the normalized index to use in the EXPSET_TP for the CSE with
* the given CSE index.
* Each GenTree has the following field:
* signed char gtCSEnum; // 0 or the CSE index (negated if def)
* So zero is reserved to mean this node is not a CSE
* and positive values indicate CSE uses and negative values indicate CSE defs.
* The caller of this method must pass a non-zero positive value.
* This precondition is checked by the assert on the first line of this method.
*/

inline unsigned int genCSEnum2bit(unsigned index)
{
assert((index > 0) && (index <= EXPSET_SZ));

return (index - 1);
}

#ifdef DEBUG
const char* genES2str(BitVecTraits* traits, EXPSET_TP set);
const char* refCntWtd2str(BasicBlock::weight_t refCntWtd);
Expand Down
Loading