Skip to content

Commit

Permalink
ARM64 intrinsic support for Vector64.Create() and Vector128.Create() (#…
Browse files Browse the repository at this point in the history
…35590)

* Make Vector64.Create() that takes multiple arguments use ARM64 intrinsic
* Make Vector128.Create() that takes multiple arguments use ARM64 intrinsic
* Intrinsify Vector64.Create() that takes single argument
* Intrinsify Vector128.Create() that takes single argument
* Fix edge case where int.MaxValue was failing if used as immediate
  • Loading branch information
kunalspathak authored May 5, 2020
1 parent 96268f3 commit d23f1a2
Show file tree
Hide file tree
Showing 11 changed files with 335 additions and 12 deletions.
5 changes: 4 additions & 1 deletion src/coreclr/src/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6984,7 +6984,6 @@ void CodeGen::genArm64EmitterUnitTests()
genDefineTempLabel(genCreateTempLabel());

theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8);
theEmitter->emitIns_R(INS_blr, EA_PTRSIZE, REG_R9);
theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8);
theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR);

Expand Down Expand Up @@ -7330,6 +7329,10 @@ void CodeGen::genArm64EmitterUnitTests()
theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V30, 0xFF000000FF000000);
theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V31, 0x0, INS_OPTS_2D);

// We were not encoding immediate of movi that was int.MaxValue or int.MaxValue / 2.
theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V16, 0x7fffffff, INS_OPTS_2S);
theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V16, 0x3fffffff, INS_OPTS_2S);

theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
Expand Down
7 changes: 3 additions & 4 deletions src/coreclr/src/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2864,12 +2864,11 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
* 'size' specifies the size of the result (16 or 32 bits)
*/

/*static*/ INT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size)
/*static*/ UINT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size)
{
bool onesShift = (bsImm.immOnes == 1);
unsigned bySh = bsImm.immBY; // Num Bytes to shift 0,1,2,3
INT32 val = (INT32)bsImm.immVal; // 8-bit immediate
INT32 result = val;
unsigned bySh = bsImm.immBY; // Num Bytes to shift 0,1,2,3
UINT32 result = (UINT32)bsImm.immVal; // 8-bit immediate

if (bySh > 0)
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/src/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ union byteShiftedImm {

static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL);

static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size);
static UINT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size);

/************************************************************************
*
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/src/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va
{
case TYP_FLOAT:
return m_simdHandleCache->Vector64FloatHandle;
case TYP_DOUBLE:
return m_simdHandleCache->Vector64DoubleHandle;
case TYP_INT:
return m_simdHandleCache->Vector64IntHandle;
case TYP_USHORT:
Expand All @@ -212,6 +214,10 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va
return m_simdHandleCache->Vector64ByteHandle;
case TYP_UINT:
return m_simdHandleCache->Vector64UIntHandle;
case TYP_LONG:
return m_simdHandleCache->Vector64LongHandle;
case TYP_ULONG:
return m_simdHandleCache->Vector64ULongHandle;
default:
assert(!"Didn't find a class handle for simdType");
}
Expand Down
32 changes: 27 additions & 5 deletions src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -516,11 +516,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
else if (varTypeIsFloating(intrin.baseType))
{
if (targetReg != op1Reg)
{
// fmov reg1, reg2
GetEmitter()->emitIns_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, INS_OPTS_NONE);
}
// fmov reg1, reg2
GetEmitter()->emitIns_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, INS_OPTS_NONE);
}
else
{
Expand Down Expand Up @@ -557,6 +554,31 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
GetEmitter()->emitIns_R_I(ins, emitSize, targetReg, 0, INS_OPTS_4S);
break;

case NI_Vector64_Create:
case NI_Vector128_Create:
if (intrin.op1->isContainedFltOrDblImmed())
{
const double dataValue = intrin.op1->AsDblCon()->gtDconVal;
GetEmitter()->emitIns_R_F(INS_fmov, emitSize, targetReg, dataValue, opt);
}
else if (varTypeIsFloating(intrin.baseType))
{
GetEmitter()->emitIns_R_R_I(ins, emitSize, targetReg, op1Reg, 0, opt);
}
else
{
if (intrin.op1->isContainedIntOrIImmed())
{
const ssize_t dataValue = intrin.op1->AsIntCon()->gtIconVal;
GetEmitter()->emitIns_R_I(INS_movi, emitSize, targetReg, dataValue, opt);
}
else
{
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
}
}
break;

default:
unreached();
}
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/src/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ HARDWARE_INTRINSIC(Vector64, AsSByte,
HARDWARE_INTRINSIC(Vector64, AsSingle, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, AsUInt16, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, AsUInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, Create, 8, 1, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_mov, INS_mov, INS_dup, INS_dup}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector64, CreateScalarUnsafe, 8, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_invalid, INS_invalid, INS_fmov, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, 8, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector64, get_Count, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
Expand All @@ -44,6 +45,7 @@ HARDWARE_INTRINSIC(Vector128, AsSingle, 1
HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, Create, 16, 1, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, get_Count, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/src/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -923,6 +923,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
}
}
break;

case NI_Vector64_Create:
case NI_Vector128_Create:
case NI_Vector64_CreateScalarUnsafe:
case NI_Vector128_CreateScalarUnsafe:
if (intrin.op1->IsCnsIntOrI())
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
// Metadata version: v4.0.30319
.assembly extern System.Runtime
{
.publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A ) // .?_....:
.ver 5:0:0:0
}
.assembly extern System.Runtime.Intrinsics
{
.publickeytoken = (CC 7B 13 FF CD 2D DD 51 ) // .{...-.Q
.ver 5:0:0:0
}
.assembly projs { }
.module projs.dll
// MVID: {379016DB-73C2-41D4-9E5F-5B727BC70E2C}
.custom instance void [System.Runtime]System.Security.UnverifiableCodeAttribute::.ctor() = ( 01 00 00 00 )
.imagebase 0x00400000
.file alignment 0x00000200
.stackreserve 0x00100000
.subsystem 0x0003 // WINDOWS_CUI
.corflags 0x00000001 // ILONLY
// Image base: 0x00000293F3DD0000


// =============== CLASS MEMBERS DECLARATION ===================
// This bug was found when passing a Vector64<long> to a method such that
// the vector is on the evaluation stack. The C# compiler sometimes assigns the
// Vector64 to a local variable before passing it to the method. In such cases,
// the bug doesn't repro.
// Regression test class: builds single-argument Vector64 values for the
// uint64, int64 and float64 element types and hands each one straight to a
// non-inlined method, so the vector is consumed directly from the evaluation
// stack rather than from a local variable.
.class public auto ansi sealed beforefieldinit projs.GitHub_35821
extends [System.Runtime]System.Object
{
// Entry point. Performs three Create-then-call sequences and returns 100.
// NOTE(review): 100 is presumably the test harness's "pass" exit code - confirm.
.method private hidebysig static int32
Main(string[] args) cil managed
{
.entrypoint
// Code size 48 (0x30)
.maxstack 8
// Case 1: constant 23 widened to 64 bits -> Vector64.Create(uint64) -> Test1.
// The result is not stored to a local; it is passed directly from the stack.
IL_0000: ldc.i4.s 23
IL_0002: conv.i8
IL_0003: call valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<uint64> [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64::Create(uint64)
IL_0008: call void projs.GitHub_35821::Test1(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<uint64>)
// Case 2: same constant -> Vector64.Create(int64) -> Test2.
IL_000d: ldc.i4.s 23
IL_000f: conv.i8
IL_0010: call valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<int64> [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64::Create(int64)
IL_0015: call void projs.GitHub_35821::Test2(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<int64>)
// Case 3: floating-point constant 23.0 -> Vector64.Create(float64) -> Test3.
IL_001a: ldc.r8 23.
IL_0023: call valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<float64> [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64::Create(float64)
IL_0028: call void projs.GitHub_35821::Test3(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<float64>)
// Reaching this point without a failure is the whole test; return 100.
IL_002d: ldc.i4.s 100
IL_002f: ret
} // end of method GitHub_35821::Main

// Sink for a Vector64<uint64> argument. The body is empty; the method is
// marked noinlining, presumably so the call (and the argument passing it
// requires) is not optimized away - confirm against the original bug report.
.method public hidebysig static void Test1(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<uint64> data) cil managed noinlining
{
// Code size 1 (0x1)
.maxstack 8
IL_0000: ret
} // end of method GitHub_35821::Test1

// Sink for a Vector64<int64> argument; empty body, marked noinlining.
.method public hidebysig static void Test2(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<int64> data) cil managed noinlining
{
// Code size 1 (0x1)
.maxstack 8
IL_0000: ret
} // end of method GitHub_35821::Test2

// Sink for a Vector64<float64> argument; empty body, marked noinlining.
.method public hidebysig static void Test3(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<float64> data) cil managed noinlining
{
// Code size 1 (0x1)
.maxstack 8
IL_0000: ret
} // end of method GitHub_35821::Test3

// Instance constructor: only chains to System.Object's constructor.
// Main never instantiates this class.
.method public hidebysig specialname rtspecialname
instance void .ctor() cil managed
{
// Code size 7 (0x7)
.maxstack 8
IL_0000: ldarg.0
IL_0001: call instance void [System.Runtime]System.Object::.ctor()
IL_0006: ret
} // end of method GitHub_35821::.ctor

} // end of class projs.GitHub_35821


// =============================================================

// *********** DISASSEMBLY COMPLETE ***********************
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!-- Builds the IL source that shares this project's name into an executable. -->
<Project Sdk="Microsoft.NET.Sdk.IL">
<PropertyGroup>
<OutputType>Exe</OutputType>
</PropertyGroup>
<PropertyGroup>
<!-- No debug symbols are requested, and optimization is turned on. -->
<DebugType>None</DebugType>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<!-- The single compile input is <project name>.il. -->
<Compile Include="$(MSBuildProjectName).il" />
</ItemGroup>
</Project>
Loading

0 comments on commit d23f1a2

Please sign in to comment.