Skip to content

Commit

Permalink
just allow CPP/CUDA/Metal to legalize their Matrix inputs.
Browse files Browse the repository at this point in the history
Memory packing still fails because cpp/cuda/metal do not manage it.
  • Loading branch information
ArielG-NV committed Jul 16, 2024
1 parent 59343c1 commit 47995ab
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 32 deletions.
6 changes: 6 additions & 0 deletions prelude/slang-cuda-prelude.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,12 @@ union Union64
double d;
};

/// Converts `val` to `float` via an explicit conversion.
///
/// @tparam T   Source type; must be explicitly convertible to `float`.
/// @param  val Value to convert.
/// @return `val` converted to `float`.
template<typename T>
SLANG_FORCE_INLINE SLANG_CUDA_CALL float make_float(T val)
{
    return static_cast<float>(val);
}

SLANG_FORCE_INLINE SLANG_CUDA_CALL float _slang_fmod(float x, float y)
{
return ::fmodf(x, y);
Expand Down
2 changes: 2 additions & 0 deletions source/slang/slang-compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1755,6 +1755,8 @@ namespace Slang
/// Are we generating code for a CUDA API (CUDA / OptiX)?
bool isCUDATarget(TargetRequest* targetReq);

/// Are we generating code for a CPU target?
bool isCPUTarget(TargetRequest* targetReq);

/// A request to generate output in some target format.
class TargetRequest : public RefObject
Expand Down
10 changes: 1 addition & 9 deletions source/slang/slang-emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1257,15 +1257,7 @@ Result linkAndOptimizeIR(
if (requiredLoweringPassSet.meshOutput)
legalizeMeshOutputTypes(irModule);

if (options.shouldLegalizeExistentialAndResourceTypes)
{
if (!isMetalTarget(targetRequest))
{
// We need to lower any types used in a buffer resource (e.g. ConstantBuffer or StructuredBuffer) into
// a simple storage type that has target independent layout based on the kind of buffer resource.
lowerBufferElementTypeToStorageType(targetProgram, irModule);
}
}
lowerBufferElementTypeToStorageType(targetProgram, irModule);

// Rewrite functions that return arrays to return them via `out` parameter,
// since our target languages don't allow returning arrays.
Expand Down
4 changes: 3 additions & 1 deletion source/slang/slang-ir-lower-buffer-element-type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -877,7 +877,9 @@ namespace Slang
void lowerBufferElementTypeToStorageType(TargetProgram* target, IRModule* module, bool lowerBufferPointer)
{
SlangMatrixLayoutMode defaultMatrixMode = (SlangMatrixLayoutMode)target->getOptionSet().getMatrixLayoutMode();
if (defaultMatrixMode == SLANG_MATRIX_LAYOUT_MODE_UNKNOWN)
if ((isCPUTarget(target->getTargetReq()) || isCUDATarget(target->getTargetReq()) || isMetalTarget(target->getTargetReq())))
defaultMatrixMode = SLANG_MATRIX_LAYOUT_ROW_MAJOR;
else if (defaultMatrixMode == SLANG_MATRIX_LAYOUT_MODE_UNKNOWN)
defaultMatrixMode = SLANG_MATRIX_LAYOUT_ROW_MAJOR;
LoweredElementTypeContext context(target, lowerBufferPointer, defaultMatrixMode);
context.processModule(module);
Expand Down
60 changes: 43 additions & 17 deletions tests/compute/column-major.slang
Original file line number Diff line number Diff line change
@@ -1,33 +1,59 @@
// column-major.slang

// Unfortunately CPU and CUDA only work with row layout, so they have to be disabled here.

//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type -compile-arg -O3 -shaderobj
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type -shaderobj -Xslang -matrix-layout-column-major
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type -dx12 -shaderobj -Xslang -matrix-layout-column-major
//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj -Xslang -matrix-layout-column-major
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type -mtl -shaderobj -Xslang -matrix-layout-column-major
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cpu -compute -compile-arg -O3 -shaderobj -Xslang -matrix-layout-column-major
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -shaderobj -Xslang -matrix-layout-column-major
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -dx12 -shaderobj -Xslang -matrix-layout-column-major
//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -shaderobj -Xslang -matrix-layout-column-major
//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -emit-spirv-via-glsl -Xslang -matrix-layout-column-major
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cuda -compute -shaderobj -Xslang -matrix-layout-column-major
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-mtl -compute -shaderobj -Xslang -matrix-layout-column-major

// This data is in column major layout order....
//TEST_INPUT:cbuffer(data=[1.0 0.0 0.0 10.0 0.0 1.0 0.0 20.0 0.0 0.0 1.0 30.0 0.0 0.0 0.0 1.0]):name matrixBuffer

ConstantBuffer<float4x4> matrixBuffer;

//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name output
RWStructuredBuffer<float> output;
//TEST_INPUT:ubuffer(data=[0], stride=4):out,name output
RWStructuredBuffer<uint> output;

// Returns true when `data` lies strictly inside the open interval
// (valueToCheckFor - 0.001, valueToCheckFor + 0.001).
bool floatCheck(float data, float valueToCheckFor)
{
    // Reject anything at or below the lower edge of the tolerance window first.
    if (!(data > valueToCheckFor - 0.001))
        return false;

    // Otherwise the value passes only if it is also below the upper edge.
    return data < (valueToCheckFor + 0.001);
}

[numthreads(1, 1, 1)]
void computeMain(uint3 tid : SV_DispatchThreadID)
{
float4 v = float4(1, 2, 3, 1);

float4x4 M = matrixBuffer;
float4x4 M1 = matrixBuffer;

float4 r = mul(v, M);

output[0] = r.x;
output[1] = r.y;
output[2] = r.z;
output[3] = r.w;
float4 r = mul(v, M1);

float4x4 M2 = mul(M1, M1);

float4x4 M3 = float4x4(
1.0, 0.0, 0.0, 10.0,
0.0, 1.0, 0.0, 20.0,
0.0, 0.0, 1.0, 30.0,
0.0, 0.0, 0.0, 1.0
);

output[0] = uint(true
&& floatCheck(r.x, 11)
&& floatCheck(r.y, 22)
&& floatCheck(r.z, 33)
&& floatCheck(r.w, 1)

&& floatCheck(M1[3][0], 10)

&& floatCheck(M2[3][0], 20)
&& floatCheck(M2._41, 20)
&& floatCheck(M2._41_32[0], 20)
&& floatCheck(M2._33_42[0], 1)
&& floatCheck(M2._42_33[0], 40)

&& floatCheck(M3[0][3], 10)
);
//BUF: 1
}
5 changes: 0 additions & 5 deletions tests/compute/column-major.slang.expected.txt

This file was deleted.

87 changes: 87 additions & 0 deletions tests/compute/memory-packing.slang
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// memory-packing.slang

// Metal/CPP/CUDA do not correctly deal with packing currently.
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cpu -compute
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cuda -compute
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-mtl -compute

//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -dx12
//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute
//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -emit-spirv-via-glsl

//TEST_INPUT:cbuffer(data=[1.0 2.0 3.0 0.0 4.0 5.0 6.0 0.0 7.0 8.0 9.0 0]):name matrixTest
ConstantBuffer<row_major float3x3> matrixTestCBuf1;

//TEST_INPUT:cbuffer(data=[1.0 4.0 7.0 0.0 2.0 5.0 8.0 0.0 3.0 6.0 9.0 0.0]):name colMatrixBuffer
ConstantBuffer<column_major float3x3> matrixTestCBuf2;

// struct float3x3{float3[3]
// {
// float3 data1;
// float pad1;
// float3 data2;
// float pad2;
// float3 data3;
// float pad3;
// }

// Two-member struct bound through `structTestCBuf1` to exercise constant-buffer packing.
// The commented-out `pad` members mark where the test input for that buffer
// places filler values (the 100s) after each float2 member.
// NOTE(review): whether each target's layout rules actually insert this padding
// between two float2 members is not established here — confirm against the target.
struct NeedsPadding
{
float2 data1;
// float2 pad1;
float2 data2;
// float2 pad2;
};
//TEST_INPUT:cbuffer(data=[1.0 2.0 100 100 3.0 4.0 100 100]):name structTest
ConstantBuffer<NeedsPadding> structTestCBuf1;

//TEST_INPUT:ubuffer(data=[0], stride=4):out,name output
RWStructuredBuffer<uint> output;

// Returns true when `data` lies strictly inside the open interval
// (valueToCheckFor - 0.001, valueToCheckFor + 0.001).
bool floatCheck(float data, float valueToCheckFor)
{
    // Reject anything at or below the lower edge of the tolerance window first.
    if (!(data > valueToCheckFor - 0.001))
        return false;

    // Otherwise the value passes only if it is also below the upper edge.
    return data < (valueToCheckFor + 0.001);
}

// Compute entry point for the packing test.
// Loads each constant buffer into a local value and writes 1 to output[0] only
// when every element compares equal (within floatCheck's tolerance) to its
// expected value; otherwise writes 0.
[numthreads(1, 1, 1)]
void computeMain(uint3 tid : SV_DispatchThreadID)
{
    float3x3 matrixTest1;
    matrixTest1 = matrixTestCBuf1;

    float3x3 matrixTest2;
    matrixTest2 = matrixTestCBuf2;

    // Load the struct from its constant buffer. Without this assignment the
    // checks on structTest1 below would read an uninitialized local.
    // NOTE(review): the expected values assume the 100-valued filler in the
    // structTestCBuf1 test input lines up with real padding in the target
    // layout — confirm per target.
    NeedsPadding structTest1;
    structTest1 = structTestCBuf1;

    // Note: default is column major
    output[0] = bool(true
        // Explicitly row_major buffer: rows are 1..3, 4..6, 7..9.
        && floatCheck(matrixTest1[0][0], 1)
        && floatCheck(matrixTest1[0][1], 2)
        && floatCheck(matrixTest1[0][2], 3)
        && floatCheck(matrixTest1[1][0], 4)
        && floatCheck(matrixTest1[1][1], 5)
        && floatCheck(matrixTest1[1][2], 6)
        && floatCheck(matrixTest1[2][0], 7)
        && floatCheck(matrixTest1[2][1], 8)
        && floatCheck(matrixTest1[2][2], 9)

        // Explicitly column_major buffer: same logical matrix is expected.
        && floatCheck(matrixTest2[0][0], 1)
        && floatCheck(matrixTest2[0][1], 2)
        && floatCheck(matrixTest2[0][2], 3)
        && floatCheck(matrixTest2[1][0], 4)
        && floatCheck(matrixTest2[1][1], 5)
        && floatCheck(matrixTest2[1][2], 6)
        && floatCheck(matrixTest2[2][0], 7)
        && floatCheck(matrixTest2[2][1], 8)
        && floatCheck(matrixTest2[2][2], 9)

        // Struct members loaded from structTestCBuf1.
        && floatCheck(structTest1.data1[0], 1)
        && floatCheck(structTest1.data1[1], 2)
        && floatCheck(structTest1.data2[0], 3)
        && floatCheck(structTest1.data2[1], 4)
        ) ? 1 : 0;
    //BUF: 1
}

0 comments on commit 47995ab

Please sign in to comment.