diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 74a9d0cec9..89f33e24b3 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -207,6 +207,13 @@ union Union64 double d; }; +// Convert `val` to `float` with an explicit cast. +template<typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL float make_float(T val) +{ + return (float)val; +} + SLANG_FORCE_INLINE SLANG_CUDA_CALL float _slang_fmod(float x, float y) { return ::fmodf(x, y); diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h index 46724119bd..7001dd0d19 100755 --- a/source/slang/slang-compiler.h +++ b/source/slang/slang-compiler.h @@ -1755,6 +1755,8 @@ namespace Slang /// Are we generating code for a CUDA API (CUDA / OptiX)? bool isCUDATarget(TargetRequest* targetReq); + /// Are we generating code for a CPU target? + bool isCPUTarget(TargetRequest* targetReq); /// A request to generate output in some target format. class TargetRequest : public RefObject diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index d8f0686d52..6762a546a8 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -1257,15 +1257,9 @@ Result linkAndOptimizeIR( if (requiredLoweringPassSet.meshOutput) legalizeMeshOutputTypes(irModule); - if (options.shouldLegalizeExistentialAndResourceTypes) - { - if (!isMetalTarget(targetRequest)) - { - // We need to lower any types used in a buffer resource (e.g. ContantBuffer or StructuredBuffer) into - // a simple storage type that has target independent layout based on the kind of buffer resource. - lowerBufferElementTypeToStorageType(targetProgram, irModule); - } - } + // Lower any types used in a buffer resource (e.g. ConstantBuffer or StructuredBuffer) into + // a simple storage type that has target independent layout based on the kind of buffer resource. + lowerBufferElementTypeToStorageType(targetProgram, irModule); // Rewrite functions that return arrays to return them via `out` parameter, // since our target languages doesn't allow returning arrays. 
diff --git a/source/slang/slang-ir-lower-buffer-element-type.cpp b/source/slang/slang-ir-lower-buffer-element-type.cpp index 981e29697d..d042aae435 100644 --- a/source/slang/slang-ir-lower-buffer-element-type.cpp +++ b/source/slang/slang-ir-lower-buffer-element-type.cpp @@ -877,7 +877,10 @@ namespace Slang void lowerBufferElementTypeToStorageType(TargetProgram* target, IRModule* module, bool lowerBufferPointer) { SlangMatrixLayoutMode defaultMatrixMode = (SlangMatrixLayoutMode)target->getOptionSet().getMatrixLayoutMode(); - if (defaultMatrixMode == SLANG_MATRIX_LAYOUT_MODE_UNKNOWN) + // CPU, CUDA and Metal targets are always lowered with row-major matrix layout, regardless of the matrix layout mode in the option set. + if (isCPUTarget(target->getTargetReq()) || isCUDATarget(target->getTargetReq()) || isMetalTarget(target->getTargetReq())) + defaultMatrixMode = SLANG_MATRIX_LAYOUT_ROW_MAJOR; + else if (defaultMatrixMode == SLANG_MATRIX_LAYOUT_MODE_UNKNOWN) defaultMatrixMode = SLANG_MATRIX_LAYOUT_ROW_MAJOR; LoweredElementTypeContext context(target, lowerBufferPointer, defaultMatrixMode); context.processModule(module); diff --git a/tests/compute/column-major.slang b/tests/compute/column-major.slang index 19d8632604..1cd08434b8 100644 --- a/tests/compute/column-major.slang +++ b/tests/compute/column-major.slang @@ -1,33 +1,59 @@ // column-major.slang -// Unfortunately CPU and CUDA only work with row layout, so they have to be disabled here. 
- -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type -compile-arg -O3 -shaderobj -//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type -shaderobj -Xslang -matrix-layout-column-major -//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type -dx12 -shaderobj -Xslang -matrix-layout-column-major -//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj -Xslang -matrix-layout-column-major -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type -mtl -shaderobj -Xslang -matrix-layout-column-major +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cpu -compute -compile-arg -O3 -shaderobj -Xslang -matrix-layout-column-major +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -shaderobj -Xslang -matrix-layout-column-major +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -dx12 -shaderobj -Xslang -matrix-layout-column-major +//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -shaderobj -Xslang -matrix-layout-column-major +//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -emit-spirv-via-glsl -Xslang -matrix-layout-column-major +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cuda -compute -shaderobj -Xslang -matrix-layout-column-major +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-mtl -compute -shaderobj -Xslang -matrix-layout-column-major // This data is in column major layout order.... 
//TEST_INPUT:cbuffer(data=[1.0 0.0 0.0 10.0 0.0 1.0 0.0 20.0 0.0 0.0 1.0 30.0 0.0 0.0 0.0 1.0]):name matrixBuffer ConstantBuffer<float4x4> matrixBuffer; -//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name output -RWStructuredBuffer<float> output; +//TEST_INPUT:ubuffer(data=[0], stride=4):out,name output +RWStructuredBuffer<uint> output; + +bool floatCheck(float data, float valueToCheckFor) // true when data is strictly within 0.001 of valueToCheckFor +{ + return data < (valueToCheckFor + 0.001) && data > valueToCheckFor - 0.001; +} [numthreads(1, 1, 1)] void computeMain(uint3 tid : SV_DispatchThreadID) { float4 v = float4(1, 2, 3, 1); - float4x4 M = matrixBuffer; + float4x4 M1 = matrixBuffer; - float4 r = mul(v, M); - - output[0] = r.x; - output[1] = r.y; - output[2] = r.z; - output[3] = r.w; + float4 r = mul(v, M1); + + float4x4 M2 = mul(M1, M1); + + float4x4 M3 = float4x4( + 1.0, 0.0, 0.0, 10.0, + 0.0, 1.0, 0.0, 20.0, + 0.0, 0.0, 1.0, 30.0, + 0.0, 0.0, 0.0, 1.0 + ); + + output[0] = uint(true + && floatCheck(r.x, 11) + && floatCheck(r.y, 22) + && floatCheck(r.z, 33) + && floatCheck(r.w, 1) + + && floatCheck(M1[3][0], 10) + + && floatCheck(M2[3][0], 20) + && floatCheck(M2._41, 20) + && floatCheck(M2._41_32[0], 20) + && floatCheck(M2._33_42[0], 1) + && floatCheck(M2._42_33[0], 40) + + && floatCheck(M3[0][3], 10) + ); + //BUF: 1 } diff --git a/tests/compute/column-major.slang.expected.txt b/tests/compute/column-major.slang.expected.txt deleted file mode 100644 index 1e24f3253b..0000000000 --- a/tests/compute/column-major.slang.expected.txt +++ /dev/null @@ -1,5 +0,0 @@ -type: float -11.000000 -22.000000 -33.000000 -1.000000 diff --git a/tests/compute/memory-packing.slang b/tests/compute/memory-packing.slang new file mode 100644 index 0000000000..923194f4dd --- /dev/null +++ b/tests/compute/memory-packing.slang @@ -0,0 +1,87 @@ +// memory-packing.slang + +// Metal/CPP/CUDA do not correctly deal with packing currently. 
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cuda -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-mtl -compute + +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -dx12 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute +//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -emit-spirv-via-glsl + +//TEST_INPUT:cbuffer(data=[1.0 2.0 3.0 0.0 4.0 5.0 6.0 0.0 7.0 8.0 9.0 0]):name matrixTest +ConstantBuffer matrixTestCBuf1; + +//TEST_INPUT:cbuffer(data=[1.0 4.0 7.0 0.0 2.0 5.0 8.0 0.0 3.0 6.0 9.0 0.0]):name colMatrixBuffer +ConstantBuffer matrixTestCBuf2; + +// struct float3x3{float3[3] +// { +// float3 data1; +// float pad1; +// float3 data2; +// float pad2; +// float3 data3; +// float pad3; +// } + +struct NeedsPadding +{ + float2 data1; + // float2 pad1; + float2 data2; + // float2 pad2; +}; +//TEST_INPUT:cbuffer(data=[1.0 2.0 100 100 3.0 4.0 100 100]):name structTest +ConstantBuffer structTestCBuf1; + +//TEST_INPUT:ubuffer(data=[0], stride=4):out,name output +RWStructuredBuffer output; + +bool floatCheck(float data, float valueToCheckFor) // true when data is strictly within 0.001 of valueToCheckFor +{ + return data < (valueToCheckFor + 0.001) && data > valueToCheckFor - 0.001; +} + +[numthreads(1, 1, 1)] +void computeMain(uint3 tid : SV_DispatchThreadID) // writes 1 to output[0] only if every floatCheck below passes, otherwise 0 +{ + float3x3 matrixTest1; + matrixTest1 = matrixTestCBuf1; + + float3x3 matrixTest2; + matrixTest2 = matrixTestCBuf2; + + NeedsPadding structTest1; // NOTE(review): never assigned in this function (structTestCBuf1 is not read), so the structTest1 checks below read an unset value - confirm intent + + // Note: default is column major + output[0] = bool(true + && floatCheck(matrixTest1[0][0], 1) + && floatCheck(matrixTest1[0][1], 2) + && floatCheck(matrixTest1[0][2], 3) + && floatCheck(matrixTest1[1][0], 4) + && floatCheck(matrixTest1[1][1], 5) + && floatCheck(matrixTest1[1][2], 6) + && floatCheck(matrixTest1[2][0], 7) + && floatCheck(matrixTest1[2][1], 8) + && floatCheck(matrixTest1[2][2], 9) + + + && 
floatCheck(matrixTest2[0][0], 1) + && floatCheck(matrixTest2[0][1], 2) + && floatCheck(matrixTest2[0][2], 3) + && floatCheck(matrixTest2[1][0], 4) + && floatCheck(matrixTest2[1][1], 5) + && floatCheck(matrixTest2[1][2], 6) + && floatCheck(matrixTest2[2][0], 7) + && floatCheck(matrixTest2[2][1], 8) + && floatCheck(matrixTest2[2][2], 9) + + + && floatCheck(structTest1.data1[0], 1) + && floatCheck(structTest1.data1[1], 2) + && floatCheck(structTest1.data2[0], 3) + && floatCheck(structTest1.data2[1], 4) + ) ? 1 : 0; + //BUF: 1 +}