Skip to content

Commit

Permalink
Texture memory swizzling experiment
Browse files Browse the repository at this point in the history
  • Loading branch information
dubiousconst282 committed Sep 18, 2023
1 parent 36a1552 commit d0896e9
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"type": "cppvsdbg",
"request": "launch",
"program": "${command:cmake.launchTargetPath}",
"args": [],
"args": [ "--benchmark_time_unit=ms" ],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"console": "internalConsole",
Expand Down
2 changes: 2 additions & 0 deletions src/SwRast/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ target_link_libraries(SwRast PRIVATE
)
target_include_directories(SwRast PRIVATE ${Stb_INCLUDE_DIR})

# Google Benchmark: Main.cpp includes <benchmark/benchmark.h> for the texture sampling benchmark.
find_package(benchmark CONFIG REQUIRED)
target_link_libraries(SwRast PRIVATE benchmark::benchmark)

if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
target_compile_options(SwRast PRIVATE "-march=native" "-ffast-math" "-Wno-unused" "-Wsign-conversion")
Expand Down
38 changes: 37 additions & 1 deletion src/SwRast/Main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,43 @@ class SwRenderer {
}
};

int main(int argc, char** args) {
#include <benchmark/benchmark.h>

// Rotates the point (x, y) around the center (0.5, 0.5) by `angle`, given in degrees.
// Returns the rotated coordinates as a VFloat2.
[[gnu::noinline]] swr::VFloat2 Rotate(swr::VFloat x, swr::VFloat y, float angle) {
    // Convert to radians once and evaluate each trig term a single time.
    angle = glm::radians(angle);
    const auto cosTheta = cos(angle);
    const auto sinTheta = sin(angle);

    // Move the pivot to the origin before applying the rotation.
    x -= 0.5f;
    y -= 0.5f;

    swr::VFloat rotatedX = x * cosTheta - y * sinTheta;
    swr::VFloat rotatedY = x * sinTheta + y * cosTheta;

    // Move the pivot back to (0.5, 0.5).
    return { rotatedX + 0.5f, rotatedY + 0.5f };
}

// Benchmark body: samples a texture with nearest filtering using the addressing mode
// selected by the `SwizzleMode` template argument.
//
// state.range(0) picks the coordinate pattern fed to the sampler:
//   0      -> unrotated coordinates multiplied by 0.001
//   k >= 1 -> coordinates rotated by (k - 1) * 45 degrees, multiplied by 1.0
template<int SwizzleMode>
static void BM_TexSampling(benchmark::State& state) {
    constexpr swr::SamplerDesc kSampler = {
        .MagFilter = swr::FilterMode::Nearest,
        .MinFilter = swr::FilterMode::Nearest,
        .SwizzleMode = SwizzleMode,
    };
    // Loaded before the measured loop so image decoding is not part of the timing.
    auto image = swr::texutil::LoadImage("logs/assets/lake_2k.png");

    const auto pattern = state.range(0);
    float scale = 1.0f;
    float angle = 0.0f;
    if (pattern == 0) {
        scale = 0.001f;
    } else {
        angle = static_cast<float>((pattern - 1) * 45);
    }

    for (auto _ : state) {
        swr::texutil::IterateTiles(image.Width, image.Height, [&](uint32_t x, uint32_t y, swr::VFloat u, swr::VFloat v) {
            auto coords = Rotate(u, v, angle) * scale;
            swr::VInt texel = image.Sample<kSampler>(coords.x, coords.y);
            // Keep the sampled value alive so the fetch is not optimized away.
            benchmark::DoNotOptimize(texel);
        });
    }
}
// Register one benchmark per swizzle mode. Each runs with arguments 0, 1, 2 and 3,
// which BM_TexSampling reads through state.range(0).
BENCHMARK(BM_TexSampling<0>)->Name("Linear")->DenseRange(0, 3);
BENCHMARK(BM_TexSampling<1>)->Name("Tiled")->DenseRange(0, 3);
BENCHMARK(BM_TexSampling<2>)->Name("ZOrder")->DenseRange(0, 3);

// Provides the program entry point that runs the registered benchmarks.
BENCHMARK_MAIN();

int main2(int argc, char** args) {
if (!glfwInit()) return -1;

glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
Expand Down
1 change: 1 addition & 0 deletions src/SwRast/RendererShaders.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,7 @@ struct OverdrawShader {

// TODO: Implement a possibly better and faster approach from "Scalable Ambient Obscurance", and/or borrow a few tricks from XeGTAO.
// https://www.shadertoy.com/view/3dK3zR
// https://github.com/godotengine/godot/pull/44182
struct SSAO {
static const uint32_t KernelSize = 16, FbAttachId = 8;

Expand Down
13 changes: 11 additions & 2 deletions src/SwRast/Texture.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ struct SamplerDesc {
FilterMode MagFilter = FilterMode::Linear;
FilterMode MinFilter = FilterMode::Nearest;
bool EnableMips = true;
int SwizzleMode = 0;
};

template<pixfmt::Texel Texel>
Expand Down Expand Up @@ -388,7 +389,7 @@ struct Texture2D {
mipLevel = SD.EnableMips ? simd::min(simd::max(mipLevel, 0), (int32_t)MipLevels - 1) : 0;

if (filter == FilterMode::Nearest) [[likely]] {
auto res = FetchNearest(ix >> LerpFracBits, iy >> LerpFracBits, layer, mipLevel);
auto res = FetchNearest<SD.SwizzleMode>(ix >> LerpFracBits, iy >> LerpFracBits, layer, mipLevel);

if constexpr (std::is_same<typename Texel::LerpedTy, VInt>()) {
return res.Packed;
Expand Down Expand Up @@ -427,6 +428,7 @@ struct Texture2D {
}

// Fetches the texel at the specified pixel coords. No bounds check.
template<int SwizzleMode = 0>
[[gnu::pure, gnu::always_inline]] Texel FetchNearest(VInt ix, VInt iy, VInt layer, VInt mipLevel) const {
VInt stride = (int32_t)RowShift;
VInt offset = layer << LayerShift;
Expand All @@ -437,7 +439,14 @@ struct Texture2D {
stride -= mipLevel;
offset += _mm512_permutexvar_epi32(mipLevel, _mipOffsets);
}
return GatherTexels<Texel>(offset + ix + (iy << stride));
if constexpr (SwizzleMode == 0) {
offset += ix + (iy << stride);
} else if constexpr (SwizzleMode == 1) {
offset += texutil::GetTiledOffset(ix, iy, stride);
} else {
offset += texutil::Interleave(ix, iy);
}
return GatherTexels<Texel>(offset);
}

// Interpolates the texels overlapping the specified pixel coords (in N.LerpFracBits fixed-point). No bounds check.
Expand Down
3 changes: 2 additions & 1 deletion src/SwRast/vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"glfw3",
"stb",
"assimp",
"glm"
"glm",
"benchmark"
]
}

0 comments on commit d0896e9

Please sign in to comment.