Skip to content

Commit

Permalink
GPU/SW: Fix 256-bit AVX2 path
Browse files Browse the repository at this point in the history
  • Loading branch information
stenzek committed Nov 18, 2024
1 parent 122fe3d commit 6c1b98e
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 17 deletions.
14 changes: 7 additions & 7 deletions src/common/gsvector_sse.h
Original file line number Diff line number Diff line change
Expand Up @@ -2541,12 +2541,12 @@ class alignas(32) GSVector8i

ALWAYS_INLINE operator __m256i() const { return m; }

// Per-lane signed minimum/maximum against v. Each method forwards to the AVX2 intrinsic of the
// matching lane width and wraps the __m256i result in a new GSVector8i; *this is not modified.
// Signed 8-bit lanes.
ALWAYS_INLINE GSVector8i min_i8(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi8(m, v)); }
ALWAYS_INLINE GSVector8i max_i8(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi8(m, v)); }
// Signed 16-bit lanes.
ALWAYS_INLINE GSVector8i min_i16(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi16(m, v)); }
ALWAYS_INLINE GSVector8i max_i16(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi16(m, v)); }
// Signed 32-bit lanes.
ALWAYS_INLINE GSVector8i min_i32(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi32(m, v)); }
ALWAYS_INLINE GSVector8i max_i32(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi32(m, v)); }
// Per-lane signed minimum/maximum against v. Each method forwards to the AVX2 intrinsic of the
// matching lane width and wraps the __m256i result in a new GSVector8i; *this is not modified.
// Signed 8-bit lanes.
ALWAYS_INLINE GSVector8i min_s8(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi8(m, v)); }
ALWAYS_INLINE GSVector8i max_s8(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi8(m, v)); }
// Signed 16-bit lanes.
ALWAYS_INLINE GSVector8i min_s16(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi16(m, v)); }
ALWAYS_INLINE GSVector8i max_s16(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi16(m, v)); }
// Signed 32-bit lanes.
ALWAYS_INLINE GSVector8i min_s32(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi32(m, v)); }
ALWAYS_INLINE GSVector8i max_s32(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi32(m, v)); }

// Per-lane unsigned 8-bit minimum/maximum against v (_mm256_min_epu8 / _mm256_max_epu8);
// returns the result as a new GSVector8i and leaves *this untouched.
ALWAYS_INLINE GSVector8i min_u8(const GSVector8i& v) const { return GSVector8i(_mm256_min_epu8(m, v)); }
ALWAYS_INLINE GSVector8i max_u8(const GSVector8i& v) const { return GSVector8i(_mm256_max_epu8(m, v)); }
Expand Down Expand Up @@ -2849,7 +2849,7 @@ class alignas(32) GSVector8i
return _mm256_extract_epi64(m, i);
}

// Builds a 256-bit vector whose low 128 bits are GSVector4i::zext32(v)
// (presumably v placed in lane 0 with the remaining 128-bit lanes zeroed -- confirm against GSVector4i).
// NOTE(review): _mm256_castsi128_si256 is a pure reinterpretation; the upper 128 bits of its result
// are undefined, so callers must not rely on them being zero.
ALWAYS_INLINE static GSVector8i zext32(s32 v) { return GSVector8i(_mm256_castsi128_si256(GSVector4i::zext32(v))); }
// Builds a 256-bit vector whose low 128 bits are GSVector4i::zext32(v)
// (presumably v placed in lane 0 with the remaining 128-bit lanes zeroed -- confirm against GSVector4i).
// _mm256_zextsi128_si256 explicitly zeroes the upper 128 bits, so the whole 256-bit result is defined.
ALWAYS_INLINE static GSVector8i zext32(s32 v) { return GSVector8i(_mm256_zextsi128_si256(GSVector4i::zext32(v))); }

ALWAYS_INLINE static GSVector8i loadnt(const void* p)
{
Expand Down
7 changes: 0 additions & 7 deletions src/core/gpu_sw_rasterizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,6 @@

LOG_CHANNEL(GPU_SW_Rasterizer);

// Disable the 256-bit path for the rest of this file; that path is emitted from a separate file.
// TODO: Builds that use -march=native probably only want the 256-bit renderer compiled,
// once that renderer is finished.
#ifdef GSVECTOR_HAS_256
#undef GSVECTOR_HAS_256
#endif

namespace GPU_SW_Rasterizer {
constinit const DitherLUT g_dither_lut = []() constexpr {
DitherLUT lut = {};
Expand Down
6 changes: 3 additions & 3 deletions src/core/gpu_sw_rasterizer.inl
Original file line number Diff line number Diff line change
Expand Up @@ -1143,9 +1143,9 @@ struct TriangleVectors : PixelVectors<texture_enable>
} // namespace

template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable>
static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv,
const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep,
const TriangleVectors<shading_enable, texture_enable>& tv)
ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound,
UVStepper uv, const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep,
const TriangleVectors<shading_enable, texture_enable>& tv)
{
s32 width = x_bound - x_start;
s32 current_x = TruncateGPUVertexPosition(x_start);
Expand Down

0 comments on commit 6c1b98e

Please sign in to comment.