From 2c4315e7f49a396def6d6024e0da80030980b3c5 Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Sun, 24 Mar 2024 21:08:20 +0700 Subject: [PATCH 01/12] Change format of mip-map texture atlas: use 32bit for data offset. --- .../glsl_CombinerProgramBuilderAccurate.cpp | 38 +++++++++---------- src/Textures.cpp | 9 +++-- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramBuilderAccurate.cpp b/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramBuilderAccurate.cpp index 263e283a4..dc6423352 100644 --- a/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramBuilderAccurate.cpp +++ b/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramBuilderAccurate.cpp @@ -661,24 +661,19 @@ class ShaderMipmap : public ShaderPart " lowp vec4 c11 = texture2D(tex, (tcData[3] + 0.5)/texSize); \n" ; static const std::string strReadTex1 = - "mediump float get_high4(in float byte) { \n" - " return floor(byte/16.0); \n" - "} \n" - "mediump float get_low4(in float byte) { \n" - " return byte - 16.0*floor(byte/16.0); \n" - "} \n" "lowp vec4 TextureMipMap1(in sampler2D tex, in highp vec2 tcData[5], in lowp float lod) \n" "{ \n" // Fetch from texture atlas - // First 8 texels contain info about tile size and offset, 1 texel per tile - " mediump vec2 texSize = uTextureSize[1]; \n" - " mediump vec4 texWdthAndOff0 = 255.0 * texture2D(tex, vec2(0.5, 0.5)/texSize); \n" - " mediump vec4 texWdthAndOff = 255.0 * texture2D(tex, vec2(lod + 0.5, 0.5)/texSize); \n" - " mediump float lod_scales = pow(2.0, get_high4(texWdthAndOff0.a) - get_high4(texWdthAndOff.a)); \n" - " mediump float lod_scalet = pow(2.0, get_low4(texWdthAndOff0.a) - get_low4(texWdthAndOff.a)); \n" + // First 16 texels contain info about tile size and offset, 2 texels per tile + " mediump vec2 texSize = uTextureSize[1]; \n" + " mediump vec4 texWidth0 = 255.0 * texture2D(tex, vec2(1.0 + 0.5, 0.5)/texSize); \n" + " mediump vec4 texOff = 255.0 * texture2D(tex, vec2(lod * 2.0 + 0.5, 
0.5)/texSize); \n" + " mediump vec4 texWidth = 255.0 * texture2D(tex, vec2(lod * 2.0 + 1.0 + 0.5, 0.5)/texSize); \n" + " mediump float lod_scales = pow(2.0, texWidth0.a - texWidth.a); \n" + " mediump float lod_scalet = pow(2.0, texWidth0.b - texWidth.b); \n" " mediump vec2 lod_scale = vec2(lod_scales, lod_scalet); \n" - " mediump float offset = texWdthAndOff.r + texWdthAndOff.g * 256.0; \n" - " mediump float width = texWdthAndOff.b; \n" + " mediump float offset = texOff.r + texOff.g * 256.0 + texOff.b * 65536.0; \n" + " mediump float width = texWidth.r + texWidth.g * 256.0; \n" " mediump vec2 Coords00 = floor(tcData[0] * lod_scale); \n" " mediump float offset00 = offset + width * Coords00.t + Coords00.s; \n" " mediump float Y00 = floor(offset00 / mipmapTileWidth); \n" @@ -800,14 +795,15 @@ class ShaderMipmap : public ShaderPart "#define READ_TEX1_MIPMAP(name, tex, tcData, tile) \\\n" "{ \\\n" // Fetch from texture atlas - // First 8 texels contain info about tile size and offset, 1 texel per tile - " mediump vec4 texWdthAndOff0 = 255.0 * texelFetch(tex, ivec2(0, 0), 0); \\\n" - " mediump vec4 texWdthAndOff = 255.0 * texelFetch(tex, ivec2(int(tile), 0), 0); \\\n" - " mediump float lod_scales = pow(2.0, GET_HIGH4(texWdthAndOff0.a) - GET_HIGH4(texWdthAndOff.a)); \\\n" - " mediump float lod_scalet = pow(2.0, GET_LOW4(texWdthAndOff0.a) - GET_LOW4(texWdthAndOff.a)); \\\n" + // First 16 texels contain info about tile size and offset, 2 texels per tile + " mediump vec4 texWidth0 = 255.0 * texelFetch(tex, ivec2(1, 0), 0); \\\n" + " mediump vec4 texOff = 255.0 * texelFetch(tex, ivec2(int(tile) * 2, 0), 0); \\\n" + " mediump vec4 texWidth = 255.0 * texelFetch(tex, ivec2(int(tile) * 2 + 1, 0), 0); \\\n" + " mediump float lod_scales = pow(2.0, texWidth0.a - texWidth.a); \\\n" + " mediump float lod_scalet = pow(2.0, texWidth0.b - texWidth.b); \\\n" " mediump vec2 lod_scale = vec2(lod_scales, lod_scalet); \\\n" - " mediump int offset = int(texWdthAndOff.r) + int(texWdthAndOff.g) 
* 256; \\\n" - " mediump int width = int(texWdthAndOff.b); \\\n" + " mediump int offset = int(texOff.r) + int(texOff.g) * 256 + int(texOff.b) * 65536; \\\n" + " mediump int width = int(texWidth.r) + int(texWidth.g) * 256; \\\n" " mediump ivec2 iCoords00 = ivec2(tcData[0] * lod_scale); \\\n" " mediump int offset00 = offset + width * iCoords00.t + iCoords00.s; \\\n" " mediump int Y00 = offset00/mipmapTileWidth; \\\n" diff --git a/src/Textures.cpp b/src/Textures.cpp index 4d4c91a9b..625b21e94 100644 --- a/src/Textures.cpp +++ b/src/Textures.cpp @@ -1628,7 +1628,7 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) } _pTexture->textureBytes = (_pTexture->width * _pTexture->height) << sizeShift; - unsigned int totalTexSize = std::max(static_cast(_pTexture->textureBytes/sizeof(u32) + 8), MIPMAP_TILE_WIDTH) + unsigned int totalTexSize = std::max(static_cast(_pTexture->textureBytes/sizeof(u32) + 16), MIPMAP_TILE_WIDTH) * (_pTexture->max_level + 1); if (m_tempTextureHolder.size() < totalTexSize) { @@ -1661,7 +1661,7 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) if (_pTexture->max_level > 0) { u32 mipLevel = 0; - u32 texDataOffset = 8; // number of gDP.tiles + u32 texDataOffset = 16; // number of gDP.tiles * 2 // Load all tiles into one 1D texture atlas. 
while (true) @@ -1671,8 +1671,9 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) if (mipRatioS >= 16u) mipRatioS -= 16u; u32 mipRatioT = gDP.tiles[gSP.texture.tile + mipLevel + 1].shiftt + 5u; if (mipRatioT >= 16) mipRatioT -= 16u; - const u32 tileSizePacked = texDataOffset | (tmptex.width << 16) | (mipRatioT << 24) | (mipRatioS << 28); - m_tempTextureHolder[mipLevel] = tileSizePacked; + const u32 tileSizePacked = tmptex.width | (mipRatioT << 16) | (mipRatioS << 24); + m_tempTextureHolder[mipLevel * 2] = texDataOffset; + m_tempTextureHolder[mipLevel * 2 + 1] = tileSizePacked; getLoadParams(tmptex.format, tmptex.size); _getTextureDestData(tmptex, &m_tempTextureHolder[texDataOffset], glInternalFormat, GetTexel, &line); From 8433cda654d954e7c2b86e6bb4fb142bcc8f0439 Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Sun, 24 Mar 2024 22:40:14 +0700 Subject: [PATCH 02/12] Implement mip-mapping for HD textures. Tiles are generated using "LANCIR" image resizer by Aleksey Vaneev. 
--- ..._CombinerProgramUniformFactoryAccurate.cpp | 2 +- src/GraphicsDrawer.h | 2 +- src/Textures.cpp | 98 +- src/inc/lancir.h | 2446 +++++++++++++++++ 4 files changed, 2544 insertions(+), 4 deletions(-) create mode 100644 src/inc/lancir.h diff --git a/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramUniformFactoryAccurate.cpp b/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramUniformFactoryAccurate.cpp index f5bf4a49e..31600ddc0 100644 --- a/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramUniformFactoryAccurate.cpp +++ b/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramUniformFactoryAccurate.cpp @@ -111,7 +111,7 @@ class UMipmap : public UniformGroup maxTile = std::min(gSP.texture.level, 1u); // Hack for HD textures uMaxTile.set(maxTile, _force); - bool bNoAtlasTex = (_pTexture != nullptr && _pTexture->bHDTexture) || + bool bNoAtlasTex = (_pTexture != nullptr && _pTexture->bHDTexture && _pTexture->max_level == 0) || maxTile == 0 || gDP.otherMode.textureLOD != G_TL_LOD || (gDP.otherMode.textureDetail != G_TD_DETAIL && maxTile == 1); diff --git a/src/GraphicsDrawer.h b/src/GraphicsDrawer.h index 5f0c307c5..cd1d24ff6 100644 --- a/src/GraphicsDrawer.h +++ b/src/GraphicsDrawer.h @@ -21,7 +21,7 @@ struct FrameBuffer; #define ELEMBUFF_SIZE 1024U constexpr f32 SCREEN_SIZE_DIM = 640.0f; -constexpr u32 MIPMAP_TILE_WIDTH = 256u; +constexpr u32 MIPMAP_TILE_WIDTH = 512u; enum class DrawingState { diff --git a/src/Textures.cpp b/src/Textures.cpp index 625b21e94..9b308be38 100644 --- a/src/Textures.cpp +++ b/src/Textures.cpp @@ -3,6 +3,7 @@ #include #include // std::this_thread::sleep_for #include // std::chrono::seconds +#include #include "Platform.h" #include "Textures.h" #include "GBI.h" @@ -1151,6 +1152,80 @@ void TextureCache::_loadBackground(CachedTexture *pTexture) free(pDest); } +void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& _ghqTexInfo, u64 _ricecrc) +{ + u32 texWidth = _ghqTexInfo.width; + u32 texHeight = 
_ghqTexInfo.height; + unsigned int totalTexSize = std::max(static_cast(texWidth * texHeight + 16), MIPMAP_TILE_WIDTH) + * (_pTexture->max_level + 1); + + std::vector m_tempTextureHolder(totalTexSize); + std::vector tileData(texWidth * texHeight / 4); + u32* pTileData = reinterpret_cast(_ghqTexInfo.data); + + u32 mipLevel = 0; + u32 texDataOffset = 16; // number of gDP.tiles * 2 + avir::CLancIR imageResizer; + + // Load all tiles into one 1D texture atlas. + u32 mipRatioS = gDP.tiles[gSP.texture.tile + 1].shifts + 5u; + if (mipRatioS >= 16u) mipRatioS -= 16u; + u32 mipRatioT = gDP.tiles[gSP.texture.tile + 1].shiftt + 5u; + if (mipRatioT >= 16) mipRatioT -= 16u; + while (true) + { + const u32 tileSizePacked = texWidth | (mipRatioT << 16) | (mipRatioS << 24); + m_tempTextureHolder[mipLevel * 2] = texDataOffset; + m_tempTextureHolder[mipLevel * 2 + 1] = tileSizePacked; + + txfilter_dmptx((u8*)pTileData, texWidth, texHeight, + texWidth, (u16)_ghqTexInfo.format, + N64FormatSize(_pTexture->format, _pTexture->size), + _ricecrc + mipLevel); + + std::copy_n(pTileData, texWidth * texHeight, &m_tempTextureHolder[texDataOffset]); + pTileData = &m_tempTextureHolder[texDataOffset]; + texDataOffset += texWidth * texHeight; + if (mipLevel == _pTexture->max_level) + break; + + ++mipLevel; + u32 mipRatioSNew = gDP.tiles[gSP.texture.tile + mipLevel + 1].shifts + 5u; + if (mipRatioSNew >= 16u) mipRatioSNew -= 16u; + u32 mipRatioTNew = gDP.tiles[gSP.texture.tile + mipLevel + 1].shiftt + 5u; + if (mipRatioTNew >= 16u) mipRatioTNew -= 16u; + u32 shifts = mipRatioSNew - mipRatioS; + u32 shiftt = mipRatioTNew - mipRatioT; + if (shifts > 0 || shiftt > 0) { + imageResizer.resizeImage((u8*)pTileData, texWidth, texHeight, (u8*)tileData.data(), texWidth >> shifts, texHeight >> shiftt, 4); + texWidth >>= shifts; + texHeight >>= shiftt; + mipRatioS = mipRatioSNew; + mipRatioT = mipRatioTNew; + pTileData = tileData.data(); + } + } + + u32 texformat = 
gfxContext.convertInternalTextureFormat(_ghqTexInfo.format); + Context::InitTextureParams params; + params.handle = _pTexture->name; + params.textureUnitIndex = textureIndices::Tex[1]; + params.mipMapLevel = 0; + params.mipMapLevels = 1; + params.msaaLevel = 0; + params.width = std::min(texDataOffset, MIPMAP_TILE_WIDTH); + params.height = (texDataOffset / MIPMAP_TILE_WIDTH) + ((texDataOffset % MIPMAP_TILE_WIDTH) ? 1 : 0); + params.internalFormat = InternalColorFormatParam(texformat); + params.format = ColorFormatParam(_ghqTexInfo.texture_format); + params.dataType = DatatypeParam(_ghqTexInfo.pixel_type); + params.data = m_tempTextureHolder.data(); + gfxContext.init2DTexture(params); + assert(!gfxContext.isError()); + _pTexture->mipmapAtlasWidth = params.width; + _pTexture->mipmapAtlasHeight = params.height; + _pTexture->textureBytes = texDataOffset << 2; +} + bool TextureCache::_loadHiresTexture(u32 _tile, CachedTexture *_pTexture, u64 & _ricecrc, u64 & _strongcrc) { if (config.textureFilter.txHiresEnable == 0 || !TFH.isInited()) @@ -1233,6 +1308,21 @@ bool TextureCache::_loadHiresTexture(u32 _tile, CachedTexture *_pTexture, u64 & hirestexFound = txfilter_hirestex(_pTexture->crc, _strongcrc, palette, N64FormatSize(_pTexture->format, _pTexture->size), &ghqTexInfo); } if (hirestexFound && ghqTexInfo.width != 0 && ghqTexInfo.height != 0) { + if (config.generalEmulation.enableInaccurateTextureCoordinates == 0 && + _tile > 0 && + currentCombiner()->usesLOD() && + gSP.texture.level > 1) { + _pTexture->max_level = gDP.otherMode.textureDetail == G_TD_DETAIL ? 
+ static_cast(gSP.texture.level) : + static_cast(gSP.texture.level - 1); + } + if (_pTexture->max_level > 0) + { + _loadHiresTextureMipMapAccurate(_pTexture, ghqTexInfo, _ricecrc); + _updateCachedTexture(ghqTexInfo, _pTexture, width, height); + return true; + } + ghqTexInfo.format = gfxContext.convertInternalTextureFormat(ghqTexInfo.format); Context::InitTextureParams params; params.handle = _pTexture->name; @@ -1604,6 +1694,12 @@ void TextureCache::_loadFast(u32 _tile, CachedTexture *_pTexture) void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) { + gDPTile * pTile = _tile < 2 ? gSP.textureTile[_tile] : &gDP.tiles[_tile]; + const u32 tMemMask = gDP.otherMode.textureLUT == G_TT_NONE ? 0x1FF : 0xFF; + gDPLoadTileInfo &info = gDP.loadInfo[pTile->tmem & tMemMask]; + if (info.texAddress == 0x0071a0f0 || info.texAddress == 0x00719ef0) + int t = 0; + u64 ricecrc = 0; u64 strongcrc = 0; if (_loadHiresTexture(_tile, _pTexture, ricecrc, strongcrc)) @@ -1666,7 +1762,6 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) // Load all tiles into one 1D texture atlas. while (true) { - u32 mipRatioS = gDP.tiles[gSP.texture.tile + mipLevel + 1].shifts + 5u; if (mipRatioS >= 16u) mipRatioS -= 16u; u32 mipRatioT = gDP.tiles[gSP.texture.tile + mipLevel + 1].shiftt + 5u; @@ -1699,7 +1794,6 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) ++mipLevel; const u32 tileMipLevel = gSP.texture.tile + mipLevel + 1; gDPTile & mipTile = gDP.tiles[tileMipLevel]; - gDPTile & prevMipTile = gDP.tiles[tileMipLevel - 1]; line = mipTile.line; tmptex.tMem = mipTile.tmem; tmptex.palette = mipTile.palette; diff --git a/src/inc/lancir.h b/src/inc/lancir.h new file mode 100644 index 000000000..12e5c8207 --- /dev/null +++ b/src/inc/lancir.h @@ -0,0 +1,2446 @@ +//$ nobt +//$ nocpp + +/** + * @file lancir.h + * + * @version 3.0.11 + * + * @brief The self-contained header-only "LANCIR" image resizing algorithm. 
+ * + * This is the self-contained inclusion file for the "LANCIR" image resizer, + * a part of the AVIR library. Features scalar, AVX, SSE2, and NEON + * optimizations as well as batched resizing technique which provides a better + * CPU cache performance. + * + * AVIR Copyright (c) 2015-2024 Aleksey Vaneev + * + * @mainpage + * + * @section intro_sec Introduction + * + * Description is available at https://github.com/avaneev/avir + * + * @section license License + * + * LICENSE: + * + * Copyright (c) 2015-2024 Aleksey Vaneev + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef AVIR_CLANCIR_INCLUDED +#define AVIR_CLANCIR_INCLUDED + +#include +#include +#include + +#if defined( __AVX__ ) || defined( __AVX2__ ) + + #include + + #define LANCIR_AVX + #define LANCIR_SSE2 // Some functions use SSE2; AVX has a higher priority. 
+ #define LANCIR_ALIGN 32 + +#elif defined( __SSE4_2__ ) || defined( __SSE4_1__ ) || \ + defined( __SSSE3__ ) || defined( __SSE3__ ) || defined( __SSE2__ ) || \ + defined( __x86_64__ ) || defined( __amd64 ) || defined( _M_X64 ) || \ + defined( _M_AMD64 ) || ( defined( _M_IX86_FP ) && _M_IX86_FP == 2 ) + + #if defined( _MSC_VER ) + #include + #else // defined( _MSC_VER ) + #include + #endif // defined( _MSC_VER ) + + #define LANCIR_SSE2 + #define LANCIR_ALIGN 16 + +#elif defined( __aarch64__ ) || defined( __arm64 ) || defined( __ARM_NEON ) + + #include + + #define LANCIR_NEON + #define LANCIR_ALIGN 16 + +#else // NEON + + #define LANCIR_ALIGN 4 + +#endif // NEON + +namespace avir { + +/** + * @brief LANCIR resizing parameters class. + * + * An object of this class, which can be allocated on stack, can be used to + * pass non-default parameters to the resizing algorithm. See the constructor + * for the default values. + */ + +class CLancIRParams +{ +public: + int SrcSSize; ///< Physical size of the source scanline, in elements (not + ///< bytes). If this value is below 1, SrcWidth * ElCount will be + ///< used. + int NewSSize; ///< Physical size of the destination scanline, in elements + ///< (not bytes). If this value is below 1, NewWidth * ElCount will be + ///< used. + double kx; ///< Resizing step - horizontal (one output pixel corresponds + ///< to `k` input pixels). A downsizing factor if greater than 1.0; + ///< upsizing factor if below or equal to 1.0. Multiply by -1 if you + ///< would like to bypass `ox` and `oy` adjustment which is done by + ///< default to produce a centered image. If this step value equals 0, + ///< the step value will be chosen automatically. + double ky; ///< Resizing step - vertical. Same as `kx`. + double ox; ///< Start X pixel offset within the source image, can be + ///< negative. A positive offset moves the image to the left. + ///< + double oy; ///< Start Y pixel offset within the source image, can be + ///< negative. 
A positive offset moves the image to the top. + ///< + double la; ///< Lanczos window function's `a` parameter, greater or equal + ///< to 2.0. + ///< + + /** + * Default constructor, with optional arguments that correspond to class + * variables. + * + * @param aSrcSSize Physical size of the source scanline. + * @param aNewSSize Physical size of the destination scanline. + * @param akx Resizing step - horizontal. + * @param aky Resizing step - vertical. + * @param aox Start X pixel offset. + * @param aoy Start Y pixel offset. + */ + + CLancIRParams( const int aSrcSSize = 0, const int aNewSSize = 0, + const double akx = 0.0, const double aky = 0.0, + const double aox = 0.0, const double aoy = 0.0 ) + : SrcSSize( aSrcSSize ) + , NewSSize( aNewSSize ) + , kx( akx ) + , ky( aky ) + , ox( aox ) + , oy( aoy ) + , la( 3.0 ) + { + } +}; + +/** + * @brief LANCIR image resizer class. + * + * The object of this class can be used to resize 1-4 channel images to any + * required size. Resizing is performed by utilizing Lanczos filters, with + * 8-bit precision. This class offers a kind of "optimal" Lanczos resampling + * implementation. + * + * Object of this class can be allocated on stack. + * + * Note that object of this class does not free temporary buffers and + * variables after the resizeImage() function call (until object's + * destruction): these buffers are reused (or reallocated) on subsequent + * calls, thus making batch resizing of images faster. This means resizing is + * not thread-safe: a separate object should be created for each thread. + */ + +class CLancIR +{ +private: + CLancIR( const CLancIR& ) + { + // Unsupported. + } + + CLancIR& operator = ( const CLancIR& ) + { + // Unsupported. + return( *this ); + } + +public: + CLancIR() + : FltBuf0( NULL ) + , FltBuf0Len( 0 ) + , spv0( NULL ) + , spv0len( 0 ) + , spv( NULL ) + { + } + + ~CLancIR() + { + delete[] FltBuf0; + delete[] spv0; + } + + /** + * @brief Function resizes an image. 
+ * + * Performs input-to-output type conversion, if necessary. + * + * @param[in] SrcBuf Source image buffer. + * @param SrcWidth Source image width, in pixels. + * @param SrcHeight Source image height, in pixels. + * @param[out] NewBuf Buffer to accept the resized image. Cannot be equal + * to SrcBuf. + * @param NewWidth New image width, in pixels. + * @param NewHeight New image height, in pixels. + * @param ElCount The number of elements (channels) used to store each + * source and destination pixel (1-4). + * @tparam Tin Input buffer's element type. Can be uint8_t (0-255 value + * range), uint16_t (0-65535 value range), float (0-1 value range), double + * (0-1 value range). uint32_t type is treated as uint16_t. Signed integer + * types and larger integer types are unsupported. + * @tparam Tout Output buffer's element type, treated like `Tin`. If `Tin` + * and `Tout` types do not match, an output value scaling will be applied. + * Floating-point output will not clamped/clipped/saturated, integer + * output is always rounded and clamped. + * @return The number of available output scanlines. Equals to NewHeight, + * or 0 on function parameters error. + */ + + template< typename Tin, typename Tout > + int resizeImage( const Tin* const SrcBuf, const int SrcWidth, + const int SrcHeight, Tout* const NewBuf, const int NewWidth, + const int NewHeight, const int ElCount, + const CLancIRParams* const aParams = NULL ) + { + if(( SrcWidth < 0 ) | ( SrcHeight < 0 ) | ( NewWidth <= 0 ) | + ( NewHeight <= 0 ) | ( SrcBuf == NULL ) | ( NewBuf == NULL ) | + ( (const void*) SrcBuf == (const void*) NewBuf )) + { + return( 0 ); + } + + static const CLancIRParams DefParams; + const CLancIRParams& Params = ( aParams != NULL ? + *aParams : DefParams ); + + if( Params.la < 2.0 ) + { + return( 0 ); + } + + const int OutSSize = NewWidth * ElCount; + const size_t NewScanlineSize = ( Params.NewSSize < 1 ? 
+ OutSSize : Params.NewSSize ); + + if(( SrcWidth == 0 ) | ( SrcHeight == 0 )) + { + Tout* op = NewBuf; + int i; + + for( i = 0; i < NewHeight; i++ ) + { + memset( op, 0, OutSSize * sizeof( Tout )); + op += NewScanlineSize; + } + + return( NewHeight ); + } + + const size_t SrcScanlineSize = ( Params.SrcSSize < 1 ? + SrcWidth * ElCount : Params.SrcSSize ); + + double ox = Params.ox; + double oy = Params.oy; + double kx; + double ky; + + if( Params.kx >= 0.0 ) + { + kx = ( Params.kx == 0.0 ? + (double) SrcWidth / NewWidth : Params.kx ); + + ox += ( kx - 1.0 ) * 0.5; + } + else + { + kx = -Params.kx; + } + + if( Params.ky >= 0.0 ) + { + ky = ( Params.ky == 0.0 ? + (double) SrcHeight / NewHeight : Params.ky ); + + oy += ( ky - 1.0 ) * 0.5; + } + else + { + ky = -Params.ky; + } + + if( rfv.update( Params.la, ky, ElCount )) + { + rsv.reset(); + rsh.reset(); + } + + CResizeFilters* rfh; // Pointer to resizing filters for horizontal + // resizing, may equal to `rfv` if the same stepping is in use. + + if( kx == ky ) + { + rfh = &rfv; + } + else + { + rfh = &rfh0; + + if( rfh0.update( Params.la, kx, ElCount )) + { + rsh.reset(); + } + } + + rsv.update( SrcHeight, NewHeight, oy, rfv, spv ); + rsh.update( SrcWidth, NewWidth, ox, *rfh ); + + // Calculate vertical progressive resizing's batch size. Progressive + // batching is used to try to keep addressing within the cache + // capacity. This technique definitely works well for single-threaded + // resizing on most CPUs, but may not provide an additional benefit + // for multi-threaded resizing, or in a system-wide high-load + // situations. + + const size_t FltWidthE = ( rsh.padl + SrcWidth + rsh.padr ) * ElCount; + const double CacheSize = 5500000.0; // Tuned for various CPUs. 
+ const double OpSize = (double) SrcScanlineSize * SrcHeight * + sizeof( Tin ) + (double) FltWidthE * NewHeight * sizeof( float ); + + int BatchSize = (int) ( NewHeight * CacheSize / ( OpSize + 1.0 )); + + if( BatchSize < 8 ) + { + BatchSize = 8; + } + + if( BatchSize > NewHeight ) + { + BatchSize = NewHeight; + } + + // Allocate/resize intermediate buffers. + + const int svs = ( rsv.padl + SrcHeight + rsv.padr ) * ElCount; + float* const pspv0 = spv0; + reallocBuf( spv0, spv, spv0len, ( svs > OutSSize ? svs : OutSSize )); + reallocBuf( FltBuf0, FltBuf, FltBuf0Len, FltWidthE * BatchSize ); + + if( spv0 != pspv0 ) + { + rsv.updateSPO( rfv, spv ); + } + + // Prepare output-related constants. + + const bool IsOutFloat = ( (Tout) 0.25 != 0 ); + const int Clamp = ( sizeof( Tout ) == 1 ? 255 : 65535 ); + const float OutMul = ( IsOutFloat ? 1.0f : (float) Clamp ) / + ( (Tin) 0.25 != 0 ? 1 : ( sizeof( Tin ) == 1 ? 255 : 65535 )); + + // Perform batched resizing. + + const CResizePos* rpv = rsv.pos; + Tout* opn = NewBuf; + int bl = NewHeight; + + while( bl > 0 ) + { + const int bc = ( bl > BatchSize ? BatchSize : bl ); + + int kl = rfv.KernelLen; + const Tin* ip = SrcBuf; + float* op = FltBuf + rsh.padl * ElCount; + + const int so = (int) rpv[ 0 ].so; + float* const sp = spv + so * ElCount; + + int cc = (int) rpv[ bc - 1 ].so - so + kl; // Pixel copy count. + int rl = 0; // Leftmost pixel's replication count. + int rr = 0; // Rightmost pixel's replication count. + + const int socc = so + cc; + const int spe = rsv.padl + SrcHeight; + + // Calculate scanline copying and padding parameters, depending on + // the batch's size and its vertical offset. 
+ + if( so < rsv.padl ) + { + if( socc <= rsv.padl ) + { + rl = cc; + cc = 0; + } + else + { + if( socc > spe ) + { + rr = socc - spe; + cc -= rr; + } + + rl = rsv.padl - so; + cc -= rl; + } + } + else + { + if( so >= spe ) + { + rr = cc; + cc = 0; + ip += SrcHeight * SrcScanlineSize; + } + else + { + if( socc > spe ) + { + rr = socc - spe; + cc -= rr; + } + + ip += ( so - rsv.padl ) * SrcScanlineSize; + } + } + + // Batched vertical resizing. + + int i; + + if( ElCount == 1 ) + { + for( i = 0; i < SrcWidth; i++ ) + { + copyScanline1v( ip, SrcScanlineSize, sp, cc, rl, rr ); + resize1< false >( NULL, op, FltWidthE, rpv, kl, bc ); + ip += 1; + op += 1; + } + } + else + if( ElCount == 2 ) + { + for( i = 0; i < SrcWidth; i++ ) + { + copyScanline2v( ip, SrcScanlineSize, sp, cc, rl, rr ); + resize2< false >( NULL, op, FltWidthE, rpv, kl, bc ); + ip += 2; + op += 2; + } + } + else + if( ElCount == 3 ) + { + for( i = 0; i < SrcWidth; i++ ) + { + copyScanline3v( ip, SrcScanlineSize, sp, cc, rl, rr ); + resize3< false >( NULL, op, FltWidthE, rpv, kl, bc ); + ip += 3; + op += 3; + } + } + else // ElCount == 4 + { + for( i = 0; i < SrcWidth; i++ ) + { + copyScanline4v( ip, SrcScanlineSize, sp, cc, rl, rr ); + resize4< false >( NULL, op, FltWidthE, rpv, kl, bc ); + ip += 4; + op += 4; + } + } + + // Perform horizontal resizing batch, and produce final output. 
+ + float* ipf = FltBuf; + kl = rfh -> KernelLen; + + if( ElCount == 1 ) + { + for( i = 0; i < bc; i++ ) + { + padScanline1h( ipf, rsh, SrcWidth ); + resize1< true >( ipf, spv, 1, rsh.pos, kl, NewWidth ); + copyToOutput( spv, opn, OutSSize, Clamp, IsOutFloat, + OutMul ); + + ipf += FltWidthE; + opn += NewScanlineSize; + } + } + else + if( ElCount == 2 ) + { + for( i = 0; i < bc; i++ ) + { + padScanline2h( ipf, rsh, SrcWidth ); + resize2< true >( ipf, spv, 2, rsh.pos, kl, NewWidth ); + copyToOutput( spv, opn, OutSSize, Clamp, IsOutFloat, + OutMul ); + + ipf += FltWidthE; + opn += NewScanlineSize; + } + } + else + if( ElCount == 3 ) + { + for( i = 0; i < bc; i++ ) + { + padScanline3h( ipf, rsh, SrcWidth ); + resize3< true >( ipf, spv, 3, rsh.pos, kl, NewWidth ); + copyToOutput( spv, opn, OutSSize, Clamp, IsOutFloat, + OutMul ); + + ipf += FltWidthE; + opn += NewScanlineSize; + } + } + else // ElCount == 4 + { + for( i = 0; i < bc; i++ ) + { + padScanline4h( ipf, rsh, SrcWidth ); + resize4< true >( ipf, spv, 4, rsh.pos, kl, NewWidth ); + copyToOutput( spv, opn, OutSSize, Clamp, IsOutFloat, + OutMul ); + + ipf += FltWidthE; + opn += NewScanlineSize; + } + } + + rpv += bc; + bl -= bc; + } + + return( NewHeight ); + } + + /** + * @brief Legacy resizing function. Not recommended for new projects. + * + * See the prior resizeImage() function and CLancIRParams class for + * details. + * + * @param[in] SrcBuf Source image buffer. + * @param SrcWidth Source image width, in pixels. + * @param SrcHeight Source image height, in pixels. + * @param SrcSSize Physical size of the source scanline, in elements (not + * bytes). + * @param[out] NewBuf Buffer to accept the resized image. Cannot be equal + * to SrcBuf. + * @param NewWidth New image width, in pixels. + * @param NewHeight New image height, in pixels. + * @param NewSSize Physical size of the destination scanline, in elements + * (not bytes). 
+ * @param ElCount The number of elements (channels) used to store each + * source and destination pixel (1-4). + * @param kx0 Resizing step - horizontal. + * @param ky0 Resizing step - vertical. Same as `kx0`. + * @param ox Start X pixel offset within the source image. + * @param oy Start Y pixel offset within the source image. + * @tparam Tin Input buffer's element type. + * @tparam Tout Output buffer's element type. + * @return The number of available output scanlines. Equals to NewHeight, + * or 0 on function parameters error. + */ + + template< typename Tin, typename Tout > + int resizeImage( const Tin* const SrcBuf, const int SrcWidth, + const int SrcHeight, const int SrcSSize, Tout* const NewBuf, + const int NewWidth, const int NewHeight, const int NewSSize, + const int ElCount, const double kx0 = 0.0, const double ky0 = 0.0, + double ox = 0.0, double oy = 0.0 ) + { + const CLancIRParams Params( SrcSSize, NewSSize, kx0, ky0, ox, oy ); + + return( resizeImage( SrcBuf, SrcWidth, SrcHeight, NewBuf, NewWidth, + NewHeight, ElCount, &Params )); + } + +protected: + float* FltBuf0; ///< Intermediate resizing buffer. + size_t FltBuf0Len; ///< Length of `FltBuf0`. + float* FltBuf; ///< Address-aligned `FltBuf0`. + float* spv0; ///< Scanline buffer for vertical resizing, also used at the + ///< output stage. + ///< + int spv0len; ///< Length of `spv0`. + float* spv; ///< Address-aligned `spv0`. + + /** + * Function reallocates a typed buffer if its current length is smaller + * than the required length. + * + * @param buf0 Reference to the pointer of the previously allocated + * buffer. + * @param buf Reference to address-aligned `buf0` pointer. + * @param len The current length of the `buf0`. + * @param newlen A new required length. + * @tparam Tb Buffer element type. + * @tparam Tl Length variable type. 
     */

    template< typename Tb, typename Tl >
    static void reallocBuf( Tb*& buf0, Tb*& buf, Tl& len, Tl newlen )
    {
        // Request `LANCIR_ALIGN` extra elements, so that rounding the
        // pointer up below still leaves room for the requested length.

        newlen += LANCIR_ALIGN;

        if( newlen > len )
        {
            if( buf0 != NULL )
            {
                // The state is reset before the new allocation - presumably
                // so that a failed `new` leaves an empty, consistent buffer.

                delete[] buf0;
                buf0 = NULL;
                len = 0;
            }

            buf0 = new Tb[ newlen ];
            len = newlen;

            // Round the address up to the next multiple of `LANCIR_ALIGN`
            // bytes (the mask assumes `LANCIR_ALIGN` is a power of 2).

            buf = (Tb*) (( (uintptr_t) buf0 + LANCIR_ALIGN - 1 ) &
                ~(uintptr_t) ( LANCIR_ALIGN - 1 ));
        }
    }

    /**
     * Function reallocates a typed buffer if its current length is smaller
     * than the required length. The previous contents are not preserved.
     *
     * @param buf Reference to the pointer of the previously allocated buffer;
     * address alignment will not be applied.
     * @param len The current length of the `buf`.
     * @param newlen A new required length.
     * @tparam Tb Buffer element type.
     * @tparam Tl Length variable type.
     */

    template< typename Tb, typename Tl >
    static void reallocBuf( Tb*& buf, Tl& len, const Tl newlen )
    {
        if( newlen > len )
        {
            if( buf != NULL )
            {
                delete[] buf;
                buf = NULL;
                len = 0;
            }

            buf = new Tb[ newlen ];
            len = newlen;
        }
    }

    class CResizeScanline;

    /**
     * Class implements fractional delay filter bank calculation. Filters are
     * created lazily, on the first getFilter() request for a given
     * fractional position, and are then reused until the next update().
     */

    class CResizeFilters
    {
        friend class CResizeScanline;

    public:
        int KernelLen; ///< Resampling filter kernel's length, taps. Available
            ///< after the update() function call. Always an even value,
            ///< should not be less than 4.

        // NOTE(review): only `Filters`, `FiltersLen`, `la`, `Bufs0` and
        // `Bufs0Len` are initialized here; `k`, `ElCount` and the remaining
        // members stay indeterminate until the first update() call.

        CResizeFilters()
            : Filters( NULL )
            , FiltersLen( 0 )
            , la( 0.0 )
        {
            memset( Bufs0, 0, sizeof( Bufs0 ));
            memset( Bufs0Len, 0, sizeof( Bufs0Len ));
        }

        ~CResizeFilters()
        {
            int i;

            // `Bufs` holds aligned aliases into `Bufs0`, only the original
            // pointers are deleted.

            for( i = 0; i < BufCount; i++ )
            {
                delete[] Bufs0[ i ];
            }

            delete[] Filters;
        }

        /**
         * Function updates the resizing filter bank.
         *
         * @param la0 Lanczos `a` parameter value (greater or equal to 2.0),
         * can be fractional.
         * @param k0 Resizing step.
         * @param ElCount0 Image's element count, may be used for SIMD filter
         * tap replication.
         * @return `true` if an update occurred and scanline resizing
         * positions should be updated unconditionally.
         */

        bool update( const double la0, const double k0, const int ElCount0 )
        {
            // `la` is compared first: it equals 0.0 after construction, so
            // with `la0` >= 2.0 the not yet initialized `k` and `ElCount`
            // are not read on the first call.
            // NOTE(review): a first call with `la0` == 0.0 would read them.

            if( la0 == la && k0 == k && ElCount0 == ElCount )
            {
                return( false );
            }

            // When downsizing (`k0` > 1) the filter's frequency is lowered
            // and its length is increased proportionally to the step.

            const double NormFreq = ( k0 <= 1.0 ? 1.0 : 1.0 / k0 );
            Freq = 3.1415926535897932 * NormFreq;
            FreqA = Freq / la0;

            Len2 = la0 / NormFreq;
            fl2 = (int) ceil( Len2 );
            KernelLen = fl2 + fl2;

            #if LANCIR_ALIGN > 4

                ElRepl = ElCount0;
                KernelLenA = KernelLen * ElRepl;

                // Round the replicated length up to a whole number of
                // `LANCIR_ALIGN`-byte groups of `float` values (the mask
                // assumes the group size is a power of 2).

                const int elalign =
                    (int) ( LANCIR_ALIGN / sizeof( float )) - 1;

                KernelLenA = ( KernelLenA + elalign ) & ~elalign;

            #else // LANCIR_ALIGN > 4

                ElRepl = 1;
                KernelLenA = KernelLen;

            #endif // LANCIR_ALIGN > 4

            FracCount = 1000; // Enough for Lanczos implicit 8-bit precision.

            // `la` is zeroed before the allocations below - presumably so
            // that a failed allocation cannot leave the previous parameters
            // marked as current.

            la = 0.0;
            reallocBuf( Filters, FiltersLen, FracCount + 1 );

            // All filters are marked as "not created yet".

            memset( Filters, 0, FiltersLen * sizeof( Filters[ 0 ]));

            setBuf( 0 );

            la = la0;
            k = k0;
            ElCount = ElCount0;

            return( true );
        }

        /**
         * Function returns filter at the specified fractional offset. This
         * function can only be called after a prior update() function call.
         *
         * @param x Fractional offset, [0; 1].
         * @return Pointer to the filter's taps (`KernelLenA` values), stored
         * in one of the internal buffers.
         */

        const float* getFilter( const double x )
        {
            // Quantize the offset to one of `FracCount + 1` positions. The
            // index is not range-checked: `x` outside [0; 1] indexes
            // `Filters` out of bounds.

            const int Frac = (int) ( x * FracCount + 0.5 );
            float* flt = Filters[ Frac ];

            if( flt != NULL )
            {
                return( flt );
            }

            flt = Bufs[ CurBuf ] + CurBufFill * KernelLenA;
            Filters[ Frac ] = flt;
            CurBufFill++;

            // Switch to the next buffer when the current one is full. With
            // `BufCount * BufLen` = 1024 slots and at most `FracCount + 1` =
            // 1001 distinct filters, the last buffer never becomes full, so
            // the `CurBuf + 1` index stays below `BufCount`.

            if( CurBufFill == BufLen )
            {
                setBuf( CurBuf + 1 );
            }

            // The filter is built for the quantized position, with the
            // delay being the complement of the fractional offset.

            makeFilterNorm( flt, 1.0 - (double) Frac / FracCount );

            if( ElRepl > 1 )
            {
                replicateFilter( flt, KernelLen, ElRepl );
            }

            return( flt );
        }

    protected:
        double Freq; ///< Circular frequency of the filter.
        double FreqA; ///< Circular frequency of the window function.
        double Len2; ///< Half resampling filter's length, unrounded.
        int fl2; ///< Half resampling filter's length, integer.
        int FracCount; ///< The number of fractional positions for which
            ///< filters can be created.
            ///<
        int KernelLenA; ///< SIMD-aligned and replicated filter kernel's
            ///< length.
            ///<
        int ElRepl; ///< The number of repetitions of each filter tap.
        static const int BufCount = 4; ///< The maximal number of buffers that
            ///< can be in use.
            ///<
        static const int BufLen = 256; ///< The number of fractional filters
            ///< a single buffer may contain. Both `BufLen` and `BufCount`
            ///< should correspond to the `FracCount` used.
        float* Bufs0[ BufCount ]; ///< Buffers that hold all filters,
            ///< original.
            ///<
        int Bufs0Len[ BufCount ]; ///< Allocated lengths in `Bufs0`, in
            ///< `float` elements.
            ///<
        float* Bufs[ BufCount ]; ///< Address-aligned `Bufs0`.
        int CurBuf; ///< Filter buffer currently being filled.
        int CurBufFill; ///< The number of fractional positions filled in the
            ///< current filter buffer.
            ///<
        float** Filters; ///< Fractional delay filters for all positions.
            ///< A particular pointer equals NULL if a filter for such
            ///< position has not been created yet.
        int FiltersLen; ///< Allocated length of Filters, in elements.
        double la; ///< Current `la`.
        double k; ///< Current `k`.
        int ElCount; ///< Current `ElCount`.

        /**
         * Function changes the buffer currently being filled, checks its
         * size and reallocates it if necessary, then resets its fill counter.
         *
         * @param bi New current buffer index, should be below `BufCount`
         * (not checked).
         */

        void setBuf( const int bi )
        {
            reallocBuf( Bufs0[ bi ], Bufs[ bi ], Bufs0Len[ bi ],
                BufLen * KernelLenA );

            CurBuf = bi;
            CurBufFill = 0;
        }

        /**
         * @brief Sine-wave signal generator class.
         *
         * Class implements sine-wave signal generator without biasing, with
         * constructor-based initialization only. This generator uses an
         * oscillator instead of the `sin` function.
         */

        class CSineGen
        {
        public:
            /**
             * Constructor initializes `this` sine-wave signal generator.
             *
             * @param si Sine function increment, in radians.
             * @param ph Starting phase, in radians. Add `0.5 x PI` for a
             * cosine function.
             */

            CSineGen( const double si, const double ph )
                : svalue1( sin( ph ))
                , svalue2( sin( ph - si ))
                , sincr( 2.0 * cos( si ))
            {
            }

            /**
             * @return The next value of the sine-wave, without biasing.
             */

            double generate()
            {
                const double res = svalue1;

                // Recurrence: sin(x+si) = 2*cos(si)*sin(x) - sin(x-si).

                svalue1 = sincr * res - svalue2;
                svalue2 = res;

                return( res );
            }

        private:
            double svalue1; ///< Current sine value.
            double svalue2; ///< Previous sine value.
            double sincr; ///< Sine value increment.
        };

        /**
         * Function creates a filter for the specified fractional delay. The
         * update() function should be called prior to calling this function.
         * The created filter is normalized (DC gain=1).
         *
         * Each tap equals `sin(Freq*ut)*sin(FreqA*ut)/(ut*ut)`, where `ut` is
         * the tap's position relative to the filter's center; both sine
         * values come from oscillators that advance by one tap per
         * generate() call, so every tap position has to consume exactly one
         * value from each oscillator, even if the tap itself is not computed.
         *
         * @param[out] op Output filter buffer, receives `KernelLen` taps.
         * @param FracDelay Fractional delay, 0 to 1, inclusive.
         */

        void makeFilterNorm( float* op, const double FracDelay ) const
        {
            CSineGen f( Freq, Freq * ( FracDelay - fl2 ));
            CSineGen fw( FreqA, FreqA * ( FracDelay - fl2 ));

            float* op0 = op;
            double s = 0.0;
            double ut;

            int t = -fl2;

            // The first tap lies outside of the unrounded filter's length:
            // write zero, but keep the oscillators in step.

            if( t + FracDelay < -Len2 )
            {
                f.generate();
                fw.generate();
                *op = (float) 0;
                op++;
                t++;
            }

            // `IsZeroX` flags a delay of (nearly) 0 or 1, where one tap's
            // position is zero and needs the limit value instead of a
            // division. For a delay of 1 that tap is at `t` == -1, hence
            // the loop below stops one tap earlier.

            int IsZeroX = ( fabs( FracDelay - 1.0 ) < 2.3e-13 );
            int mt = 0 - IsZeroX;
            IsZeroX |= ( fabs( FracDelay ) < 2.3e-13 );

            while( t < mt )
            {
                ut = t + FracDelay;
                *op = (float) ( f.generate() * fw.generate() / ( ut * ut ));
                s += *op;
                op++;
                t++;
            }

            if( IsZeroX ) // t+FracDelay==0
            {
                // Limit of the tap formula at `ut` -> 0.

                *op = (float) ( Freq * FreqA );
                s += *op;
                f.generate();
                fw.generate();
            }
            else
            {
                ut = FracDelay; // t==0
                *op = (float) ( f.generate() * fw.generate() / ( ut * ut ));
                s += *op;
            }

            mt = fl2 - 2;

            // Note the pre-increment form of this loop: `op` and `t` are
            // advanced before the tap is written.

            while( t < mt )
            {
                op++;
                t++;
                ut = t + FracDelay;
                *op = (float) ( f.generate() * fw.generate() / ( ut * ut ));
                s += *op;
            }

            op++;
            ut = t + 1 + FracDelay;

            // The last tap may lie outside of the unrounded filter's length.

            if( ut > Len2 )
            {
                *op = (float) 0;
            }
            else
            {
                *op = (float) ( f.generate() * fw.generate() / ( ut * ut ));
                s += *op;
            }

            // Normalize all written taps by the sum of taps.

            s = 1.0 / s;
            t = (int) ( op - op0 + 1 );

            while( t != 0 )
            {
                *op0 = (float) ( *op0 * s );
                op0++;
                t--;
            }
        }

        /**
         * Function replicates taps of the specified filter so that it can
         * be used with SIMD loading instructions. This function works
         * "in-place": taps are processed from the last one to the first one,
         * so that a tap is read before its position is overwritten.
         *
         * @param[in,out] p Filter buffer pointer, should be sized to contain
         * `kl * erp` elements.
         * @param kl Filter kernel's length, in taps.
         * @param erp The number of repetitions to apply: 2, 3, or 4. Any
         * other value is handled as 4 (not checked).
         */

        static void replicateFilter( float* const p, const int kl,
            const int erp )
        {
            const float* ip = p + kl - 1;
            float* op = p + ( kl - 1 ) * erp;
            int c = kl;

            if( erp == 2 )
            {
                while( c != 0 )
                {
                    const float v = *ip;
                    op[ 0 ] = v;
                    op[ 1 ] = v;
                    ip--;
                    op -= 2;
                    c--;
                }
            }
            else
            if( erp == 3 )
            {
                while( c != 0 )
                {
                    const float v = *ip;
                    op[ 0 ] = v;
                    op[ 1 ] = v;
                    op[ 2 ] = v;
                    ip--;
                    op -= 3;
                    c--;
                }
            }
            else // erp == 4
            {
                while( c != 0 )
                {
                    const float v = *ip;
                    op[ 0 ] = v;
                    op[ 1 ] = v;
                    op[ 2 ] = v;
                    op[ 3 ] = v;
                    ip--;
                    op -= 4;
                    c--;
                }
            }
        }
    };

    /**
     * Structure defines source scanline positions and filters for each
     * destination pixel.
     */

    struct CResizePos
    {
        const float* flt; ///< Fractional delay filter.
        intptr_t spo; ///< Source scanline's pixel offset, in bytes, or
            ///< a direct pointer to scanline buffer.
            ///<
        intptr_t so; ///< Offset within the source scanline, in pixels.
    };

    /**
     * Class contains resizing positions, and prepares source scanline
     * positions for resize filtering. The public variables become available
     * after the update() function call.
     */

    class CResizeScanline
    {
    public:
        int padl; ///< Left-padding (in pixels) required for source scanline.
        int padr; ///< Right-padding (in pixels) required for source scanline.
        CResizePos* pos; ///< Source scanline positions (offsets) and filters
            ///< for each destination pixel position.
            ///<

        // NOTE(review): `DstLen` and `o` (as well as `padl` and `padr`) are
        // not initialized here; update() reads `DstLen` and `o` only when
        // `SrcLen0` equals the stored `SrcLen`, which is 0 initially.

        CResizeScanline()
            : pos( NULL )
            , poslen( 0 )
            , SrcLen( 0 )
        {
        }

        ~CResizeScanline()
        {
            delete[] pos;
        }

        /**
         * Function "resets" `this` object so that the next update() call
         * fully updates the position buffer. Reset is necessary if the filter
         * object was updated.
         */

        void reset()
        {
            SrcLen = 0;
        }

        /**
         * Function updates resizing positions, updates `padl`, `padr`, and
         * `pos` buffer. Does nothing if the specified lengths and offset
         * equal the values of the previous update.
         *
         * @param SrcLen0 Source image scanline length, used to create a
         * scanline buffer without length pre-calculation.
         * @param DstLen0 Destination image scanline length.
         * @param o0 Initial source image offset.
         * @param rf Resizing filters object.
         * @param sp A pointer to scanline buffer, to use for absolute
         * scanline positioning, can be NULL.
         */

        void update( const int SrcLen0, const int DstLen0, const double o0,
            CResizeFilters& rf, float* const sp = NULL )
        {
            if( SrcLen0 == SrcLen && DstLen0 == DstLen && o0 == o )
            {
                return;
            }

            const int fl2m1 = rf.fl2 - 1;
            padl = fl2m1 - (int) floor( o0 );

            if( padl < 0 )
            {
                padl = 0;
            }

            // Make sure `padr` and `pos` are in sync: calculate ending `pos`
            // offset in advance.

            const double k = rf.k;

            const int DstLen_m1 = DstLen0 - 1;
            const double oe = o0 + k * DstLen_m1;
            const int ie = (int) floor( oe );

            padr = ie + rf.fl2 + 1 - SrcLen0;

            if( padr < 0 )
            {
                padr = 0;
            }

            // `SrcLen` is zeroed before the allocation - presumably so that
            // a failed allocation forces a full update on the next call.

            SrcLen = 0;
            reallocBuf( pos, poslen, DstLen0 );

            const intptr_t ElCountF = rf.ElCount * sizeof( float );
            const int so = padl - fl2m1;
            CResizePos* rp = pos;
            intptr_t rpso;
            int i;

            for( i = 0; i < DstLen_m1; i++ )
            {
                const double ox = o0 + k * i;
                const int ix = (int) floor( ox );

                rp -> flt = rf.getFilter( ox - ix );
                rpso = so + ix;
                rp -> spo = (intptr_t) sp + rpso * ElCountF;
                rp -> so = rpso;
                rp++;
            }

            // The last position uses the ending offset calculated above, so
            // that it exactly matches the `padr` value.
            // NOTE(review): this write is unconditional, so `DstLen0` is
            // assumed to be at least 1 - confirm that callers guarantee it.

            rp -> flt = rf.getFilter( oe - ie );
            rpso = so + ie;
            rp -> spo = (intptr_t) sp + rpso * ElCountF;
            rp -> so = rpso;

            SrcLen = SrcLen0;
            DstLen = DstLen0;
            o = o0;
        }

        /**
         * Function updates `pos` buffer's `spo` values, for all `DstLen`
         * positions stored by the previous update() call.
         *
         * @param rf Resizing filters object.
         * @param sp A pointer to scanline buffer, to use for absolute
         * scanline positioning, can be NULL.
         */

        void updateSPO( CResizeFilters& rf, float* const sp )
        {
            const intptr_t ElCountF = rf.ElCount * sizeof( float );
            CResizePos* const rp = pos;
            int i;

            for( i = 0; i < DstLen; i++ )
            {
                rp[ i ].spo = (intptr_t) sp + rp[ i ].so * ElCountF;
            }
        }

    protected:
        int poslen; ///< Allocated `pos` buffer's length.
        int SrcLen; ///< Current `SrcLen`.
        int DstLen; ///< Current `DstLen`.
        double o; ///< Current `o`.
    };

    CResizeFilters rfv; ///< Resizing filters for vertical resizing.
    CResizeFilters rfh0; ///< Resizing filters for horizontal resizing (may
        ///< not be in use).
        ///<
    CResizeScanline rsv; ///< Vertical resize scanline.
    CResizeScanline rsh; ///< Horizontal resize scanline.

    /**
     * Function copies scanline (fully or partially) from the source buffer,
     * in its native format, to the internal scanline buffer, in preparation
     * for vertical resizing. Variants for 1-4-channel images.
     *
     * @param ip Source scanline buffer pointer.
     * @param ipinc `ip` increment per pixel.
     * @param op Output scanline pointer.
     * @param cc Source pixel copy count.
     * @param repl Leftmost pixel's replication count.
     * @param repr Rightmost pixel's replication count.
     * @tparam T Source buffer's element type.
+ */ + + template< typename T > + static void copyScanline1v( const T* ip, const size_t ipinc, float* op, + int cc, int repl, int repr ) + { + float v0; + + if( repl > 0 ) + { + v0 = (float) ip[ 0 ]; + + do + { + op[ 0 ] = v0; + op += 1; + + } while( --repl != 0 ); + } + + while( cc != 0 ) + { + op[ 0 ] = (float) ip[ 0 ]; + ip += ipinc; + op += 1; + cc--; + } + + if( repr > 0 ) + { + const T* const ipe = ip - ipinc; + v0 = (float) ipe[ 0 ]; + + do + { + op[ 0 ] = v0; + op += 1; + + } while( --repr != 0 ); + } + } + + template< typename T > + static void copyScanline2v( const T* ip, const size_t ipinc, float* op, + int cc, int repl, int repr ) + { + float v0, v1; + + if( repl > 0 ) + { + v0 = (float) ip[ 0 ]; + v1 = (float) ip[ 1 ]; + + do + { + op[ 0 ] = v0; + op[ 1 ] = v1; + op += 2; + + } while( --repl != 0 ); + } + + while( cc != 0 ) + { + op[ 0 ] = (float) ip[ 0 ]; + op[ 1 ] = (float) ip[ 1 ]; + ip += ipinc; + op += 2; + cc--; + } + + if( repr > 0 ) + { + const T* const ipe = ip - ipinc; + v0 = (float) ipe[ 0 ]; + v1 = (float) ipe[ 1 ]; + + do + { + op[ 0 ] = v0; + op[ 1 ] = v1; + op += 2; + + } while( --repr != 0 ); + } + } + + template< typename T > + static void copyScanline3v( const T* ip, const size_t ipinc, float* op, + int cc, int repl, int repr ) + { + float v0, v1, v2; + + if( repl > 0 ) + { + v0 = (float) ip[ 0 ]; + v1 = (float) ip[ 1 ]; + v2 = (float) ip[ 2 ]; + + do + { + op[ 0 ] = v0; + op[ 1 ] = v1; + op[ 2 ] = v2; + op += 3; + + } while( --repl != 0 ); + } + + while( cc != 0 ) + { + op[ 0 ] = (float) ip[ 0 ]; + op[ 1 ] = (float) ip[ 1 ]; + op[ 2 ] = (float) ip[ 2 ]; + ip += ipinc; + op += 3; + cc--; + } + + if( repr > 0 ) + { + const T* const ipe = ip - ipinc; + v0 = (float) ipe[ 0 ]; + v1 = (float) ipe[ 1 ]; + v2 = (float) ipe[ 2 ]; + + do + { + op[ 0 ] = v0; + op[ 1 ] = v1; + op[ 2 ] = v2; + op += 3; + + } while( --repr != 0 ); + } + } + + template< typename T > + static void copyScanline4v( const T* ip, const size_t ipinc, float* op, + int cc, 
int repl, int repr ) + { + float v0, v1, v2, v3; + + if( repl > 0 ) + { + v0 = (float) ip[ 0 ]; + v1 = (float) ip[ 1 ]; + v2 = (float) ip[ 2 ]; + v3 = (float) ip[ 3 ]; + + do + { + op[ 0 ] = v0; + op[ 1 ] = v1; + op[ 2 ] = v2; + op[ 3 ] = v3; + op += 4; + + } while( --repl != 0 ); + } + + while( cc != 0 ) + { + op[ 0 ] = (float) ip[ 0 ]; + op[ 1 ] = (float) ip[ 1 ]; + op[ 2 ] = (float) ip[ 2 ]; + op[ 3 ] = (float) ip[ 3 ]; + ip += ipinc; + op += 4; + cc--; + } + + if( repr > 0 ) + { + const T* const ipe = ip - ipinc; + v0 = (float) ipe[ 0 ]; + v1 = (float) ipe[ 1 ]; + v2 = (float) ipe[ 2 ]; + v3 = (float) ipe[ 3 ]; + + do + { + op[ 0 ] = v0; + op[ 1 ] = v1; + op[ 2 ] = v2; + op[ 3 ] = v3; + op += 4; + + } while( --repr != 0 ); + } + } + + /** + * Function pads the specified scanline buffer to the left and right by + * replicating its first and last available pixels, in preparation for + * horizontal resizing. Variants for 1-4-channel images. + * + * @param[in,out] op Scanline buffer to pad. + * @param rs Scanline resizing positions object. + * @param l Source scanline's length, in pixels. 
+ */ + + static void padScanline1h( float* op, CResizeScanline& rs, const int l ) + { + const float* ip = op + rs.padl; + + float v0 = ip[ 0 ]; + int i; + + for( i = 0; i < rs.padl; i++ ) + { + op[ i ] = v0; + } + + ip += l; + op += rs.padl + l; + + v0 = ip[ -1 ]; + + for( i = 0; i < rs.padr; i++ ) + { + op[ i ] = v0; + } + } + + static void padScanline2h( float* op, CResizeScanline& rs, const int l ) + { + const float* ip = op + rs.padl * 2; + + float v0 = ip[ 0 ]; + float v1 = ip[ 1 ]; + int i; + + for( i = 0; i < rs.padl; i++ ) + { + op[ 0 ] = v0; + op[ 1 ] = v1; + op += 2; + } + + const int lc = l * 2; + ip += lc; + op += lc; + + v0 = ip[ -2 ]; + v1 = ip[ -1 ]; + + for( i = 0; i < rs.padr; i++ ) + { + op[ 0 ] = v0; + op[ 1 ] = v1; + op += 2; + } + } + + static void padScanline3h( float* op, CResizeScanline& rs, const int l ) + { + const float* ip = op + rs.padl * 3; + + float v0 = ip[ 0 ]; + float v1 = ip[ 1 ]; + float v2 = ip[ 2 ]; + int i; + + for( i = 0; i < rs.padl; i++ ) + { + op[ 0 ] = v0; + op[ 1 ] = v1; + op[ 2 ] = v2; + op += 3; + } + + const int lc = l * 3; + ip += lc; + op += lc; + + v0 = ip[ -3 ]; + v1 = ip[ -2 ]; + v2 = ip[ -1 ]; + + for( i = 0; i < rs.padr; i++ ) + { + op[ 0 ] = v0; + op[ 1 ] = v1; + op[ 2 ] = v2; + op += 3; + } + } + + static void padScanline4h( float* op, CResizeScanline& rs, const int l ) + { + const float* ip = op + rs.padl * 4; + + float v0 = ip[ 0 ]; + float v1 = ip[ 1 ]; + float v2 = ip[ 2 ]; + float v3 = ip[ 3 ]; + int i; + + for( i = 0; i < rs.padl; i++ ) + { + op[ 0 ] = v0; + op[ 1 ] = v1; + op[ 2 ] = v2; + op[ 3 ] = v3; + op += 4; + } + + const int lc = l * 4; + ip += lc; + op += lc; + + v0 = ip[ -4 ]; + v1 = ip[ -3 ]; + v2 = ip[ -2 ]; + v3 = ip[ -1 ]; + + for( i = 0; i < rs.padr; i++ ) + { + op[ 0 ] = v0; + op[ 1 ] = v1; + op[ 2 ] = v2; + op[ 3 ] = v3; + op += 4; + } + } + + /** + * Function rounds a value and applies clamping. + * + * @param v Value to round and clamp. 
+ * @param Clamp High clamp level; low level is 0. + */ + + static inline int roundclamp( const float v, const int Clamp ) + { + if( v < 0.5f ) + { + return( 0 ); + } + + const int vr = (int) ( v + 0.5f ); + + return( vr > Clamp ? Clamp : vr ); + } + + /** + * Function performs final output of the resized scanline pixels to the + * destination image buffer. + * + * @param[in] ip Input resized scanline. + * @param[out] op Output image buffer. + * @param l Output scanline's size (not pixel count). + * @param Clamp Clamp high level, used if `IsOutFloat` is `false`. + * @param IsOutFloat `true` if floating-point output, and no clamping is + * necessary. + * @param OutMul Output multiplier, for value range conversion. + * @tparam T Output buffer's element type. + */ + + template< typename T > + static void copyToOutput( const float* ip, T* op, int l, const int Clamp, + const bool IsOutFloat, const float OutMul ) + { + const bool IsUnityMul = ( OutMul == 1.0f ); + + if( IsOutFloat ) + { + if( IsUnityMul ) + { + if( sizeof( op[ 0 ]) == sizeof( ip[ 0 ])) + { + memcpy( op, ip, l * sizeof( op[ 0 ])); + return; + } + else + { + int l4 = l >> 2; + l &= 3; + + while( l4 != 0 ) + { + op[ 0 ] = (T) ip[ 0 ]; + op[ 1 ] = (T) ip[ 1 ]; + op[ 2 ] = (T) ip[ 2 ]; + op[ 3 ] = (T) ip[ 3 ]; + ip += 4; + op += 4; + l4--; + } + + while( l != 0 ) + { + *op = (T) *ip; + ip++; + op++; + l--; + } + + return; + } + } + + int l4 = l >> 2; + l &= 3; + bool DoScalar = true; + + if( sizeof( op[ 0 ]) == sizeof( ip[ 0 ])) + { + #if defined( LANCIR_SSE2 ) + + DoScalar = false; + const __m128 om = _mm_set1_ps( OutMul ); + + while( l4 != 0 ) + { + _mm_storeu_ps( (float*) op, + _mm_mul_ps( _mm_load_ps( ip ), om )); + + ip += 4; + op += 4; + l4--; + } + + #elif defined( LANCIR_NEON ) + + DoScalar = false; + const float32x4_t om = vdupq_n_f32( OutMul ); + + while( l4 != 0 ) + { + vst1q_f32( (float*) op, + vmulq_f32( vld1q_f32( ip ), om )); + + ip += 4; + op += 4; + l4--; + } + + #endif // defined( 
LANCIR_NEON ) + } + + if( DoScalar ) + { + while( l4 != 0 ) + { + op[ 0 ] = (T) ( ip[ 0 ] * OutMul ); + op[ 1 ] = (T) ( ip[ 1 ] * OutMul ); + op[ 2 ] = (T) ( ip[ 2 ] * OutMul ); + op[ 3 ] = (T) ( ip[ 3 ] * OutMul ); + ip += 4; + op += 4; + l4--; + } + } + + while( l != 0 ) + { + *op = (T) ( *ip * OutMul ); + ip++; + op++; + l--; + } + + return; + } + + int l4 = l >> 2; + l &= 3; + + #if defined( LANCIR_SSE2 ) + + const __m128 minv = _mm_setzero_ps(); + const __m128 maxv = _mm_set1_ps( (float) Clamp ); + const __m128 om = _mm_set1_ps( OutMul ); + + unsigned int prevrm = _MM_GET_ROUNDING_MODE(); + _MM_SET_ROUNDING_MODE( _MM_ROUND_NEAREST ); + + if( sizeof( op[ 0 ]) == 4 ) + { + while( l4 != 0 ) + { + const __m128 cv = _mm_max_ps( _mm_min_ps( + _mm_mul_ps( _mm_load_ps( ip ), om ), maxv ), minv ); + + _mm_storeu_si128( (__m128i*) op, _mm_cvtps_epi32( cv )); + + ip += 4; + op += 4; + l4--; + } + } + else + if( sizeof( op[ 0 ]) == 2 ) + { + while( l4 != 0 ) + { + const __m128 cv = _mm_max_ps( _mm_min_ps( + _mm_mul_ps( _mm_load_ps( ip ), om ), maxv ), minv ); + + const __m128i v32 = _mm_cvtps_epi32( cv ); + const __m128i v16s = _mm_shufflehi_epi16( + _mm_shufflelo_epi16( v32, 0 | 2 << 2 ), 0 | 2 << 2 ); + + const __m128i v16 = _mm_shuffle_epi32( v16s, 0 | 2 << 2 ); + + uint64_t tmp[ 2 ]; + _mm_storeu_si128( (__m128i*) tmp, v16 ); + *(uint64_t*) op = tmp[ 0 ]; + + ip += 4; + op += 4; + l4--; + } + } + else + { + while( l4 != 0 ) + { + const __m128 cv = _mm_max_ps( _mm_min_ps( + _mm_mul_ps( _mm_load_ps( ip ), om ), maxv ), minv ); + + const __m128i v32 = _mm_cvtps_epi32( cv ); + const __m128i v16s = _mm_shufflehi_epi16( + _mm_shufflelo_epi16( v32, 0 | 2 << 2 ), 0 | 2 << 2 ); + + const __m128i v16 = _mm_shuffle_epi32( v16s, 0 | 2 << 2 ); + const __m128i v8 = _mm_packus_epi16( v16, v16 ); + + *(uint32_t*) op = (uint32_t) _mm_cvtsi128_si32( v8 ); + + ip += 4; + op += 4; + l4--; + } + } + + _MM_SET_ROUNDING_MODE( prevrm ); + + #elif defined( LANCIR_NEON ) + + const float32x4_t 
minv = vdupq_n_f32( 0.0f ); + const float32x4_t maxv = vdupq_n_f32( (float) Clamp ); + const float32x4_t om = vdupq_n_f32( OutMul ); + const float32x4_t v05 = vdupq_n_f32( 0.5f ); + + if( sizeof( op[ 0 ]) == 4 ) + { + while( l4 != 0 ) + { + const float32x4_t cv = vmaxq_f32( vminq_f32( + vmulq_f32( vld1q_f32( ip ), om ), maxv ), minv ); + + vst1q_u32( (uint32_t*) op, vcvtq_u32_f32( vaddq_f32( + cv, v05 ))); + + ip += 4; + op += 4; + l4--; + } + } + else + if( sizeof( op[ 0 ]) == 2 ) + { + while( l4 != 0 ) + { + const float32x4_t cv = vmaxq_f32( vminq_f32( + vmulq_f32( vld1q_f32( ip ), om ), maxv ), minv ); + + const uint32x4_t v32 = vcvtq_u32_f32( vaddq_f32( cv, v05 )); + const uint16x4_t v16 = vmovn_u32( v32 ); + + vst1_u16( (uint16_t*) op, v16 ); + + ip += 4; + op += 4; + l4--; + } + } + else + { + while( l4 != 0 ) + { + const float32x4_t cv = vmaxq_f32( vminq_f32( + vmulq_f32( vld1q_f32( ip ), om ), maxv ), minv ); + + const uint32x4_t v32 = vcvtq_u32_f32( vaddq_f32( cv, v05 )); + const uint16x4_t v16 = vmovn_u32( v32 ); + const uint8x8_t v8 = vmovn_u16( vcombine_u16( v16, v16 )); + + *(uint32_t*) op = vget_lane_u32( (uint32x2_t) v8, 0 ); + + ip += 4; + op += 4; + l4--; + } + } + + #else // defined( LANCIR_NEON ) + + if( IsUnityMul ) + { + while( l4 != 0 ) + { + op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp ); + op[ 1 ] = (T) roundclamp( ip[ 1 ], Clamp ); + op[ 2 ] = (T) roundclamp( ip[ 2 ], Clamp ); + op[ 3 ] = (T) roundclamp( ip[ 3 ], Clamp ); + ip += 4; + op += 4; + l4--; + } + } + else + { + while( l4 != 0 ) + { + op[ 0 ] = (T) roundclamp( ip[ 0 ] * OutMul, Clamp ); + op[ 1 ] = (T) roundclamp( ip[ 1 ] * OutMul, Clamp ); + op[ 2 ] = (T) roundclamp( ip[ 2 ] * OutMul, Clamp ); + op[ 3 ] = (T) roundclamp( ip[ 3 ] * OutMul, Clamp ); + ip += 4; + op += 4; + l4--; + } + } + + #endif // defined( LANCIR_NEON ) + + if( IsUnityMul ) + { + while( l != 0 ) + { + *op = (T) roundclamp( *ip, Clamp ); + ip++; + op++; + l--; + } + } + else + { + while( l != 0 ) + { + *op = (T) 
roundclamp( *ip * OutMul, Clamp ); + ip++; + op++; + l--; + } + } + } + + #define LANCIR_LF_PRE \ + const CResizePos* const rpe = rp + DstLen; \ + while( rp != rpe ) \ + { \ + const float* flt = rp -> flt; \ + const float* ip; \ + if( UseSP ) \ + { \ + ip = (float*) ( (intptr_t) sp + rp -> spo ); \ + } \ + else \ + { \ + ip = (float*) rp -> spo; \ + } + + #define LANCIR_LF_POST \ + op += opinc; \ + rp++; \ + } + + /** + * Function performs scanline resizing. Variants for 1-4-channel images. + * + * @param[in] sp Source scanline buffer. + * @param[out] op Destination buffer. + * @param opinc `op` increment. + * @param rp Source scanline offsets and resizing filters. + * @param kl Filter kernel's length, in taps (always an even value). + * @param DstLen Destination length, in pixels. + * @tparam UseSP `true` if `sp` pointer should be added to `spo`. + */ + + template< bool UseSP > + static void resize1( const float* const sp, float* op, const size_t opinc, + const CResizePos* rp, const int kl, const int DstLen ) + { + const int ci = kl >> 2; + + if(( kl & 3 ) == 0 ) + { + LANCIR_LF_PRE + + int c = ci; + + #if defined( LANCIR_SSE2 ) + + __m128 sum = _mm_mul_ps( _mm_load_ps( flt ), _mm_loadu_ps( ip )); + + while( --c != 0 ) + { + flt += 4; + ip += 4; + sum = _mm_add_ps( sum, _mm_mul_ps( _mm_load_ps( flt ), + _mm_loadu_ps( ip ))); + } + + sum = _mm_add_ps( sum, _mm_movehl_ps( sum, sum )); + + _mm_store_ss( op, _mm_add_ss( sum, + _mm_shuffle_ps( sum, sum, 1 ))); + + #elif defined( LANCIR_NEON ) + + float32x4_t sum = vmulq_f32( vld1q_f32( flt ), vld1q_f32( ip )); + + while( --c != 0 ) + { + flt += 4; + ip += 4; + sum = vmlaq_f32( sum, vld1q_f32( flt ), vld1q_f32( ip )); + } + + op[ 0 ] = vaddvq_f32( sum ); + + #else // defined( LANCIR_NEON ) + + float sum0 = flt[ 0 ] * ip[ 0 ]; + float sum1 = flt[ 1 ] * ip[ 1 ]; + float sum2 = flt[ 2 ] * ip[ 2 ]; + float sum3 = flt[ 3 ] * ip[ 3 ]; + + while( --c != 0 ) + { + flt += 4; + ip += 4; + sum0 += flt[ 0 ] * ip[ 0 ]; + sum1 += 
flt[ 1 ] * ip[ 1 ]; + sum2 += flt[ 2 ] * ip[ 2 ]; + sum3 += flt[ 3 ] * ip[ 3 ]; + } + + op[ 0 ] = ( sum0 + sum1 ) + ( sum2 + sum3 ); + + #endif // defined( LANCIR_NEON ) + + LANCIR_LF_POST + } + else + { + LANCIR_LF_PRE + + int c = ci; + + #if defined( LANCIR_SSE2 ) + + __m128 sum = _mm_mul_ps( _mm_load_ps( flt ), _mm_loadu_ps( ip )); + + while( --c != 0 ) + { + flt += 4; + ip += 4; + sum = _mm_add_ps( sum, _mm_mul_ps( _mm_load_ps( flt ), + _mm_loadu_ps( ip ))); + } + + sum = _mm_add_ps( sum, _mm_movehl_ps( sum, sum )); + + const __m128 sum2 = _mm_mul_ps( _mm_loadu_ps( flt + 2 ), + _mm_loadu_ps( ip + 2 )); + + sum = _mm_add_ps( sum, _mm_movehl_ps( sum2, sum2 )); + + _mm_store_ss( op, _mm_add_ss( sum, + _mm_shuffle_ps( sum, sum, 1 ))); + + #elif defined( LANCIR_NEON ) + + float32x4_t sum = vmulq_f32( vld1q_f32( flt ), vld1q_f32( ip )); + + while( --c != 0 ) + { + flt += 4; + ip += 4; + sum = vmlaq_f32( sum, vld1q_f32( flt ), vld1q_f32( ip )); + } + + const float32x2_t sum2 = vadd_f32( vget_high_f32( sum ), + vget_low_f32( sum )); + + op[ 0 ] = vaddv_f32( vmla_f32( sum2, vld1_f32( flt + 4 ), + vld1_f32( ip + 4 ))); + + #else // defined( LANCIR_NEON ) + + float sum0 = flt[ 0 ] * ip[ 0 ]; + float sum1 = flt[ 1 ] * ip[ 1 ]; + float sum2 = flt[ 2 ] * ip[ 2 ]; + float sum3 = flt[ 3 ] * ip[ 3 ]; + + while( --c != 0 ) + { + flt += 4; + ip += 4; + sum0 += flt[ 0 ] * ip[ 0 ]; + sum1 += flt[ 1 ] * ip[ 1 ]; + sum2 += flt[ 2 ] * ip[ 2 ]; + sum3 += flt[ 3 ] * ip[ 3 ]; + } + + op[ 0 ] = ( sum0 + sum1 ) + ( sum2 + sum3 ) + + flt[ 4 ] * ip[ 4 ] + flt[ 5 ] * ip[ 5 ]; + + #endif // defined( LANCIR_NEON ) + + LANCIR_LF_POST + } + } + + template< bool UseSP > + static void resize2( const float* const sp, float* op, const size_t opinc, + const CResizePos* rp, const int kl, const int DstLen ) + { + #if LANCIR_ALIGN > 4 + const int ci = kl >> 2; + const int cir = kl & 3; + #else // LANCIR_ALIGN > 4 + const int ci = kl >> 1; + #endif // LANCIR_ALIGN > 4 + + LANCIR_LF_PRE + + int c = ci; + + 
#if defined( LANCIR_AVX ) + + __m256 sum = _mm256_mul_ps( _mm256_load_ps( flt ), + _mm256_loadu_ps( ip )); + + while( --c != 0 ) + { + flt += 8; + ip += 8; + sum = _mm256_add_ps( sum, _mm256_mul_ps( _mm256_load_ps( flt ), + _mm256_loadu_ps( ip ))); + } + + __m128 res = _mm_add_ps( _mm256_extractf128_ps( sum, 0 ), + _mm256_extractf128_ps( sum, 1 )); + + if( cir == 2 ) + { + res = _mm_add_ps( res, _mm_mul_ps( _mm_load_ps( flt + 8 ), + _mm_loadu_ps( ip + 8 ))); + } + + res = _mm_add_ps( res, _mm_movehl_ps( res, res )); + + _mm_store_ss( op, res ); + _mm_store_ss( op + 1, _mm_shuffle_ps( res, res, 1 )); + + #elif defined( LANCIR_SSE2 ) + + __m128 sumA = _mm_mul_ps( _mm_load_ps( flt ), _mm_loadu_ps( ip )); + __m128 sumB = _mm_mul_ps( _mm_load_ps( flt + 4 ), + _mm_loadu_ps( ip + 4 )); + + while( --c != 0 ) + { + flt += 8; + ip += 8; + sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt ), + _mm_loadu_ps( ip ))); + + sumB = _mm_add_ps( sumB, _mm_mul_ps( _mm_load_ps( flt + 4 ), + _mm_loadu_ps( ip + 4 ))); + } + + sumA = _mm_add_ps( sumA, sumB ); + + if( cir == 2 ) + { + sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt + 8 ), + _mm_loadu_ps( ip + 8 ))); + } + + sumA = _mm_add_ps( sumA, _mm_movehl_ps( sumA, sumA )); + + _mm_store_ss( op, sumA ); + _mm_store_ss( op + 1, _mm_shuffle_ps( sumA, sumA, 1 )); + + #elif defined( LANCIR_NEON ) + + float32x4_t sumA = vmulq_f32( vld1q_f32( flt ), vld1q_f32( ip )); + float32x4_t sumB = vmulq_f32( vld1q_f32( flt + 4 ), + vld1q_f32( ip + 4 )); + + while( --c != 0 ) + { + flt += 8; + ip += 8; + sumA = vmlaq_f32( sumA, vld1q_f32( flt ), vld1q_f32( ip )); + sumB = vmlaq_f32( sumB, vld1q_f32( flt + 4 ), + vld1q_f32( ip + 4 )); + } + + sumA = vaddq_f32( sumA, sumB ); + + if( cir == 2 ) + { + sumA = vmlaq_f32( sumA, vld1q_f32( flt + 8 ), + vld1q_f32( ip + 8 )); + } + + vst1_f32( op, vadd_f32( vget_high_f32( sumA ), vget_low_f32( sumA ))); + + #else // defined( LANCIR_NEON ) + + const float xx = flt[ 0 ]; + const float xx2 = flt[ 1 ]; + 
float sum0 = xx * ip[ 0 ]; + float sum1 = xx * ip[ 1 ]; + float sum2 = xx2 * ip[ 2 ]; + float sum3 = xx2 * ip[ 3 ]; + + while( --c != 0 ) + { + flt += 2; + ip += 4; + const float xx = flt[ 0 ]; + const float xx2 = flt[ 1 ]; + sum0 += xx * ip[ 0 ]; + sum1 += xx * ip[ 1 ]; + sum2 += xx2 * ip[ 2 ]; + sum3 += xx2 * ip[ 3 ]; + } + + op[ 0 ] = sum0 + sum2; + op[ 1 ] = sum1 + sum3; + + #endif // defined( LANCIR_NEON ) + + LANCIR_LF_POST + } + + template< bool UseSP > + static void resize3( const float* const sp, float* op, const size_t opinc, + const CResizePos* rp, const int kl, const int DstLen ) + { + #if LANCIR_ALIGN > 4 + + const int ci = kl >> 2; + const int cir = kl & 3; + + LANCIR_LF_PRE + + float res[ 12 ]; + int c = ci; + + #if defined( LANCIR_AVX ) + + __m128 sumA = _mm_mul_ps( _mm_load_ps( flt ), _mm_loadu_ps( ip )); + __m256 sumB = _mm256_mul_ps( _mm256_loadu_ps( flt + 4 ), + _mm256_loadu_ps( ip + 4 )); + + while( --c != 0 ) + { + flt += 12; + ip += 12; + sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt ), + _mm_loadu_ps( ip ))); + + sumB = _mm256_add_ps( sumB, _mm256_mul_ps( + _mm256_loadu_ps( flt + 4 ), _mm256_loadu_ps( ip + 4 ))); + } + + if( cir == 2 ) + { + sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt + 12 ), + _mm_loadu_ps( ip + 12 ))); + } + + _mm_storeu_ps( res, sumA ); + + float o0 = res[ 0 ] + res[ 3 ]; + float o1 = res[ 1 ]; + float o2 = res[ 2 ]; + + _mm256_storeu_ps( res + 4, sumB ); + + o1 += res[ 4 ]; + o2 += res[ 5 ]; + + #elif defined( LANCIR_SSE2 ) + + __m128 sumA = _mm_mul_ps( _mm_load_ps( flt ), _mm_loadu_ps( ip )); + __m128 sumB = _mm_mul_ps( _mm_load_ps( flt + 4 ), + _mm_loadu_ps( ip + 4 )); + + __m128 sumC = _mm_mul_ps( _mm_load_ps( flt + 8 ), + _mm_loadu_ps( ip + 8 )); + + while( --c != 0 ) + { + flt += 12; + ip += 12; + sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt ), + _mm_loadu_ps( ip ))); + + sumB = _mm_add_ps( sumB, _mm_mul_ps( _mm_load_ps( flt + 4 ), + _mm_loadu_ps( ip + 4 ))); + + sumC = _mm_add_ps( sumC, 
_mm_mul_ps( _mm_load_ps( flt + 8 ), + _mm_loadu_ps( ip + 8 ))); + } + + if( cir == 2 ) + { + sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt + 12 ), + _mm_loadu_ps( ip + 12 ))); + } + + _mm_storeu_ps( res, sumA ); + _mm_storeu_ps( res + 4, sumB ); + + float o0 = res[ 0 ] + res[ 3 ]; + float o1 = res[ 1 ] + res[ 4 ]; + float o2 = res[ 2 ] + res[ 5 ]; + + _mm_storeu_ps( res + 8, sumC ); + + #elif defined( LANCIR_NEON ) + + float32x4_t sumA = vmulq_f32( vld1q_f32( flt ), vld1q_f32( ip )); + float32x4_t sumB = vmulq_f32( vld1q_f32( flt + 4 ), + vld1q_f32( ip + 4 )); + + float32x4_t sumC = vmulq_f32( vld1q_f32( flt + 8 ), + vld1q_f32( ip + 8 )); + + while( --c != 0 ) + { + flt += 12; + ip += 12; + sumA = vmlaq_f32( sumA, vld1q_f32( flt ), vld1q_f32( ip )); + sumB = vmlaq_f32( sumB, vld1q_f32( flt + 4 ), + vld1q_f32( ip + 4 )); + + sumC = vmlaq_f32( sumC, vld1q_f32( flt + 8 ), + vld1q_f32( ip + 8 )); + } + + if( cir == 2 ) + { + sumA = vmlaq_f32( sumA, vld1q_f32( flt + 12 ), + vld1q_f32( ip + 12 )); + } + + vst1q_f32( res, sumA ); + vst1q_f32( res + 4, sumB ); + + float o0 = res[ 0 ] + res[ 3 ]; + float o1 = res[ 1 ] + res[ 4 ]; + float o2 = res[ 2 ] + res[ 5 ]; + + vst1q_f32( res + 8, sumC ); + + #endif // defined( LANCIR_NEON ) + + o0 += res[ 6 ] + res[ 9 ]; + o1 += res[ 7 ] + res[ 10 ]; + o2 += res[ 8 ] + res[ 11 ]; + + if( cir == 2 ) + { + o1 += flt[ 16 ] * ip[ 16 ]; + o2 += flt[ 17 ] * ip[ 17 ]; + } + + op[ 0 ] = o0; + op[ 1 ] = o1; + op[ 2 ] = o2; + + #else // LANCIR_ALIGN > 4 + + const int ci = kl >> 1; + + LANCIR_LF_PRE + + int c = ci; + + const float xx = flt[ 0 ]; + float sum0 = xx * ip[ 0 ]; + float sum1 = xx * ip[ 1 ]; + float sum2 = xx * ip[ 2 ]; + const float xx2 = flt[ 1 ]; + float sum3 = xx2 * ip[ 3 ]; + float sum4 = xx2 * ip[ 4 ]; + float sum5 = xx2 * ip[ 5 ]; + + while( --c != 0 ) + { + flt += 2; + ip += 6; + const float xx = flt[ 0 ]; + sum0 += xx * ip[ 0 ]; + sum1 += xx * ip[ 1 ]; + sum2 += xx * ip[ 2 ]; + const float xx2 = flt[ 1 ]; + sum3 += 
xx2 * ip[ 3 ]; + sum4 += xx2 * ip[ 4 ]; + sum5 += xx2 * ip[ 5 ]; + } + + op[ 0 ] = sum0 + sum3; + op[ 1 ] = sum1 + sum4; + op[ 2 ] = sum2 + sum5; + + #endif // LANCIR_ALIGN > 4 + + LANCIR_LF_POST + } + + template< bool UseSP > + static void resize4( const float* const sp, float* op, const size_t opinc, + const CResizePos* rp, const int kl, const int DstLen ) + { + #if LANCIR_ALIGN > 4 + const int ci = kl >> 1; + #else // LANCIR_ALIGN > 4 + const int ci = kl; + #endif // LANCIR_ALIGN > 4 + + LANCIR_LF_PRE + + int c = ci; + + #if defined( LANCIR_AVX ) + + __m256 sum = _mm256_mul_ps( _mm256_load_ps( flt ), + _mm256_loadu_ps( ip )); + + while( --c != 0 ) + { + flt += 8; + ip += 8; + sum = _mm256_add_ps( sum, _mm256_mul_ps( _mm256_load_ps( flt ), + _mm256_loadu_ps( ip ))); + } + + _mm_store_ps( op, _mm_add_ps( _mm256_extractf128_ps( sum, 0 ), + _mm256_extractf128_ps( sum, 1 ))); + + #elif defined( LANCIR_SSE2 ) + + __m128 sumA = _mm_mul_ps( _mm_load_ps( flt ), _mm_load_ps( ip )); + __m128 sumB = _mm_mul_ps( _mm_load_ps( flt + 4 ), + _mm_load_ps( ip + 4 )); + + while( --c != 0 ) + { + flt += 8; + ip += 8; + sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt ), + _mm_load_ps( ip ))); + + sumB = _mm_add_ps( sumB, _mm_mul_ps( _mm_load_ps( flt + 4 ), + _mm_load_ps( ip + 4 ))); + } + + _mm_store_ps( op, _mm_add_ps( sumA, sumB )); + + #elif defined( LANCIR_NEON ) + + float32x4_t sumA = vmulq_f32( vld1q_f32( flt ), vld1q_f32( ip )); + float32x4_t sumB = vmulq_f32( vld1q_f32( flt + 4 ), + vld1q_f32( ip + 4 )); + + while( --c != 0 ) + { + flt += 8; + ip += 8; + sumA = vmlaq_f32( sumA, vld1q_f32( flt ), vld1q_f32( ip )); + sumB = vmlaq_f32( sumB, vld1q_f32( flt + 4 ), + vld1q_f32( ip + 4 )); + } + + vst1q_f32( op, vaddq_f32( sumA, sumB )); + + #else // defined( LANCIR_NEON ) + + const float xx = flt[ 0 ]; + float sum0 = xx * ip[ 0 ]; + float sum1 = xx * ip[ 1 ]; + float sum2 = xx * ip[ 2 ]; + float sum3 = xx * ip[ 3 ]; + + while( --c != 0 ) + { + flt++; + ip += 4; + const float 
xx = flt[ 0 ]; + sum0 += xx * ip[ 0 ]; + sum1 += xx * ip[ 1 ]; + sum2 += xx * ip[ 2 ]; + sum3 += xx * ip[ 3 ]; + } + + op[ 0 ] = sum0; + op[ 1 ] = sum1; + op[ 2 ] = sum2; + op[ 3 ] = sum3; + + #endif // defined( LANCIR_NEON ) + + LANCIR_LF_POST + } + + #undef LANCIR_LF_PRE + #undef LANCIR_LF_POST +}; + +#undef LANCIR_ALIGN + +} // namespace avir + +#endif // AVIR_CLANCIR_INCLUDED From 49cded608ff4c8d53a679b37ff223d5174d36216 Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Sat, 20 Apr 2024 20:48:24 +0700 Subject: [PATCH 03/12] Dump all levels of mip-map texture. Load levels of HD mip-map texture. --- src/Textures.cpp | 239 +++++++++++++++++++++++++++++------------------ 1 file changed, 146 insertions(+), 93 deletions(-) diff --git a/src/Textures.cpp b/src/Textures.cpp index 9b308be38..dfa36037f 100644 --- a/src/Textures.cpp +++ b/src/Textures.cpp @@ -1152,7 +1152,95 @@ void TextureCache::_loadBackground(CachedTexture *pTexture) free(pDest); } -void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& _ghqTexInfo, u64 _ricecrc) +struct TexLoadData +{ + int bpl = 0; + int width = 0; + int height = 0; + u8 * addr = nullptr; + u8 * paladdr = nullptr; + u16 * palette = nullptr; + u64 ricecrc; + u64 strongcrc; +}; + +static bool _calculateHiresTextureCRC(u32 _tileIdx, CachedTexture *_pTexture, TexLoadData & _ldata) +{ + if (config.textureFilter.txHiresEnable == 0 || !TFH.isInited()) + return false; + + gDPLoadTileInfo & info = gDP.loadInfo[_pTexture->tMem]; + + // Temporal workaround for crash problem with mip-mapped textures. See #1711 for details. + // TODO: make proper fix. 
+ if (info.texAddress == 0) + return false; + + _ldata.addr = (u8*)(RDRAM + info.texAddress); + if (info.loadType == LOADTYPE_TILE) { + _ldata.bpl = info.texWidth << info.size >> 1; + _ldata.addr += (info.ult * _ldata.bpl) + (((info.uls << info.size) + 1) >> 1); + + _ldata.width = min(info.width, info.texWidth); + if (info.size > _pTexture->size) + _ldata.width <<= info.size - _pTexture->size; + + _ldata.height = info.height; + if ((config.generalEmulation.hacks & hack_MK64) != 0 && (_ldata.height % 2) != 0) + _ldata.height--; + } + else { + const gDPTile & tile = gDP.tiles[_tileIdx]; + int tile_width = tile.lrs - tile.uls + 1; + int tile_height = tile.lrt - tile.ult + 1; + + int mask_width = (tile.masks == 0) ? (tile_width) : (1 << tile.masks); + int mask_height = (tile.maskt == 0) ? (tile_height) : (1 << tile.maskt); + + if ((tile.clamps && tile_width <= 256)) + _ldata.width = min(mask_width, tile_width); + else + _ldata.width = mask_width; + + if ((tile.clampt && tile_height <= 256) || (mask_height > 256)) + _ldata.height = min(mask_height, tile_height); + else + _ldata.height = mask_height; + + if (tile.size == G_IM_SIZ_32b) + _ldata.bpl = tile.line << 4; + else if (info.dxt == 0) + _ldata.bpl = tile.line << 3; + else { + u32 dxt = info.dxt; + if (dxt > 1) + dxt = ReverseDXT(dxt, info.width, _pTexture->width, _pTexture->size); + _ldata.bpl = dxt << 3; + } + } + + _ldata.paladdr = nullptr; + _ldata.palette = nullptr; + if ((_pTexture->size < G_IM_SIZ_16b) && (gDP.otherMode.textureLUT != G_TT_NONE || _pTexture->format == G_IM_FMT_CI)) { + if (_pTexture->size == G_IM_SIZ_8b) + _ldata.paladdr = (u8*)(gDP.TexFilterPalette); + else if (config.textureFilter.txHresAltCRC) + _ldata.paladdr = (u8*)(gDP.TexFilterPalette + (_pTexture->palette << 5)); + else + _ldata.paladdr = (u8*)(gDP.TexFilterPalette + (_pTexture->palette << 4)); + // TODO: fix palette load + // palette = (rdp.pal_8 + (gSP.textureTile[_t]->palette << 4)); + } + + _ldata.ricecrc = 
txfilter_checksum(_ldata.addr, _ldata.width, _ldata.height, _pTexture->size, _ldata.bpl, _ldata.paladdr); + if (config.textureFilter.txStrongCRC) + _ldata.strongcrc = txfilter_checksum_strong(_ldata.addr, _ldata.width, _ldata.height, _pTexture->size, _ldata.bpl, _ldata.paladdr); + + return true; +} + +static +void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& _ghqTexInfo/*, u64 _ricecrc*/) { u32 texWidth = _ghqTexInfo.width; u32 texHeight = _ghqTexInfo.height; @@ -1178,10 +1266,10 @@ void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& m_tempTextureHolder[mipLevel * 2] = texDataOffset; m_tempTextureHolder[mipLevel * 2 + 1] = tileSizePacked; - txfilter_dmptx((u8*)pTileData, texWidth, texHeight, - texWidth, (u16)_ghqTexInfo.format, - N64FormatSize(_pTexture->format, _pTexture->size), - _ricecrc + mipLevel); + //txfilter_dmptx((u8*)pTileData, texWidth, texHeight, + // texWidth, (u16)_ghqTexInfo.format, + // N64FormatSize(_pTexture->format, _pTexture->size), + // _ricecrc + mipLevel); std::copy_n(pTileData, texWidth * texHeight, &m_tempTextureHolder[texDataOffset]); pTileData = &m_tempTextureHolder[texDataOffset]; @@ -1197,12 +1285,37 @@ void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& u32 shifts = mipRatioSNew - mipRatioS; u32 shiftt = mipRatioTNew - mipRatioT; if (shifts > 0 || shiftt > 0) { - imageResizer.resizeImage((u8*)pTileData, texWidth, texHeight, (u8*)tileData.data(), texWidth >> shifts, texHeight >> shiftt, 4); - texWidth >>= shifts; - texHeight >>= shiftt; + const u32 tileMipLevel = gSP.texture.tile + mipLevel + 1; + gDPTile & mipTile = gDP.tiles[tileMipLevel]; + CachedTexture tmptex = *_pTexture; + tmptex.tMem = mipTile.tmem; + tmptex.palette = mipTile.palette; + tmptex.maskS = mipTile.masks; + tmptex.maskT = mipTile.maskt; + tmptex.format = mipTile.format; + tmptex.size = mipTile.size; + TexLoadData ldata; + GHQTexInfo ghqTexInfo; + if 
(_calculateHiresTextureCRC(gSP.texture.tile + mipLevel + 1, &tmptex, ldata)) { + // TODO: fix problem with zero texture dimensions on GLideNHQ side. + auto hirestexFound = txfilter_hirestex(_pTexture->crc, ldata.ricecrc, ldata.palette, N64FormatSize(_pTexture->format, _pTexture->size), &ghqTexInfo); + if (!hirestexFound) { + // Texture with RiceCRC was not found. Try alternative CRC. + hirestexFound = txfilter_hirestex(_pTexture->crc, ldata.strongcrc, ldata.palette, N64FormatSize(_pTexture->format, _pTexture->size), &ghqTexInfo); + } + } + if (ghqTexInfo.data != nullptr && ghqTexInfo.width != 0 && ghqTexInfo.height != 0) { + pTileData = reinterpret_cast(ghqTexInfo.data); + texWidth = ghqTexInfo.width; + texHeight = ghqTexInfo.height; + } else { + imageResizer.resizeImage((u8*)pTileData, texWidth, texHeight, (u8*)tileData.data(), texWidth >> shifts, texHeight >> shiftt, 4); + pTileData = tileData.data(); + texWidth >>= shifts; + texHeight >>= shiftt; + } mipRatioS = mipRatioSNew; mipRatioT = mipRatioTNew; - pTileData = tileData.data(); } } @@ -1228,84 +1341,21 @@ void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& bool TextureCache::_loadHiresTexture(u32 _tile, CachedTexture *_pTexture, u64 & _ricecrc, u64 & _strongcrc) { - if (config.textureFilter.txHiresEnable == 0 || !TFH.isInited()) + TexLoadData ldata; + if (!_calculateHiresTextureCRC(_tile, _pTexture, ldata)) return false; - gDPLoadTileInfo & info = gDP.loadInfo[_pTexture->tMem]; - - // Temporal workaround for crash problem with mip-mapped textures. See #1711 for details. - // TODO: make proper fix. 
- if (info.texAddress == 0) - return false; - - int bpl; - int width, height; - u8 * addr = (u8*)(RDRAM + info.texAddress); - if (info.loadType == LOADTYPE_TILE) { - bpl = info.texWidth << info.size >> 1; - addr += (info.ult * bpl) + (((info.uls << info.size) + 1) >> 1); - - width = min(info.width, info.texWidth); - if (info.size > _pTexture->size) - width <<= info.size - _pTexture->size; - - height = info.height; - if ((config.generalEmulation.hacks & hack_MK64) != 0 && (height % 2) != 0) - height--; - } else { - const gDPTile * pTile = gSP.textureTile[_tile]; - int tile_width = pTile->lrs - pTile->uls + 1; - int tile_height = pTile->lrt - pTile->ult + 1; - - int mask_width = (pTile->masks == 0) ? (tile_width) : (1 << pTile->masks); - int mask_height = (pTile->maskt == 0) ? (tile_height) : (1 << pTile->maskt); - - if ((pTile->clamps && tile_width <= 256)) - width = min(mask_width, tile_width); - else - width = mask_width; + _ricecrc = ldata.ricecrc; + _strongcrc = ldata.strongcrc; - if ((pTile->clampt && tile_height <= 256) || (mask_height > 256)) - height = min(mask_height, tile_height); - else - height = mask_height; - - if (pTile->size == G_IM_SIZ_32b) - bpl = pTile->line << 4; - else if (info.dxt == 0) - bpl = pTile->line << 3; - else { - u32 dxt = info.dxt; - if (dxt > 1) - dxt = ReverseDXT(dxt, info.width, _pTexture->width, _pTexture->size); - bpl = dxt << 3; - } - } - - u8 * paladdr = nullptr; - u16 * palette = nullptr; - if ((_pTexture->size < G_IM_SIZ_16b) && (gDP.otherMode.textureLUT != G_TT_NONE || _pTexture->format == G_IM_FMT_CI)) { - if (_pTexture->size == G_IM_SIZ_8b) - paladdr = (u8*)(gDP.TexFilterPalette); - else if (config.textureFilter.txHresAltCRC) - paladdr = (u8*)(gDP.TexFilterPalette + (_pTexture->palette << 5)); - else - paladdr = (u8*)(gDP.TexFilterPalette + (_pTexture->palette << 4)); - // TODO: fix palette load - // palette = (rdp.pal_8 + (gSP.textureTile[_t]->palette << 4)); - } - - _ricecrc = txfilter_checksum(addr, width, height, 
_pTexture->size, bpl, paladdr); - if (config.textureFilter.txStrongCRC) - _strongcrc = txfilter_checksum_strong(addr, width, height, _pTexture->size, bpl, paladdr); GHQTexInfo ghqTexInfo; // TODO: fix problem with zero texture dimensions on GLideNHQ side. - auto hirestexFound = txfilter_hirestex(_pTexture->crc, _ricecrc, palette, N64FormatSize(_pTexture->format, _pTexture->size), &ghqTexInfo); + auto hirestexFound = txfilter_hirestex(_pTexture->crc, _ricecrc, ldata.palette, N64FormatSize(_pTexture->format, _pTexture->size), &ghqTexInfo); if (!hirestexFound) { // Texture with RiceCRC was not found. Try alternative CRC. if (_strongcrc == 0U) - _strongcrc = txfilter_checksum_strong(addr, width, height, _pTexture->size, bpl, paladdr); - hirestexFound = txfilter_hirestex(_pTexture->crc, _strongcrc, palette, N64FormatSize(_pTexture->format, _pTexture->size), &ghqTexInfo); + _strongcrc = txfilter_checksum_strong(ldata.addr, ldata.width, ldata.height, _pTexture->size, ldata.bpl, ldata.paladdr); + hirestexFound = txfilter_hirestex(_pTexture->crc, _strongcrc, ldata.palette, N64FormatSize(_pTexture->format, _pTexture->size), &ghqTexInfo); } if (hirestexFound && ghqTexInfo.width != 0 && ghqTexInfo.height != 0) { if (config.generalEmulation.enableInaccurateTextureCoordinates == 0 && @@ -1318,8 +1368,8 @@ bool TextureCache::_loadHiresTexture(u32 _tile, CachedTexture *_pTexture, u64 & } if (_pTexture->max_level > 0) { - _loadHiresTextureMipMapAccurate(_pTexture, ghqTexInfo, _ricecrc); - _updateCachedTexture(ghqTexInfo, _pTexture, width, height); + _loadHiresTextureMipMapAccurate(_pTexture, ghqTexInfo/*, _ricecrc*/); + _updateCachedTexture(ghqTexInfo, _pTexture, ldata.width, ldata.height); return true; } @@ -1337,7 +1387,7 @@ bool TextureCache::_loadHiresTexture(u32 _tile, CachedTexture *_pTexture, u64 & params.textureUnitIndex = textureIndices::Tex[_tile]; gfxContext.init2DTexture(params); assert(!gfxContext.isError()); - _updateCachedTexture(ghqTexInfo, _pTexture, width, 
height); + _updateCachedTexture(ghqTexInfo, _pTexture, ldata.width, ldata.height); return true; } @@ -1697,8 +1747,8 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) gDPTile * pTile = _tile < 2 ? gSP.textureTile[_tile] : &gDP.tiles[_tile]; const u32 tMemMask = gDP.otherMode.textureLUT == G_TT_NONE ? 0x1FF : 0xFF; gDPLoadTileInfo &info = gDP.loadInfo[pTile->tmem & tMemMask]; - if (info.texAddress == 0x0071a0f0 || info.texAddress == 0x00719ef0) - int t = 0; + //if (info.texAddress == 0x0071a0f0 || info.texAddress == 0x00719ef0) + // int t = 0; u64 ricecrc = 0; u64 strongcrc = 0; @@ -1777,15 +1827,18 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) config.textureFilter.txHiresEnable != 0 && config.hotkeys.enabledKeys[Config::HotKey::hkTexDump] != 0) || config.textureFilter.txDump) { - config.textureFilter.txStrongCRC ? - txfilter_dmptx_strong((u8*)(m_tempTextureHolder.data() + texDataOffset), tmptex.width, tmptex.height, - tmptex.width, (u16)u32(glInternalFormat), - N64FormatSize(_pTexture->format, _pTexture->size), - strongcrc) : - txfilter_dmptx((u8*)(m_tempTextureHolder.data() + texDataOffset), tmptex.width, tmptex.height, - tmptex.width, (u16)u32(glInternalFormat), - N64FormatSize(_pTexture->format, _pTexture->size), - ricecrc); + TexLoadData ldata; + if (_calculateHiresTextureCRC(gSP.texture.tile + mipLevel + 1, &tmptex, ldata)) { + config.textureFilter.txStrongCRC ? 
+ txfilter_dmptx_strong(reinterpret_cast(&m_tempTextureHolder[texDataOffset]), + tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), + N64FormatSize(_pTexture->format, _pTexture->size), + ldata.strongcrc) : + txfilter_dmptx(reinterpret_cast(&m_tempTextureHolder[texDataOffset]), + tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), + N64FormatSize(_pTexture->format, _pTexture->size), + ldata.ricecrc); + } } texDataOffset += tmptex.width * tmptex.height; From 3872631fcd607eb1c7fbf0e3326f044bb46bb6ff Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Sun, 21 Apr 2024 09:00:36 +0700 Subject: [PATCH 04/12] Mipmap folder --- src/GLideNHQ/TxFilter.cpp | 82 +++++++++++++++++++++++++++------ src/GLideNHQ/TxFilter.h | 8 ++++ src/GLideNHQ/TxFilterExport.cpp | 19 ++++++++ src/GLideNHQ/TxFilterExport.h | 19 ++++++++ src/Textures.cpp | 75 +++++++++++++++++++----------- 5 files changed, 162 insertions(+), 41 deletions(-) diff --git a/src/GLideNHQ/TxFilter.cpp b/src/GLideNHQ/TxFilter.cpp index fe5b774e6..4d2ec530a 100644 --- a/src/GLideNHQ/TxFilter.cpp +++ b/src/GLideNHQ/TxFilter.cpp @@ -589,20 +589,48 @@ TxFilter::checksum64strong(uint8 *src, int width, int height, int size, int rowS return 0; } +tx_wstring +TxFilter::getDumpPath(boolean isStrongCrc) +{ + if (_dumpPath.empty() || _ident.empty()) + return tx_wstring(); + + tx_wstring path; + path.assign(_dumpPath); + path.append(wst("/")); + path.append(_ident); + isStrongCrc ? 
path.append(wst("/GLideNHQ_strong_crc")) : path.append(wst("/GLideNHQ")); + return path; +} + +tx_wstring +TxFilter::getMipMapDumpPath(N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc) +{ + tx_wstring path = getDumpPath(isStrongCrc); + if (path.empty()) + return path; + + wchar_t wbuf[256]; + if (n64FmtSz._format == 0x2) + tx_swprintf(wbuf, 256, wst("/%ls#%08X#%01X#%01X#%08X_mipmap"), _ident.c_str(), r_crc64._texture, n64FmtSz._format, n64FmtSz._size, r_crc64._palette); + else + tx_swprintf(wbuf, 256, wst("/%ls#%08X#%01X#%01X_mipmap"), _ident.c_str(), r_crc64._texture, n64FmtSz._format, n64FmtSz._size); + + path.append(wbuf); + return path; +} + boolean -TxFilter::dmptx(uint8 *src, int width, int height, int rowStridePixel, - ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc) +TxFilter::dmptxImpl(uint8 *src, int width, int height, int rowStridePixel, + ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, tx_wstring const& dumpPath) { assert(gfmt != graphics::colorFormat::RGBA); - if (!_initialized) - return 0; - - if (!(_options & DUMP_TEX)) + if (!_initialized || !(_options & DUMP_TEX) || dumpPath.empty()) return 0; DBG_INFO(80, wst("gfmt = %02x n64fmt = %02x\n"), u32(gfmt), n64FmtSz._format); DBG_INFO(80, wst("hirestex: r_crc64:%08X %08X\n"), - r_crc64._palette, r_crc64._texture); + r_crc64._palette, r_crc64._texture); if (gfmt != graphics::internalcolorFormat::RGBA8) { if (!_txQuantize->quantize(src, _tex1, rowStridePixel, height, gfmt, graphics::internalcolorFormat::RGBA8)) @@ -613,13 +641,9 @@ TxFilter::dmptx(uint8 *src, int width, int height, int rowStridePixel, if (!_dumpPath.empty() && !_ident.empty()) { /* dump it to disk */ FILE *fp = nullptr; - tx_wstring tmpbuf; + tx_wstring tmpbuf = dumpPath; /* create directories */ - tmpbuf.assign(_dumpPath); - tmpbuf.append(wst("/")); - tmpbuf.append(_ident); - isStrongCrc ? 
tmpbuf.append(wst("/GLideNHQ_strong_crc")) : tmpbuf.append(wst("/GLideNHQ")); if (!osal_path_existsW(tmpbuf.c_str()) && osal_mkdirp(tmpbuf.c_str()) != 0) return 0; @@ -627,7 +651,8 @@ TxFilter::dmptx(uint8 *src, int width, int height, int rowStridePixel, wchar_t wbuf[256]; tx_swprintf(wbuf, 256, wst("/%ls#%08X#%01X#%01X#%08X_ciByRGBA.png"), _ident.c_str(), r_crc64._texture, n64FmtSz._format, n64FmtSz._size, r_crc64._palette); tmpbuf.append(wbuf); - } else { + } + else { wchar_t wbuf[256]; tx_swprintf(wbuf, 256, wst("/%ls#%08X#%01X#%01X_all.png"), _ident.c_str(), r_crc64._texture, n64FmtSz._format, n64FmtSz._size); tmpbuf.append(wbuf); @@ -644,11 +669,40 @@ TxFilter::dmptx(uint8 *src, int width, int height, int rowStridePixel, fclose(fp); return 1; } - } + } return 0; } + +boolean +TxFilter::dmptx(uint8 *src, int width, int height, int rowStridePixel, + ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc) +{ + return dmptxImpl(src, width, height, rowStridePixel, gfmt, n64FmtSz, r_crc64, getDumpPath(isStrongCrc)); +} + +boolean +TxFilter::dmptxMipmap(uint8 *src, int width, int height, int rowStridePixel, + ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum crc64, Checksum crc64base, boolean isStrongCrc) +{ + tx_wstring path = getMipMapDumpPath(n64FmtSz, crc64base, isStrongCrc); + return dmptxImpl(src, width, height, rowStridePixel, gfmt, n64FmtSz, crc64, path); +} + +//boolean +//TxFilter::dmpMipmap(GHQDumpTexInfo* infos, int numLevel, boolean isStrongCrc) +//{ +// if (!infos || !numLevel) +// return 0; +// tx_wstring path = getMipMapDumpPath(infos[0].n64_format_size, infos[0].checksum, isStrongCrc); +// for (int i = 0; i < numLevel; ++i) { +// GHQDumpTexInfo& info = infos[i]; +// dmptxImpl(info.data, info.width, info.height, info.stride, info.texture_format, info.n64_format_size, info.checksum, path); +// } +// return 1; +//} + boolean TxFilter::reloadhirestex() { diff --git a/src/GLideNHQ/TxFilter.h b/src/GLideNHQ/TxFilter.h index 
75d0aa4b3..96a209202 100644 --- a/src/GLideNHQ/TxFilter.h +++ b/src/GLideNHQ/TxFilter.h @@ -53,6 +53,11 @@ class TxFilter TxImage *_txImage; boolean _initialized; void clear(); + tx_wstring getDumpPath(boolean isStrongCrc); + tx_wstring getMipMapDumpPath(N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc); + boolean dmptxImpl(uint8 *src, int width, int height, int rowStridePixel, + ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, tx_wstring const& dumpPath); + public: ~TxFilter(); TxFilter(int maxwidth, @@ -81,6 +86,9 @@ class TxFilter uint64 checksum64strong(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette); boolean dmptx(uint8 *src, int width, int height, int rowStridePixel, ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc); + boolean dmptxMipmap(uint8 *src, int width, int height, int rowStridePixel, + ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum crc64, Checksum crc64base, boolean isStrongCrc); + //boolean dmpMipmap(GHQDumpTexInfo* infos, int numLevel, boolean isStrongCrc); boolean reloadhirestex(); void dumpcache(); }; diff --git a/src/GLideNHQ/TxFilterExport.cpp b/src/GLideNHQ/TxFilterExport.cpp index dc892e858..98395b32a 100644 --- a/src/GLideNHQ/TxFilterExport.cpp +++ b/src/GLideNHQ/TxFilterExport.cpp @@ -111,6 +111,25 @@ txfilter_dmptx_strong(uint8 *src, int width, int height, int rowStridePixel, uin return 0; } +TAPI boolean TAPIENTRY +txfilter_dmptx_mipmap(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, N64FormatSize n64FmtSz, Checksum crc64, Checksum crc64base, boolean strongCRC) +{ + if (txFilter) + return txFilter->dmptxMipmap(src, width, height, rowStridePixel, ColorFormat(u32(gfmt)), n64FmtSz, crc64, crc64base, strongCRC); + + return 0; +} + +TAPI boolean TAPIENTRY +txfilter_dmp_mipmap(GHQDumpTexInfo* infos, int numLevels) +{ + for (int i = 0; i < numLevels; ++i) { + + } + + return 0; +} + TAPI boolean TAPIENTRY txfilter_reloadhirestex() { diff 
--git a/src/GLideNHQ/TxFilterExport.h b/src/GLideNHQ/TxFilterExport.h index 9633d04ed..ace133561 100644 --- a/src/GLideNHQ/TxFilterExport.h +++ b/src/GLideNHQ/TxFilterExport.h @@ -165,6 +165,19 @@ struct GHQTexInfo N64FormatSize n64_format_size{ 0u, 0u }; }; +struct GHQDumpTexInfo +{ + GHQDumpTexInfo(): checksum(0) {} + ~GHQDumpTexInfo() {} + unsigned char *data{ nullptr }; + unsigned int width{ 0u }; + unsigned int height{ 0u }; + unsigned int stride{ 0u }; + unsigned short texture_format{ 0u }; + N64FormatSize n64_format_size{ 0u, 0u }; + Checksum checksum; +}; + /* Callback to display hires texture info. * Gonetz * @@ -234,6 +247,12 @@ txfilter_dmptx(uint8 *src, int width, int height, int rowStridePixel, uint16 gfm TAPI boolean TAPIENTRY txfilter_dmptx_strong(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, N64FormatSize n64FmtSz, Checksum r_crc64); +TAPI boolean TAPIENTRY +txfilter_dmptx_mipmap(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, N64FormatSize n64FmtSz, Checksum crc64, Checksum crc64base, boolean strongCRC); + +TAPI boolean TAPIENTRY +txfilter_dmp_mipmap(GHQDumpTexInfo* infos, int numLevels); + TAPI boolean TAPIENTRY txfilter_reloadhirestex(); diff --git a/src/Textures.cpp b/src/Textures.cpp index dfa36037f..50586376d 100644 --- a/src/Textures.cpp +++ b/src/Textures.cpp @@ -1347,6 +1347,7 @@ bool TextureCache::_loadHiresTexture(u32 _tile, CachedTexture *_pTexture, u64 & _ricecrc = ldata.ricecrc; _strongcrc = ldata.strongcrc; + u32 tile = _tile - gSP.texture.tile; GHQTexInfo ghqTexInfo; // TODO: fix problem with zero texture dimensions on GLideNHQ side. 
@@ -1359,7 +1360,7 @@ bool TextureCache::_loadHiresTexture(u32 _tile, CachedTexture *_pTexture, u64 & } if (hirestexFound && ghqTexInfo.width != 0 && ghqTexInfo.height != 0) { if (config.generalEmulation.enableInaccurateTextureCoordinates == 0 && - _tile > 0 && + tile > 0 && currentCombiner()->usesLOD() && gSP.texture.level > 1) { _pTexture->max_level = gDP.otherMode.textureDetail == G_TD_DETAIL ? @@ -1384,7 +1385,7 @@ bool TextureCache::_loadHiresTexture(u32 _tile, CachedTexture *_pTexture, u64 & params.format = ColorFormatParam(ghqTexInfo.texture_format); params.dataType = DatatypeParam(ghqTexInfo.pixel_type); params.data = ghqTexInfo.data; - params.textureUnitIndex = textureIndices::Tex[_tile]; + params.textureUnitIndex = textureIndices::Tex[tile]; gfxContext.init2DTexture(params); assert(!gfxContext.isError()); _updateCachedTexture(ghqTexInfo, _pTexture, ldata.width, ldata.height); @@ -1744,21 +1745,22 @@ void TextureCache::_loadFast(u32 _tile, CachedTexture *_pTexture) void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) { - gDPTile * pTile = _tile < 2 ? gSP.textureTile[_tile] : &gDP.tiles[_tile]; - const u32 tMemMask = gDP.otherMode.textureLUT == G_TT_NONE ? 0x1FF : 0xFF; - gDPLoadTileInfo &info = gDP.loadInfo[pTile->tmem & tMemMask]; + //gDPTile * pTile = _tile < 2 ? gSP.textureTile[_tile] : &gDP.tiles[_tile]; + //const u32 tMemMask = gDP.otherMode.textureLUT == G_TT_NONE ? 
0x1FF : 0xFF; + //gDPLoadTileInfo &info = gDP.loadInfo[pTile->tmem & tMemMask]; //if (info.texAddress == 0x0071a0f0 || info.texAddress == 0x00719ef0) // int t = 0; u64 ricecrc = 0; u64 strongcrc = 0; - if (_loadHiresTexture(_tile, _pTexture, ricecrc, strongcrc)) + if (_loadHiresTexture(gSP.texture.tile + _tile, _pTexture, ricecrc, strongcrc)) return; bool force32bitFormat = false; _pTexture->max_level = 0; - if (currentCombiner()->usesLOD() && gSP.texture.level > 1 && _tile > 0) { + const bool isMipMapTex = currentCombiner()->usesLOD() && gSP.texture.level > 1; + if (isMipMapTex && _tile > 0) { _pTexture->max_level = gDP.otherMode.textureDetail == G_TD_DETAIL ? static_cast(gSP.texture.level) : static_cast(gSP.texture.level - 1); @@ -1803,12 +1805,26 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) CachedTexture tmptex = *_pTexture; u16 line = tmptex.line; + const bool needDump = (m_toggleDumpTex && + config.textureFilter.txHiresEnable != 0 && + config.hotkeys.enabledKeys[Config::HotKey::hkTexDump] != 0) || + config.textureFilter.txDump; if (_pTexture->max_level > 0) { u32 mipLevel = 0; u32 texDataOffset = 16; // number of gDP.tiles * 2 + u64 ricecrcbase = 0u; + u64 strongcrcbase = 0u; + if (needDump) { + TexLoadData ldata; + if (_calculateHiresTextureCRC(gSP.texture.tile, current[0], ldata)) { + ricecrcbase = ldata.ricecrc; + strongcrcbase = ldata.strongcrc; + } + } + // Load all tiles into one 1D texture atlas. 
while (true) { @@ -1823,21 +1839,18 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) getLoadParams(tmptex.format, tmptex.size); _getTextureDestData(tmptex, &m_tempTextureHolder[texDataOffset], glInternalFormat, GetTexel, &line); - if ((m_toggleDumpTex && - config.textureFilter.txHiresEnable != 0 && - config.hotkeys.enabledKeys[Config::HotKey::hkTexDump] != 0) || - config.textureFilter.txDump) { + if (needDump) { TexLoadData ldata; if (_calculateHiresTextureCRC(gSP.texture.tile + mipLevel + 1, &tmptex, ldata)) { config.textureFilter.txStrongCRC ? - txfilter_dmptx_strong(reinterpret_cast(&m_tempTextureHolder[texDataOffset]), + txfilter_dmptx_mipmap(reinterpret_cast(&m_tempTextureHolder[texDataOffset]), tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), N64FormatSize(_pTexture->format, _pTexture->size), - ldata.strongcrc) : - txfilter_dmptx(reinterpret_cast(&m_tempTextureHolder[texDataOffset]), + ldata.strongcrc, strongcrcbase, TRUE) : + txfilter_dmptx_mipmap(reinterpret_cast(&m_tempTextureHolder[texDataOffset]), tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), N64FormatSize(_pTexture->format, _pTexture->size), - ldata.ricecrc); + ldata.ricecrc, ricecrcbase, FALSE); } } @@ -1889,19 +1902,27 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) return; } - if ((m_toggleDumpTex && - config.textureFilter.txHiresEnable != 0 && - config.hotkeys.enabledKeys[Config::HotKey::hkTexDump] != 0) || - config.textureFilter.txDump) { + if (needDump) { + if (isMipMapTex) { config.textureFilter.txStrongCRC ? 
- txfilter_dmptx_strong((u8*)m_tempTextureHolder.data(), tmptex.width, tmptex.height, - tmptex.width, (u16)u32(glInternalFormat), - N64FormatSize(_pTexture->format, _pTexture->size), - strongcrc) : - txfilter_dmptx((u8*)m_tempTextureHolder.data(), tmptex.width, tmptex.height, - tmptex.width, (u16)u32(glInternalFormat), - N64FormatSize(_pTexture->format, _pTexture->size), - ricecrc); + txfilter_dmptx_mipmap(reinterpret_cast(m_tempTextureHolder.data()), + tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), + N64FormatSize(_pTexture->format, _pTexture->size), + strongcrc, strongcrc, TRUE) : + txfilter_dmptx_mipmap(reinterpret_cast(m_tempTextureHolder.data()), + tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), + N64FormatSize(_pTexture->format, _pTexture->size), + ricecrc, ricecrc, FALSE); + } + else { + config.textureFilter.txStrongCRC ? + txfilter_dmptx_strong(reinterpret_cast(m_tempTextureHolder.data()), + tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), + N64FormatSize(_pTexture->format, _pTexture->size), strongcrc) : + txfilter_dmptx(reinterpret_cast(m_tempTextureHolder.data()), + tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), + N64FormatSize(_pTexture->format, _pTexture->size), ricecrc); + } } bool bLoaded = false; From a2ca202a98824c141111f8a4dd0882753f30f522 Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Sun, 21 Apr 2024 10:40:42 +0700 Subject: [PATCH 05/12] Cleanup --- src/GLideNHQ/TxFilter.cpp | 78 +++++++++++++-------------------- src/GLideNHQ/TxFilter.h | 7 ++- src/GLideNHQ/TxFilterExport.cpp | 10 ----- src/GLideNHQ/TxFilterExport.h | 16 ------- src/Textures.cpp | 20 ++++----- 5 files changed, 44 insertions(+), 87 deletions(-) diff --git a/src/GLideNHQ/TxFilter.cpp b/src/GLideNHQ/TxFilter.cpp index 4d2ec530a..bf3aa6b8f 100644 --- a/src/GLideNHQ/TxFilter.cpp +++ b/src/GLideNHQ/TxFilter.cpp @@ -590,7 +590,7 @@ TxFilter::checksum64strong(uint8 *src, int width, int 
height, int size, int rowS } tx_wstring -TxFilter::getDumpPath(boolean isStrongCrc) +TxFilter::getTexDumpPath(boolean isStrongCrc) { if (_dumpPath.empty() || _ident.empty()) return tx_wstring(); @@ -604,17 +604,21 @@ TxFilter::getDumpPath(boolean isStrongCrc) } tx_wstring -TxFilter::getMipMapDumpPath(N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc) +TxFilter::getMipMapTexDumpPath(N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc) { - tx_wstring path = getDumpPath(isStrongCrc); + tx_wstring path = getTexDumpPath(isStrongCrc); if (path.empty()) return path; wchar_t wbuf[256]; +#if 1 + tx_swprintf(wbuf, 256, wst("/%ls#%08X_mipmap"), _ident.c_str(), r_crc64._texture); +#else if (n64FmtSz._format == 0x2) tx_swprintf(wbuf, 256, wst("/%ls#%08X#%01X#%01X#%08X_mipmap"), _ident.c_str(), r_crc64._texture, n64FmtSz._format, n64FmtSz._size, r_crc64._palette); else tx_swprintf(wbuf, 256, wst("/%ls#%08X#%01X#%01X_mipmap"), _ident.c_str(), r_crc64._texture, n64FmtSz._format, n64FmtSz._size); +#endif path.append(wbuf); return path; @@ -622,10 +626,10 @@ TxFilter::getMipMapDumpPath(N64FormatSize n64FmtSz, Checksum r_crc64, boolean is boolean TxFilter::dmptxImpl(uint8 *src, int width, int height, int rowStridePixel, - ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, tx_wstring const& dumpPath) + ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, tx_wstring const& texDumpPath) { assert(gfmt != graphics::colorFormat::RGBA); - if (!_initialized || !(_options & DUMP_TEX) || dumpPath.empty()) + if (!_initialized || !(_options & DUMP_TEX) || texDumpPath.empty()) return 0; DBG_INFO(80, wst("gfmt = %02x n64fmt = %02x\n"), u32(gfmt), n64FmtSz._format); @@ -638,71 +642,51 @@ TxFilter::dmptxImpl(uint8 *src, int width, int height, int rowStridePixel, src = _tex1; } - if (!_dumpPath.empty() && !_ident.empty()) { - /* dump it to disk */ - FILE *fp = nullptr; - tx_wstring tmpbuf = dumpPath; + /* dump it to disk */ + FILE *fp = nullptr; + tx_wstring 
tmpbuf = texDumpPath; - /* create directories */ - if (!osal_path_existsW(tmpbuf.c_str()) && osal_mkdirp(tmpbuf.c_str()) != 0) - return 0; + /* create directories */ + if (!osal_path_existsW(tmpbuf.c_str()) && osal_mkdirp(tmpbuf.c_str()) != 0) + return 0; - if (n64FmtSz._format == 0x2) { - wchar_t wbuf[256]; - tx_swprintf(wbuf, 256, wst("/%ls#%08X#%01X#%01X#%08X_ciByRGBA.png"), _ident.c_str(), r_crc64._texture, n64FmtSz._format, n64FmtSz._size, r_crc64._palette); - tmpbuf.append(wbuf); - } - else { - wchar_t wbuf[256]; - tx_swprintf(wbuf, 256, wst("/%ls#%08X#%01X#%01X_all.png"), _ident.c_str(), r_crc64._texture, n64FmtSz._format, n64FmtSz._size); - tmpbuf.append(wbuf); - } + wchar_t wbuf[256]; + if (n64FmtSz._format == 0x2) + tx_swprintf(wbuf, 256, wst("/%ls#%08X#%01X#%01X#%08X_ciByRGBA.png"), _ident.c_str(), r_crc64._texture, n64FmtSz._format, n64FmtSz._size, r_crc64._palette); + else + tx_swprintf(wbuf, 256, wst("/%ls#%08X#%01X#%01X_all.png"), _ident.c_str(), r_crc64._texture, n64FmtSz._format, n64FmtSz._size); + tmpbuf.append(wbuf); #ifdef OS_WINDOWS - if ((fp = _wfopen(tmpbuf.c_str(), wst("wb"))) != nullptr) { + if ((fp = _wfopen(tmpbuf.c_str(), wst("wb"))) != nullptr) { #else - char cbuf[MAX_PATH]; - wcstombs(cbuf, tmpbuf.c_str(), MAX_PATH); - if ((fp = fopen(cbuf, "wb")) != nullptr) { + char cbuf[MAX_PATH]; + wcstombs(cbuf, tmpbuf.c_str(), MAX_PATH); + if ((fp = fopen(cbuf, "wb")) != nullptr) { #endif - _txImage->writePNG(src, fp, width, height, (rowStridePixel << 2), graphics::internalcolorFormat::RGBA8); - fclose(fp); - return 1; - } - } + _txImage->writePNG(src, fp, width, height, (rowStridePixel << 2), graphics::internalcolorFormat::RGBA8); + fclose(fp); + return 1; + } return 0; } - boolean TxFilter::dmptx(uint8 *src, int width, int height, int rowStridePixel, ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc) { - return dmptxImpl(src, width, height, rowStridePixel, gfmt, n64FmtSz, r_crc64, getDumpPath(isStrongCrc)); + 
return dmptxImpl(src, width, height, rowStridePixel, gfmt, n64FmtSz, r_crc64, getTexDumpPath(isStrongCrc)); } boolean TxFilter::dmptxMipmap(uint8 *src, int width, int height, int rowStridePixel, ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum crc64, Checksum crc64base, boolean isStrongCrc) { - tx_wstring path = getMipMapDumpPath(n64FmtSz, crc64base, isStrongCrc); + tx_wstring path = getMipMapTexDumpPath(n64FmtSz, crc64base, isStrongCrc); return dmptxImpl(src, width, height, rowStridePixel, gfmt, n64FmtSz, crc64, path); } -//boolean -//TxFilter::dmpMipmap(GHQDumpTexInfo* infos, int numLevel, boolean isStrongCrc) -//{ -// if (!infos || !numLevel) -// return 0; -// tx_wstring path = getMipMapDumpPath(infos[0].n64_format_size, infos[0].checksum, isStrongCrc); -// for (int i = 0; i < numLevel; ++i) { -// GHQDumpTexInfo& info = infos[i]; -// dmptxImpl(info.data, info.width, info.height, info.stride, info.texture_format, info.n64_format_size, info.checksum, path); -// } -// return 1; -//} - boolean TxFilter::reloadhirestex() { diff --git a/src/GLideNHQ/TxFilter.h b/src/GLideNHQ/TxFilter.h index 96a209202..c10147599 100644 --- a/src/GLideNHQ/TxFilter.h +++ b/src/GLideNHQ/TxFilter.h @@ -53,10 +53,10 @@ class TxFilter TxImage *_txImage; boolean _initialized; void clear(); - tx_wstring getDumpPath(boolean isStrongCrc); - tx_wstring getMipMapDumpPath(N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc); + tx_wstring getTexDumpPath(boolean isStrongCrc); + tx_wstring getMipMapTexDumpPath(N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc); boolean dmptxImpl(uint8 *src, int width, int height, int rowStridePixel, - ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, tx_wstring const& dumpPath); + ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, tx_wstring const& texDumpPath); public: ~TxFilter(); @@ -88,7 +88,6 @@ class TxFilter ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc); boolean dmptxMipmap(uint8 
*src, int width, int height, int rowStridePixel, ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum crc64, Checksum crc64base, boolean isStrongCrc); - //boolean dmpMipmap(GHQDumpTexInfo* infos, int numLevel, boolean isStrongCrc); boolean reloadhirestex(); void dumpcache(); }; diff --git a/src/GLideNHQ/TxFilterExport.cpp b/src/GLideNHQ/TxFilterExport.cpp index 98395b32a..c12c6e6a9 100644 --- a/src/GLideNHQ/TxFilterExport.cpp +++ b/src/GLideNHQ/TxFilterExport.cpp @@ -120,16 +120,6 @@ txfilter_dmptx_mipmap(uint8 *src, int width, int height, int rowStridePixel, uin return 0; } -TAPI boolean TAPIENTRY -txfilter_dmp_mipmap(GHQDumpTexInfo* infos, int numLevels) -{ - for (int i = 0; i < numLevels; ++i) { - - } - - return 0; -} - TAPI boolean TAPIENTRY txfilter_reloadhirestex() { diff --git a/src/GLideNHQ/TxFilterExport.h b/src/GLideNHQ/TxFilterExport.h index ace133561..916b56a6f 100644 --- a/src/GLideNHQ/TxFilterExport.h +++ b/src/GLideNHQ/TxFilterExport.h @@ -165,19 +165,6 @@ struct GHQTexInfo N64FormatSize n64_format_size{ 0u, 0u }; }; -struct GHQDumpTexInfo -{ - GHQDumpTexInfo(): checksum(0) {} - ~GHQDumpTexInfo() {} - unsigned char *data{ nullptr }; - unsigned int width{ 0u }; - unsigned int height{ 0u }; - unsigned int stride{ 0u }; - unsigned short texture_format{ 0u }; - N64FormatSize n64_format_size{ 0u, 0u }; - Checksum checksum; -}; - /* Callback to display hires texture info. 
* Gonetz * @@ -250,9 +237,6 @@ txfilter_dmptx_strong(uint8 *src, int width, int height, int rowStridePixel, uin TAPI boolean TAPIENTRY txfilter_dmptx_mipmap(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, N64FormatSize n64FmtSz, Checksum crc64, Checksum crc64base, boolean strongCRC); -TAPI boolean TAPIENTRY -txfilter_dmp_mipmap(GHQDumpTexInfo* infos, int numLevels); - TAPI boolean TAPIENTRY txfilter_reloadhirestex(); diff --git a/src/Textures.cpp b/src/Textures.cpp index 50586376d..7731359eb 100644 --- a/src/Textures.cpp +++ b/src/Textures.cpp @@ -1171,10 +1171,13 @@ static bool _calculateHiresTextureCRC(u32 _tileIdx, CachedTexture *_pTexture, Te gDPLoadTileInfo & info = gDP.loadInfo[_pTexture->tMem]; - // Temporal workaround for crash problem with mip-mapped textures. See #1711 for details. - // TODO: make proper fix. - if (info.texAddress == 0) - return false; + bool mipMapWorkaround = info.texAddress == 0U; + if (mipMapWorkaround) + { + // Workaround for crash problem with mip-mapped textures. See #1711 for details. + info = gDP.loadInfo[TextureCache::get().current[0]->tMem]; + info.texAddress += _pTexture->tMem << 3; + } _ldata.addr = (u8*)(RDRAM + info.texAddress); if (info.loadType == LOADTYPE_TILE) { @@ -1236,6 +1239,9 @@ static bool _calculateHiresTextureCRC(u32 _tileIdx, CachedTexture *_pTexture, Te if (config.textureFilter.txStrongCRC) _ldata.strongcrc = txfilter_checksum_strong(_ldata.addr, _ldata.width, _ldata.height, _pTexture->size, _ldata.bpl, _ldata.paladdr); + if (mipMapWorkaround) + info.texAddress = 0U; + return true; } @@ -1745,12 +1751,6 @@ void TextureCache::_loadFast(u32 _tile, CachedTexture *_pTexture) void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) { - //gDPTile * pTile = _tile < 2 ? gSP.textureTile[_tile] : &gDP.tiles[_tile]; - //const u32 tMemMask = gDP.otherMode.textureLUT == G_TT_NONE ? 
0x1FF : 0xFF; - //gDPLoadTileInfo &info = gDP.loadInfo[pTile->tmem & tMemMask]; - //if (info.texAddress == 0x0071a0f0 || info.texAddress == 0x00719ef0) - // int t = 0; - u64 ricecrc = 0; u64 strongcrc = 0; if (_loadHiresTexture(gSP.texture.tile + _tile, _pTexture, ricecrc, strongcrc)) From 7e3204f78c84a25b19a09aedfdca72f7a3efe134 Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Sun, 21 Apr 2024 11:39:59 +0700 Subject: [PATCH 06/12] Store mipmaps with detail tile in *_detail folder. --- src/GLideNHQ/TxFilter.cpp | 25 +++++++++--------- src/GLideNHQ/TxFilter.h | 4 +-- src/GLideNHQ/TxFilterExport.cpp | 5 ++-- src/GLideNHQ/TxFilterExport.h | 3 ++- src/Textures.cpp | 45 ++++++++++++++++----------------- 5 files changed, 41 insertions(+), 41 deletions(-) diff --git a/src/GLideNHQ/TxFilter.cpp b/src/GLideNHQ/TxFilter.cpp index bf3aa6b8f..f147ea594 100644 --- a/src/GLideNHQ/TxFilter.cpp +++ b/src/GLideNHQ/TxFilter.cpp @@ -604,23 +604,22 @@ TxFilter::getTexDumpPath(boolean isStrongCrc) } tx_wstring -TxFilter::getMipMapTexDumpPath(N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc) +TxFilter::getMipMapTexDumpPath(N64FormatSize n64FmtSz, Checksum detailedTileCRC, Checksum firstTileCRC, boolean isStrongCrc) { tx_wstring path = getTexDumpPath(isStrongCrc); if (path.empty()) return path; wchar_t wbuf[256]; -#if 1 - tx_swprintf(wbuf, 256, wst("/%ls#%08X_mipmap"), _ident.c_str(), r_crc64._texture); -#else - if (n64FmtSz._format == 0x2) - tx_swprintf(wbuf, 256, wst("/%ls#%08X#%01X#%01X#%08X_mipmap"), _ident.c_str(), r_crc64._texture, n64FmtSz._format, n64FmtSz._size, r_crc64._palette); - else - tx_swprintf(wbuf, 256, wst("/%ls#%08X#%01X#%01X_mipmap"), _ident.c_str(), r_crc64._texture, n64FmtSz._format, n64FmtSz._size); -#endif + if (detailedTileCRC._texture != 0u) { + tx_swprintf(wbuf, 256, wst("/%ls#%08X_detail"), _ident.c_str(), detailedTileCRC._texture); + path.append(wbuf); + } + if (firstTileCRC._texture != 0u) { + tx_swprintf(wbuf, 256, wst("/%ls#%08X_mipmap"), 
_ident.c_str(), firstTileCRC._texture); + path.append(wbuf); + } - path.append(wbuf); return path; } @@ -681,10 +680,10 @@ TxFilter::dmptx(uint8 *src, int width, int height, int rowStridePixel, boolean TxFilter::dmptxMipmap(uint8 *src, int width, int height, int rowStridePixel, - ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum crc64, Checksum crc64base, boolean isStrongCrc) + ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum detailedTileCRC, Checksum firstTileCRC, Checksum tileCRC, boolean isStrongCrc) { - tx_wstring path = getMipMapTexDumpPath(n64FmtSz, crc64base, isStrongCrc); - return dmptxImpl(src, width, height, rowStridePixel, gfmt, n64FmtSz, crc64, path); + tx_wstring path = getMipMapTexDumpPath(n64FmtSz, detailedTileCRC, firstTileCRC, isStrongCrc); + return dmptxImpl(src, width, height, rowStridePixel, gfmt, n64FmtSz, tileCRC, path); } boolean diff --git a/src/GLideNHQ/TxFilter.h b/src/GLideNHQ/TxFilter.h index c10147599..4e8b39ddb 100644 --- a/src/GLideNHQ/TxFilter.h +++ b/src/GLideNHQ/TxFilter.h @@ -54,7 +54,7 @@ class TxFilter boolean _initialized; void clear(); tx_wstring getTexDumpPath(boolean isStrongCrc); - tx_wstring getMipMapTexDumpPath(N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc); + tx_wstring getMipMapTexDumpPath(N64FormatSize n64FmtSz, Checksum detailedTileCRC, Checksum firstTileCRC, boolean isStrongCrc); boolean dmptxImpl(uint8 *src, int width, int height, int rowStridePixel, ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, tx_wstring const& texDumpPath); @@ -87,7 +87,7 @@ class TxFilter boolean dmptx(uint8 *src, int width, int height, int rowStridePixel, ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc); boolean dmptxMipmap(uint8 *src, int width, int height, int rowStridePixel, - ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum crc64, Checksum crc64base, boolean isStrongCrc); + ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum detailedTileCRC, Checksum firstTileCRC, Checksum 
tileCRC, boolean isStrongCrc); boolean reloadhirestex(); void dumpcache(); }; diff --git a/src/GLideNHQ/TxFilterExport.cpp b/src/GLideNHQ/TxFilterExport.cpp index c12c6e6a9..d88bbd7c9 100644 --- a/src/GLideNHQ/TxFilterExport.cpp +++ b/src/GLideNHQ/TxFilterExport.cpp @@ -112,10 +112,11 @@ txfilter_dmptx_strong(uint8 *src, int width, int height, int rowStridePixel, uin } TAPI boolean TAPIENTRY -txfilter_dmptx_mipmap(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, N64FormatSize n64FmtSz, Checksum crc64, Checksum crc64base, boolean strongCRC) +txfilter_dmptx_mipmap(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, N64FormatSize n64FmtSz, + Checksum detailedTileCRC, Checksum firstTileCRC, Checksum tileCRC, boolean isStrongCRC) { if (txFilter) - return txFilter->dmptxMipmap(src, width, height, rowStridePixel, ColorFormat(u32(gfmt)), n64FmtSz, crc64, crc64base, strongCRC); + return txFilter->dmptxMipmap(src, width, height, rowStridePixel, ColorFormat(u32(gfmt)), n64FmtSz, detailedTileCRC, firstTileCRC, tileCRC, isStrongCRC); return 0; } diff --git a/src/GLideNHQ/TxFilterExport.h b/src/GLideNHQ/TxFilterExport.h index 916b56a6f..3b5ce0b65 100644 --- a/src/GLideNHQ/TxFilterExport.h +++ b/src/GLideNHQ/TxFilterExport.h @@ -235,7 +235,8 @@ TAPI boolean TAPIENTRY txfilter_dmptx_strong(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, N64FormatSize n64FmtSz, Checksum r_crc64); TAPI boolean TAPIENTRY -txfilter_dmptx_mipmap(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, N64FormatSize n64FmtSz, Checksum crc64, Checksum crc64base, boolean strongCRC); +txfilter_dmptx_mipmap(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, N64FormatSize n64FmtSz, + Checksum detailedTileCRC, Checksum firstTileCRC, Checksum tileCRC, boolean isStrongCRC); TAPI boolean TAPIENTRY txfilter_reloadhirestex(); diff --git a/src/Textures.cpp b/src/Textures.cpp index 7731359eb..26a1a406d 100644 --- a/src/Textures.cpp 
+++ b/src/Textures.cpp @@ -1812,19 +1812,22 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) if (_pTexture->max_level > 0) { - u32 mipLevel = 0; - u32 texDataOffset = 16; // number of gDP.tiles * 2 - - u64 ricecrcbase = 0u; - u64 strongcrcbase = 0u; + u64 detailTileCrc = 0U; + u64 firstTileCrc = 0U; if (needDump) { TexLoadData ldata; if (_calculateHiresTextureCRC(gSP.texture.tile, current[0], ldata)) { - ricecrcbase = ldata.ricecrc; - strongcrcbase = ldata.strongcrc; + firstTileCrc = config.textureFilter.txStrongCRC ? ldata.strongcrc : ldata.ricecrc; + if (gDP.otherMode.textureDetail == G_TD_DETAIL) { + detailTileCrc = firstTileCrc; + firstTileCrc = config.textureFilter.txStrongCRC ? strongcrc : ricecrc; + } } } + u32 mipLevel = 0; + u32 texDataOffset = 16; // number of gDP.tiles * 2 + // Load all tiles into one 1D texture atlas. while (true) { @@ -1842,15 +1845,11 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) if (needDump) { TexLoadData ldata; if (_calculateHiresTextureCRC(gSP.texture.tile + mipLevel + 1, &tmptex, ldata)) { - config.textureFilter.txStrongCRC ? - txfilter_dmptx_mipmap(reinterpret_cast(&m_tempTextureHolder[texDataOffset]), - tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), - N64FormatSize(_pTexture->format, _pTexture->size), - ldata.strongcrc, strongcrcbase, TRUE) : - txfilter_dmptx_mipmap(reinterpret_cast(&m_tempTextureHolder[texDataOffset]), + u64 tileCrc = config.textureFilter.txStrongCRC ? 
ldata.strongcrc : ldata.ricecrc; + txfilter_dmptx_mipmap(reinterpret_cast(&m_tempTextureHolder[texDataOffset]), tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), N64FormatSize(_pTexture->format, _pTexture->size), - ldata.ricecrc, ricecrcbase, FALSE); + detailTileCrc, firstTileCrc, tileCrc, config.textureFilter.txStrongCRC); } } @@ -1904,15 +1903,15 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) if (needDump) { if (isMipMapTex) { - config.textureFilter.txStrongCRC ? - txfilter_dmptx_mipmap(reinterpret_cast(m_tempTextureHolder.data()), - tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), - N64FormatSize(_pTexture->format, _pTexture->size), - strongcrc, strongcrc, TRUE) : - txfilter_dmptx_mipmap(reinterpret_cast(m_tempTextureHolder.data()), - tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), - N64FormatSize(_pTexture->format, _pTexture->size), - ricecrc, ricecrc, FALSE); + u64 detailTileCrc = 0U; + u64 firstTileCrc = config.textureFilter.txStrongCRC ? strongcrc : ricecrc; + u64 tileCrc = firstTileCrc; + if (gDP.otherMode.textureDetail == G_TD_DETAIL) + std::swap(detailTileCrc, firstTileCrc); + txfilter_dmptx_mipmap(reinterpret_cast(m_tempTextureHolder.data()), + tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), + N64FormatSize(_pTexture->format, _pTexture->size), + detailTileCrc, firstTileCrc, tileCrc, config.textureFilter.txStrongCRC); } else { config.textureFilter.txStrongCRC ? From ba3733ac03ef9b47747eca45febdc151e2f97835 Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Sun, 21 Apr 2024 13:34:53 +0700 Subject: [PATCH 07/12] Use seed to calculate checksum of mipmap tiles. 
--- src/GLideNHQ/TxFilter.cpp | 8 ++--- src/GLideNHQ/TxFilter.h | 4 +-- src/GLideNHQ/TxFilterExport.cpp | 8 ++--- src/GLideNHQ/TxFilterExport.h | 4 +-- src/GLideNHQ/TxUtil.cpp | 62 ++++++++++++++++----------------- src/GLideNHQ/TxUtil.h | 16 ++++----- src/Textures.cpp | 24 ++++++------- 7 files changed, 63 insertions(+), 63 deletions(-) diff --git a/src/GLideNHQ/TxFilter.cpp b/src/GLideNHQ/TxFilter.cpp index f147ea594..4f770fc0c 100644 --- a/src/GLideNHQ/TxFilter.cpp +++ b/src/GLideNHQ/TxFilter.cpp @@ -572,19 +572,19 @@ TxFilter::hirestex(uint64 g64crc, Checksum r_crc64, uint16 *palette, N64FormatSi } uint64 -TxFilter::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette) +TxFilter::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette, uint64 seed) { if (_options & (HIRESTEXTURES_MASK|DUMP_TEX)) - return TxUtil::checksum64(src, width, height, size, rowStride, palette); + return TxUtil::checksum64(src, width, height, size, rowStride, palette, seed); return 0; } uint64 -TxFilter::checksum64strong(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette) +TxFilter::checksum64strong(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette, uint64 seed) { if (_options & (HIRESTEXTURES_MASK | DUMP_TEX)) - return TxUtil::checksum64strong(src, width, height, size, rowStride, palette); + return TxUtil::checksum64strong(src, width, height, size, rowStride, palette, seed); return 0; } diff --git a/src/GLideNHQ/TxFilter.h b/src/GLideNHQ/TxFilter.h index 4e8b39ddb..6135951e5 100644 --- a/src/GLideNHQ/TxFilter.h +++ b/src/GLideNHQ/TxFilter.h @@ -82,8 +82,8 @@ class TxFilter uint16 *palette, N64FormatSize n64FmtSz, GHQTexInfo *info); - uint64 checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette); - uint64 checksum64strong(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette); + uint64 checksum64(uint8 *src, int width, int 
height, int size, int rowStride, uint8 *palette, uint64 seed); + uint64 checksum64strong(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette, uint64 seed); boolean dmptx(uint8 *src, int width, int height, int rowStridePixel, ColorFormat gfmt, N64FormatSize n64FmtSz, Checksum r_crc64, boolean isStrongCrc); boolean dmptxMipmap(uint8 *src, int width, int height, int rowStridePixel, diff --git a/src/GLideNHQ/TxFilterExport.cpp b/src/GLideNHQ/TxFilterExport.cpp index d88bbd7c9..b254bc65a 100644 --- a/src/GLideNHQ/TxFilterExport.cpp +++ b/src/GLideNHQ/TxFilterExport.cpp @@ -76,19 +76,19 @@ txfilter_hirestex(uint64 g64crc, Checksum r_crc64, uint16 *palette, N64FormatSiz } TAPI uint64 TAPIENTRY -txfilter_checksum(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette) +txfilter_checksum(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette, uint64 seed) { if (txFilter) - return txFilter->checksum64(src, width, height, size, rowStride, palette); + return txFilter->checksum64(src, width, height, size, rowStride, palette, seed); return 0; } TAPI uint64 TAPIENTRY -txfilter_checksum_strong(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette) +txfilter_checksum_strong(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette, uint64 seed) { if (txFilter) - return txFilter->checksum64strong(src, width, height, size, rowStride, palette); + return txFilter->checksum64strong(src, width, height, size, rowStride, palette, seed); return 0; } diff --git a/src/GLideNHQ/TxFilterExport.h b/src/GLideNHQ/TxFilterExport.h index 3b5ce0b65..394701495 100644 --- a/src/GLideNHQ/TxFilterExport.h +++ b/src/GLideNHQ/TxFilterExport.h @@ -223,10 +223,10 @@ TAPI boolean TAPIENTRY txfilter_hirestex(uint64 g64crc, Checksum r_crc64, uint16 *palette, N64FormatSize n64FmtSz, GHQTexInfo *info); TAPI uint64 TAPIENTRY -txfilter_checksum(uint8 *src, int width, int height, int size, int rowStride, uint8 
*palette); +txfilter_checksum(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette, uint64 seed); TAPI uint64 TAPIENTRY -txfilter_checksum_strong(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette); +txfilter_checksum_strong(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette, uint64 seed); TAPI boolean TAPIENTRY txfilter_dmptx(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, N64FormatSize n64FmtSz, Checksum r_crc64); diff --git a/src/GLideNHQ/TxUtil.cpp b/src/GLideNHQ/TxUtil.cpp index 57a132fda..8e33b61ac 100644 --- a/src/GLideNHQ/TxUtil.cpp +++ b/src/GLideNHQ/TxUtil.cpp @@ -79,7 +79,7 @@ TxUtil::sizeofTx(int width, int height, ColorFormat format) } uint64 -TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette) +TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette, uint64 seed) { /* Rice CRC32 for now. We can switch this to Jabo MD5 or * any other custom checksum. 
@@ -88,35 +88,35 @@ TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, u if (!src) return 0; - uint64 crc64Ret = 0; + uint64 crc64Ret = 0U; if (palette) { uint32 crc32 = 0, cimax = 0; switch (size & 0xff) { case 1: - if (RiceCRC32_CI8(src, width, height, rowStride, &crc32, &cimax)) { - crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 512); + if (RiceCRC32_CI8(src, width, height, rowStride, &crc32, &cimax, seed)) { + crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 512, 0); crc64Ret <<= 32; crc64Ret |= (uint64)crc32; } break; case 0: - if (RiceCRC32_CI4(src, width, height, rowStride, &crc32, &cimax)) { - crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 32); + if (RiceCRC32_CI4(src, width, height, rowStride, &crc32, &cimax, seed)) { + crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 32, 0); crc64Ret <<= 32; crc64Ret |= (uint64)crc32; } } } - if (!crc64Ret) { - crc64Ret = (uint64)RiceCRC32(src, width, height, size, rowStride); + if (crc64Ret == 0U) { + crc64Ret = (uint64)RiceCRC32(src, width, height, size, rowStride, seed); } return crc64Ret; } uint64 -TxUtil::checksum64strong(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette) +TxUtil::checksum64strong(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette, uint64 seed) { /* XXH3_64bits for strong 32bit texture hash. 
*/ /* Returned value is 64bits: hi=palette crc32 low=texture crc32 */ @@ -130,15 +130,15 @@ TxUtil::checksum64strong(uint8 *src, int width, int height, int size, int rowStr uint32 crc32 = 0, cimax = 0; switch (size & 0xff) { case 1: - if (StrongCRC32_CI8(src, width, height, rowStride, &crc32, &cimax)) { - crc64Ret = StrongCRC32(palette, cimax + 1, 1, 2, 512); + if (StrongCRC32_CI8(src, width, height, rowStride, &crc32, &cimax, seed)) { + crc64Ret = StrongCRC32(palette, cimax + 1, 1, 2, 512, 0); crc64Ret <<= 32; crc64Ret |= crc32; } break; case 0: - if (StrongCRC32_CI4(src, width, height, rowStride, &crc32, &cimax)) { - crc64Ret = StrongCRC32(palette, cimax + 1, 1, 2, 32); + if (StrongCRC32_CI4(src, width, height, rowStride, &crc32, &cimax, seed)) { + crc64Ret = StrongCRC32(palette, cimax + 1, 1, 2, 32, 0); crc64Ret <<= 32; crc64Ret |= crc32; } @@ -146,7 +146,7 @@ TxUtil::checksum64strong(uint8 *src, int width, int height, int size, int rowStr } if (!crc64Ret) { - crc64Ret = StrongCRC32(src, width, height, size, rowStride); + crc64Ret = StrongCRC32(src, width, height, size, rowStride, seed); } return crc64Ret; @@ -167,11 +167,11 @@ TxUtil::checksum64strong(uint8 *src, int width, int height, int size, int rowStr * bpl); */ uint32 -TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride) +TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride, uint64 seed) { /* NOTE: bytes_per_width must be equal or larger than 4 */ - uint32 crc32Ret = 0; + uint32 crc32Ret = static_cast(seed & 0xFFFFFFFF); const uint32 bytesPerLine = width << size >> 1; try { @@ -183,7 +183,7 @@ TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStri mov ecx, dword ptr [src]; mov eax, dword ptr [height]; - mov edx, 0; + mov edx, dword ptr [crc32Ret]; dec eax; loop2: @@ -274,11 +274,11 @@ uint8 CalculateMaxCI4b(const uint8* src, uint32 width, uint32 height, uint32 row boolean TxUtil::RiceCRC32_CI4(const uint8* src, int 
width, int height, int rowStride, - uint32* crc32, uint32* cimax) + uint32* crc32, uint32* cimax, uint64 seed) { /* NOTE: bytes_per_width must be equal or larger than 4 */ - uint32 crc32Ret = 0; + uint32 crc32Ret = static_cast(seed & 0xFFFFFFFF); uint32 cimaxRet = 0; const uint32 bytes_per_width = width >> 1; @@ -294,7 +294,7 @@ TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int rowStride, mov ecx, dword ptr [src]; mov eax, dword ptr [height]; - mov edx, 0; + mov edx, dword ptr [crc32Ret]; mov edi, 0; dec eax; @@ -395,7 +395,7 @@ TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int rowStride, pop ebx; } #else - crc32Ret = RiceCRC32(src, width, height, 0, rowStride); + crc32Ret = RiceCRC32(src, width, height, 0, rowStride, seed); cimaxRet = CalculateMaxCI4b(src, width, height, rowStride); #endif } catch(...) { @@ -410,11 +410,11 @@ TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int rowStride, boolean TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int rowStride, - uint32* crc32, uint32* cimax) + uint32* crc32, uint32* cimax, uint64 seed) { /* NOTE: bytes_per_width must be equal or larger than 4 */ - uint32 crc32Ret = 0; + uint32 crc32Ret = static_cast(seed & 0xFFFFFFFF); uint32 cimaxRet = 0; /* 8bit CI */ @@ -428,7 +428,7 @@ TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int rowStride, mov ecx, dword ptr [src]; mov eax, dword ptr [height]; - mov edx, 0; + mov edx, dword ptr[crc32Ret]; mov edi, 0; dec eax; @@ -497,7 +497,7 @@ TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int rowStride, pop ebx; } #else - crc32Ret = RiceCRC32(src, width, height, 1, rowStride); + crc32Ret = RiceCRC32(src, width, height, 1, rowStride, seed); cimaxRet = CalculateMaxCI8b(src, width, height, rowStride); #endif } catch(...) 
{ @@ -511,7 +511,7 @@ TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int rowStride, } uint32 -TxUtil::StrongCRC32(const uint8* src, int width, int height, int size, int rowStride) +TxUtil::StrongCRC32(const uint8* src, int width, int height, int size, int rowStride, uint64 seed) { /* NOTE: bytesPerLine must be equal or larger than 4 */ const uint32 bytesPerLine = width << size >> 1; @@ -533,7 +533,7 @@ TxUtil::StrongCRC32(const uint8* src, int width, int height, int size, int rowSt } src += rowStride; } - crc = XXH3_64bits(buf.data(), static_cast(pData - buf.data())); + crc = XXH3_64bits_withSeed(buf.data(), static_cast(pData - buf.data()), seed); } catch (...) { DBG_INFO(80, wst("Error: StrongCRC32 exception!\n")); @@ -544,13 +544,13 @@ TxUtil::StrongCRC32(const uint8* src, int width, int height, int size, int rowSt boolean TxUtil::StrongCRC32_CI4(const uint8* src, int width, int height, int rowStride, - uint32* crc32, uint32* cimax) + uint32* crc32, uint32* cimax, uint64 seed) { /* NOTE: bytes_per_width must be equal or larger than 4 */ /* 4bit CI */ try { - uint32 crc32Ret = StrongCRC32(src, width, height, 0, rowStride); + uint32 crc32Ret = StrongCRC32(src, width, height, 0, rowStride, seed); uint32 cimaxRet = CalculateMaxCI4b(src, width, height, rowStride); *crc32 = crc32Ret; *cimax = cimaxRet; @@ -563,13 +563,13 @@ TxUtil::StrongCRC32_CI4(const uint8* src, int width, int height, int rowStride, boolean TxUtil::StrongCRC32_CI8(const uint8* src, int width, int height, int rowStride, - uint32* crc32, uint32* cimax) + uint32* crc32, uint32* cimax, uint64 seed) { /* NOTE: bytes_per_width must be equal or larger than 4 */ /* 8bit CI */ try { - uint32 crc32Ret = StrongCRC32(src, width, height, 1, rowStride); + uint32 crc32Ret = StrongCRC32(src, width, height, 1, rowStride, seed); uint32 cimaxRet = CalculateMaxCI8b(src, width, height, rowStride); *crc32 = crc32Ret; *cimax = cimaxRet; diff --git a/src/GLideNHQ/TxUtil.h b/src/GLideNHQ/TxUtil.h index 
1c0b2e61c..28397f685 100644 --- a/src/GLideNHQ/TxUtil.h +++ b/src/GLideNHQ/TxUtil.h @@ -38,21 +38,21 @@ class TxUtil { private: - static uint32 RiceCRC32(const uint8* src, int width, int height, int size, int rowStride); + static uint32 RiceCRC32(const uint8* src, int width, int height, int size, int rowStride, uint64 seed); static boolean RiceCRC32_CI4(const uint8* src, int width, int height, int rowStride, - uint32* crc32, uint32* cimax); + uint32* crc32, uint32* cimax, uint64 seed); static boolean RiceCRC32_CI8(const uint8* src, int width, int height, int rowStride, - uint32* crc32, uint32* cimax); - static uint32 StrongCRC32(const uint8* src, int width, int height, int size, int rowStride); + uint32* crc32, uint32* cimax, uint64 seed); + static uint32 StrongCRC32(const uint8* src, int width, int height, int size, int rowStride, uint64 seed); static boolean StrongCRC32_CI4(const uint8* src, int width, int height, int rowStride, - uint32* crc32, uint32* cimax); + uint32* crc32, uint32* cimax, uint64 seed); static boolean StrongCRC32_CI8(const uint8* src, int width, int height, int rowStride, - uint32* crc32, uint32* cimax); + uint32* crc32, uint32* cimax, uint64 seed); public: static int sizeofTx(int width, int height, ColorFormat format); static uint32 checksumTx(uint8 *data, int width, int height, ColorFormat format); - static uint64 checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette); - static uint64 checksum64strong(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette); + static uint64 checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette, uint64 seed); + static uint64 checksum64strong(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette, uint64 seed); static uint32 getNumberofProcessors(); }; diff --git a/src/Textures.cpp b/src/Textures.cpp index 26a1a406d..e25fe5099 100644 --- a/src/Textures.cpp +++ b/src/Textures.cpp @@ -995,7 +995,7 @@ bool 
TextureCache::_loadHiresBackground(CachedTexture *_pTexture, u64 & _ricecrc } _ricecrc = txfilter_checksum(addr, tile_width, - tile_height, gSP.bgImage.size, bpl, paladdr); + tile_height, gSP.bgImage.size, bpl, paladdr, 0U); GHQTexInfo ghqTexInfo; // TODO: fix problem with zero texture dimensions on GLideNHQ side. if (txfilter_hirestex(_pTexture->crc, _ricecrc, palette, N64FormatSize(_pTexture->format, _pTexture->size), &ghqTexInfo) && @@ -1164,7 +1164,7 @@ struct TexLoadData u64 strongcrc; }; -static bool _calculateHiresTextureCRC(u32 _tileIdx, CachedTexture *_pTexture, TexLoadData & _ldata) +static bool _calculateHiresTextureCRC(u32 _tileIdx, CachedTexture *_pTexture, TexLoadData & _ldata, u64 seed) { if (config.textureFilter.txHiresEnable == 0 || !TFH.isInited()) return false; @@ -1235,9 +1235,9 @@ static bool _calculateHiresTextureCRC(u32 _tileIdx, CachedTexture *_pTexture, Te // palette = (rdp.pal_8 + (gSP.textureTile[_t]->palette << 4)); } - _ldata.ricecrc = txfilter_checksum(_ldata.addr, _ldata.width, _ldata.height, _pTexture->size, _ldata.bpl, _ldata.paladdr); + _ldata.ricecrc = txfilter_checksum(_ldata.addr, _ldata.width, _ldata.height, _pTexture->size, _ldata.bpl, _ldata.paladdr, seed); if (config.textureFilter.txStrongCRC) - _ldata.strongcrc = txfilter_checksum_strong(_ldata.addr, _ldata.width, _ldata.height, _pTexture->size, _ldata.bpl, _ldata.paladdr); + _ldata.strongcrc = txfilter_checksum_strong(_ldata.addr, _ldata.width, _ldata.height, _pTexture->size, _ldata.bpl, _ldata.paladdr, seed); if (mipMapWorkaround) info.texAddress = 0U; @@ -1246,7 +1246,7 @@ static bool _calculateHiresTextureCRC(u32 _tileIdx, CachedTexture *_pTexture, Te } static -void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& _ghqTexInfo/*, u64 _ricecrc*/) +void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& _ghqTexInfo, u64 seed) { u32 texWidth = _ghqTexInfo.width; u32 texHeight = _ghqTexInfo.height; @@ -1275,7 +1275,7 @@ 
void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& //txfilter_dmptx((u8*)pTileData, texWidth, texHeight, // texWidth, (u16)_ghqTexInfo.format, // N64FormatSize(_pTexture->format, _pTexture->size), - // _ricecrc + mipLevel); + // seed + mipLevel); std::copy_n(pTileData, texWidth * texHeight, &m_tempTextureHolder[texDataOffset]); pTileData = &m_tempTextureHolder[texDataOffset]; @@ -1302,7 +1302,7 @@ void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& tmptex.size = mipTile.size; TexLoadData ldata; GHQTexInfo ghqTexInfo; - if (_calculateHiresTextureCRC(gSP.texture.tile + mipLevel + 1, &tmptex, ldata)) { + if (_calculateHiresTextureCRC(gSP.texture.tile + mipLevel + 1, &tmptex, ldata, seed)) { // TODO: fix problem with zero texture dimensions on GLideNHQ side. auto hirestexFound = txfilter_hirestex(_pTexture->crc, ldata.ricecrc, ldata.palette, N64FormatSize(_pTexture->format, _pTexture->size), &ghqTexInfo); if (!hirestexFound) { @@ -1348,7 +1348,7 @@ void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& bool TextureCache::_loadHiresTexture(u32 _tile, CachedTexture *_pTexture, u64 & _ricecrc, u64 & _strongcrc) { TexLoadData ldata; - if (!_calculateHiresTextureCRC(_tile, _pTexture, ldata)) + if (!_calculateHiresTextureCRC(_tile, _pTexture, ldata, 0)) return false; _ricecrc = ldata.ricecrc; @@ -1361,7 +1361,7 @@ bool TextureCache::_loadHiresTexture(u32 _tile, CachedTexture *_pTexture, u64 & if (!hirestexFound) { // Texture with RiceCRC was not found. Try alternative CRC. 
if (_strongcrc == 0U) - _strongcrc = txfilter_checksum_strong(ldata.addr, ldata.width, ldata.height, _pTexture->size, ldata.bpl, ldata.paladdr); + _strongcrc = txfilter_checksum_strong(ldata.addr, ldata.width, ldata.height, _pTexture->size, ldata.bpl, ldata.paladdr, 0); hirestexFound = txfilter_hirestex(_pTexture->crc, _strongcrc, ldata.palette, N64FormatSize(_pTexture->format, _pTexture->size), &ghqTexInfo); } if (hirestexFound && ghqTexInfo.width != 0 && ghqTexInfo.height != 0) { @@ -1375,7 +1375,7 @@ bool TextureCache::_loadHiresTexture(u32 _tile, CachedTexture *_pTexture, u64 & } if (_pTexture->max_level > 0) { - _loadHiresTextureMipMapAccurate(_pTexture, ghqTexInfo/*, _ricecrc*/); + _loadHiresTextureMipMapAccurate(_pTexture, ghqTexInfo, config.textureFilter.txStrongCRC ? _strongcrc : _ricecrc); _updateCachedTexture(ghqTexInfo, _pTexture, ldata.width, ldata.height); return true; } @@ -1816,7 +1816,7 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) u64 firstTileCrc = 0U; if (needDump) { TexLoadData ldata; - if (_calculateHiresTextureCRC(gSP.texture.tile, current[0], ldata)) { + if (_calculateHiresTextureCRC(gSP.texture.tile, current[0], ldata, 0)) { firstTileCrc = config.textureFilter.txStrongCRC ? ldata.strongcrc : ldata.ricecrc; if (gDP.otherMode.textureDetail == G_TD_DETAIL) { detailTileCrc = firstTileCrc; @@ -1844,7 +1844,7 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) if (needDump) { TexLoadData ldata; - if (_calculateHiresTextureCRC(gSP.texture.tile + mipLevel + 1, &tmptex, ldata)) { + if (_calculateHiresTextureCRC(gSP.texture.tile + mipLevel + 1, &tmptex, ldata, mipLevel == 0 ? 0 : firstTileCrc)) { u64 tileCrc = config.textureFilter.txStrongCRC ? 
ldata.strongcrc : ldata.ricecrc; txfilter_dmptx_mipmap(reinterpret_cast(&m_tempTextureHolder[texDataOffset]), tmptex.width, tmptex.height, tmptex.width, (u16)u32(glInternalFormat), From 14b357ac62b603a6fb2d498c55a3ae6054d1de05 Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Sun, 21 Apr 2024 16:02:08 +0700 Subject: [PATCH 08/12] Fix load of HD mipmap tiles. --- src/Textures.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/Textures.cpp b/src/Textures.cpp index e25fe5099..ed8abe8dc 100644 --- a/src/Textures.cpp +++ b/src/Textures.cpp @@ -1246,8 +1246,14 @@ static bool _calculateHiresTextureCRC(u32 _tileIdx, CachedTexture *_pTexture, Te } static -void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& _ghqTexInfo, u64 seed) +void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& _ghqTexInfo, u64 firstTileCrc) { + if (firstTileCrc == 0U) { + TexLoadData ldata; + if (_calculateHiresTextureCRC(gSP.texture.tile, TextureCache::get().current[0], ldata, 0)) + firstTileCrc = config.textureFilter.txStrongCRC ? 
ldata.strongcrc : ldata.ricecrc; + } + u32 texWidth = _ghqTexInfo.width; u32 texHeight = _ghqTexInfo.height; unsigned int totalTexSize = std::max(static_cast(texWidth * texHeight + 16), MIPMAP_TILE_WIDTH) @@ -1275,7 +1281,7 @@ void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& //txfilter_dmptx((u8*)pTileData, texWidth, texHeight, // texWidth, (u16)_ghqTexInfo.format, // N64FormatSize(_pTexture->format, _pTexture->size), - // seed + mipLevel); + // firstTileCrc + mipLevel); std::copy_n(pTileData, texWidth * texHeight, &m_tempTextureHolder[texDataOffset]); pTileData = &m_tempTextureHolder[texDataOffset]; @@ -1296,13 +1302,14 @@ void _loadHiresTextureMipMapAccurate(CachedTexture *_pTexture, GHQTexInfo const& CachedTexture tmptex = *_pTexture; tmptex.tMem = mipTile.tmem; tmptex.palette = mipTile.palette; - tmptex.maskS = mipTile.masks; - tmptex.maskT = mipTile.maskt; tmptex.format = mipTile.format; tmptex.size = mipTile.size; + TileSizes sizes; + _calcTileSizes(tileMipLevel, sizes, nullptr); + tmptex.width = std::min(tmptex.width, static_cast(sizes.width)); TexLoadData ldata; GHQTexInfo ghqTexInfo; - if (_calculateHiresTextureCRC(gSP.texture.tile + mipLevel + 1, &tmptex, ldata, seed)) { + if (_calculateHiresTextureCRC(tileMipLevel, &tmptex, ldata, firstTileCrc)) { // TODO: fix problem with zero texture dimensions on GLideNHQ side. auto hirestexFound = txfilter_hirestex(_pTexture->crc, ldata.ricecrc, ldata.palette, N64FormatSize(_pTexture->format, _pTexture->size), &ghqTexInfo); if (!hirestexFound) { @@ -1373,9 +1380,11 @@ bool TextureCache::_loadHiresTexture(u32 _tile, CachedTexture *_pTexture, u64 & static_cast(gSP.texture.level) : static_cast(gSP.texture.level - 1); } - if (_pTexture->max_level > 0) - { - _loadHiresTextureMipMapAccurate(_pTexture, ghqTexInfo, config.textureFilter.txStrongCRC ? 
_strongcrc : _ricecrc); + if (_pTexture->max_level > 0) { + u64 firstTileCrc = 0; + if (gDP.otherMode.textureDetail == G_TD_DETAIL) + firstTileCrc = config.textureFilter.txStrongCRC ? _strongcrc : _ricecrc; + _loadHiresTextureMipMapAccurate(_pTexture, ghqTexInfo, firstTileCrc); _updateCachedTexture(ghqTexInfo, _pTexture, ldata.width, ldata.height); return true; } From 76b644ac8494b505cf833821667f98309e7dd07f Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Tue, 30 Apr 2024 15:38:23 +0700 Subject: [PATCH 09/12] Add mip-map hotkey --- src/Config.cpp | 4 ++++ src/Config.h | 1 + src/GLideNUI/ConfigDialog.cpp | 2 ++ src/VI.cpp | 16 ++++++++++++++++ src/mupenplus/Config_mupenplus.cpp | 2 ++ 5 files changed, 25 insertions(+) diff --git a/src/Config.cpp b/src/Config.cpp index 8a2463f45..f27ec92aa 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -184,6 +184,8 @@ const char* Config::hotkeyIniName(u32 _idx) return "hkHdTexReload"; case Config::HotKey::hkHdTexToggle: return "hkHdTexToggle"; + case Config::HotKey::hkMipMap: + return "hkMipMap"; case Config::HotKey::hkTexCoordBounds: return "hkTexCoordBounds"; case Config::HotKey::hkNativeResTexrects: @@ -224,6 +226,8 @@ const char* Config::enabledHotkeyIniName(u32 _idx) return "hkHdTexReloadEnabled"; case Config::HotKey::hkHdTexToggle: return "hkHdTexToggleEnabled"; + case Config::HotKey::hkMipMap: + return "hkMipMapEnabled"; case Config::HotKey::hkTexCoordBounds: return "hkTexCoordBoundsEnabled"; case Config::HotKey::hkNativeResTexrects: diff --git a/src/Config.h b/src/Config.h index 053a27622..c14aeee48 100644 --- a/src/Config.h +++ b/src/Config.h @@ -228,6 +228,7 @@ struct Config hkTexDump = 0, hkHdTexReload, hkHdTexToggle, + hkMipMap, hkTexCoordBounds, hkNativeResTexrects, hkVsync, diff --git a/src/GLideNUI/ConfigDialog.cpp b/src/GLideNUI/ConfigDialog.cpp index e00cee456..7fee187b2 100644 --- a/src/GLideNUI/ConfigDialog.cpp +++ b/src/GLideNUI/ConfigDialog.cpp @@ -81,6 +81,8 @@ QString 
ConfigDialog::_hotkeyDescription(quint32 _idx) const return tr("Reload HD textures"); case Config::HotKey::hkHdTexToggle: return tr("Toggle HD textures"); + case Config::HotKey::hkMipMap: + return tr("Toggle mip-map emulation"); case Config::HotKey::hkTexCoordBounds: return tr("Toggle texcoords bounds"); case Config::HotKey::hkNativeResTexrects: diff --git a/src/VI.cpp b/src/VI.cpp index fafa695d7..ab773b041 100644 --- a/src/VI.cpp +++ b/src/VI.cpp @@ -142,6 +142,22 @@ static void checkHotkeys() } } + if (osal_is_key_pressed(config.hotkeys.enabledKeys[Config::hkMipMap], 0x0001)) { + config.generalEmulation.enableLOD = !config.generalEmulation.enableLOD; + if (config.generalEmulation.enableInaccurateTextureCoordinates == 0) { + textureCache().clear(); + } else { + // Need to rebuild shaders + dwnd().stop(); + dwnd().start(); + } + if (config.generalEmulation.enableLOD == 0) + dwnd().getDrawer().showMessage("Mip-map emulation off\n", Milliseconds(1000)); + else + dwnd().getDrawer().showMessage("Mip-map emulation on\n", Milliseconds(1000)); + } + + if (osal_is_key_pressed(config.hotkeys.enabledKeys[Config::hkTexCoordBounds], 0x0001)) { if (config.graphics2D.enableTexCoordBounds == 0) dwnd().getDrawer().showMessage("Bound texrect texture coordinates on\n", Milliseconds(1000)); diff --git a/src/mupenplus/Config_mupenplus.cpp b/src/mupenplus/Config_mupenplus.cpp index 2d9315d62..c0a18c6e0 100644 --- a/src/mupenplus/Config_mupenplus.cpp +++ b/src/mupenplus/Config_mupenplus.cpp @@ -29,6 +29,8 @@ const char* _hotkeyDescription(u32 _idx) return "Hotkey: reload HD textures"; case Config::HotKey::hkHdTexToggle: return "Hotkey: toggle HD textures"; + case Config::HotKey::hkMipMap: + return "Hotkey: toggle mip-map emulation"; case Config::HotKey::hkTexCoordBounds: return "Hotkey: toggle texcoords bounds"; case Config::HotKey::hkNativeResTexrects: From 0ce8baf1af072176d9d24f9d2244956870dbb6ab Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Tue, 30 Apr 2024 14:03:00 +0700 
Subject: [PATCH 10/12] Do not create texture atlas if mipmap emulation is off. --- .../GLSL/glsl_CombinerProgramUniformFactoryAccurate.cpp | 3 ++- src/Textures.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramUniformFactoryAccurate.cpp b/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramUniformFactoryAccurate.cpp index 31600ddc0..86753535b 100644 --- a/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramUniformFactoryAccurate.cpp +++ b/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramUniformFactoryAccurate.cpp @@ -111,7 +111,8 @@ class UMipmap : public UniformGroup maxTile = std::min(gSP.texture.level, 1u); // Hack for HD textures uMaxTile.set(maxTile, _force); - bool bNoAtlasTex = (_pTexture != nullptr && _pTexture->bHDTexture && _pTexture->max_level == 0) || + bool bNoAtlasTex = config.generalEmulation.enableLOD == 0 || + (_pTexture != nullptr && _pTexture->bHDTexture && _pTexture->max_level == 0) || maxTile == 0 || gDP.otherMode.textureLOD != G_TL_LOD || (gDP.otherMode.textureDetail != G_TD_DETAIL && maxTile == 1); diff --git a/src/Textures.cpp b/src/Textures.cpp index ed8abe8dc..2d27e1c6e 100644 --- a/src/Textures.cpp +++ b/src/Textures.cpp @@ -1373,6 +1373,7 @@ bool TextureCache::_loadHiresTexture(u32 _tile, CachedTexture *_pTexture, u64 & } if (hirestexFound && ghqTexInfo.width != 0 && ghqTexInfo.height != 0) { if (config.generalEmulation.enableInaccurateTextureCoordinates == 0 && + config.generalEmulation.enableLOD != 0 && tile > 0 && currentCombiner()->usesLOD() && gSP.texture.level > 1) { @@ -1768,7 +1769,7 @@ void TextureCache::_loadAccurate(u32 _tile, CachedTexture *_pTexture) bool force32bitFormat = false; _pTexture->max_level = 0; - const bool isMipMapTex = currentCombiner()->usesLOD() && gSP.texture.level > 1; + const bool isMipMapTex = config.generalEmulation.enableLOD != 0 && currentCombiner()->usesLOD() && gSP.texture.level > 1; if (isMipMapTex && _tile > 0) { 
_pTexture->max_level = gDP.otherMode.textureDetail == G_TD_DETAIL ? static_cast(gSP.texture.level) : From 54c158c9647072022a477e001bada7923bdfc6f0 Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Tue, 30 Apr 2024 16:05:10 +0700 Subject: [PATCH 11/12] Correct accurate mipmap shader. --- .../GLSL/glsl_CombinerProgramBuilderAccurate.cpp | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramBuilderAccurate.cpp b/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramBuilderAccurate.cpp index dc6423352..94fe52aeb 100644 --- a/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramBuilderAccurate.cpp +++ b/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramBuilderAccurate.cpp @@ -853,19 +853,9 @@ class ShaderMipmap : public ShaderPart "uniform lowp int uTextureDetail; \n" " \n" "mediump float mipmap(out lowp vec4 readtex0, out lowp vec4 readtex1) { \n" - ; - if (config.generalEmulation.enableLOD == 0) { - m_part += - " mediump float lod = 1.0; \n" - ; - } else { - m_part += - " mediump vec2 dx = abs(dFdx(vLodTexCoord)) * uScreenScale; \n" - " mediump vec2 dy = abs(dFdy(vLodTexCoord)) * uScreenScale; \n" - " mediump float lod = max(max(dx.x, dx.y), max(dy.x, dy.y)); \n" - ; - } - m_part += + " mediump vec2 dx = abs(dFdx(vLodTexCoord)) * uScreenScale; \n" + " mediump vec2 dy = abs(dFdy(vLodTexCoord)) * uScreenScale; \n" + " mediump float lod = max(max(dx.x, dx.y), max(dy.x, dy.y)); \n" " lowp int max_tile = min(uTextureDetail != 2 ? 7 : 6, uMaxTile); \n" " mediump float min_lod = uTextureDetail != 0 ? 
uMinLod : 1.0; \n" " mediump float max_lod = pow(2.0, float(max_tile)) - 1.0 / 32.0; \n" From 45eb4c66d8df62967dc56f368c6bd991ff69c32a Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Tue, 30 Apr 2024 15:23:15 +0700 Subject: [PATCH 12/12] textureEngine1 hack --- .../OpenGLContext/GLSL/glsl_CombinerProgramBuilderAccurate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramBuilderAccurate.cpp b/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramBuilderAccurate.cpp index 94fe52aeb..cfe989413 100644 --- a/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramBuilderAccurate.cpp +++ b/src/Graphics/OpenGLContext/GLSL/glsl_CombinerProgramBuilderAccurate.cpp @@ -1139,7 +1139,7 @@ class ShaderTextureEngine : public ShaderPart "void textureEngine1(in highp vec2 texCoord, out highp vec2 tcData[5]) \n" "{ \n" " highp vec2 tileCoord = (WRAP(texCoord * uShiftScale[1] - uTexOffset[1], -1024.0, 1024.0)); \n" - " tileCoord = (tileCoord + uBilinearOffset) * uHDRatio[1] - uBilinearOffset; \n" + " tileCoord = (tileCoord + uBilinearOffset*0.5) * uHDRatio[1] - uBilinearOffset; \n" " mediump vec2 intPart = floor(tileCoord); \n" " highp vec2 tc00 = clampWrapMirror(intPart, uTexWrap[1], uTexClamp[1], uTexWrapEn[1], uTexClampEn[1], uTexMirrorEn[1]); \n" " highp vec2 tc11 = clampWrapMirror(intPart + vec2(1.0,1.0), uTexWrap[1], uTexClamp[1], uTexWrapEn[1], uTexClampEn[1], uTexMirrorEn[1]); \n"