Skip to content

Commit

Permalink
Merge pull request #87984 from BlueCube3310/etcpak-update
Browse files Browse the repository at this point in the history
etcpak: Sync with upstream
  • Loading branch information
akien-mga committed Feb 7, 2024
2 parents 011ed23 + becf5b1 commit f3a5c61
Show file tree
Hide file tree
Showing 12 changed files with 185 additions and 494 deletions.
1 change: 0 additions & 1 deletion modules/etcpak/SCsub
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ thirdparty_dir = "#thirdparty/etcpak/"
thirdparty_sources = [
"Dither.cpp",
"ProcessDxtc.cpp",
"ProcessRgtc.cpp",
"ProcessRGB.cpp",
"Tables.cpp",
]
Expand Down
9 changes: 4 additions & 5 deletions modules/etcpak/image_compress_etcpak.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@

#include <ProcessDxtc.hpp>
#include <ProcessRGB.hpp>
#include <ProcessRgtc.hpp>

EtcpakType _determine_etc_type(Image::UsedChannels p_channels) {
switch (p_channels) {
Expand Down Expand Up @@ -246,11 +245,11 @@ void _compress_etcpak(EtcpakType p_compresstype, Image *r_img) {
break;

case EtcpakType::ETCPAK_TYPE_ETC2_R:
CompressEtc2R8(src_mip_read, dest_mip_write, blocks, mip_w);
CompressEacR(src_mip_read, dest_mip_write, blocks, mip_w);
break;

case EtcpakType::ETCPAK_TYPE_ETC2_RG:
CompressEtc2RG8(src_mip_read, dest_mip_write, blocks, mip_w);
CompressEacRg(src_mip_read, dest_mip_write, blocks, mip_w);
break;

case EtcpakType::ETCPAK_TYPE_DXT1:
Expand All @@ -263,11 +262,11 @@ void _compress_etcpak(EtcpakType p_compresstype, Image *r_img) {
break;

case EtcpakType::ETCPAK_TYPE_RGTC_R:
CompressRgtcR(src_mip_read, dest_mip_write, blocks, mip_w);
CompressBc4(src_mip_read, dest_mip_write, blocks, mip_w);
break;

case EtcpakType::ETCPAK_TYPE_RGTC_RG:
CompressRgtcRG(src_mip_read, dest_mip_write, blocks, mip_w);
CompressBc5(src_mip_read, dest_mip_write, blocks, mip_w);
break;

default:
Expand Down
8 changes: 1 addition & 7 deletions thirdparty/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ its functionality to IPv4 only.
## etcpak

- Upstream: https://github.com/wolfpld/etcpak
- Version: 1.0 (153f0e04a18b93c277684b577365210adcf8e11c, 2022)
- Version: git (5380688660a3801aec4b25483366027fe0442d7b, 2024)
- License: BSD-3-Clause

Files extracted from upstream source:
Expand All @@ -225,12 +225,6 @@ Files extracted from upstream source:
```
- `AUTHORS.txt` and `LICENSE.txt`

Two files (`ProcessRGB.{cpp,hpp}`) have been modified to provide ETC2_R and ETC2_RG compression;
the changes are based on the existing code.

Two files (`ProcessRgtc.{cpp,hpp}`) have been added to provide an RGTC compression implementation,
based on the library's `ProcessDxtc.{cpp,hpp}`.

## fonts

- `DroidSans*.woff2`:
Expand Down
152 changes: 141 additions & 11 deletions thirdparty/etcpak/ProcessDxtc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -739,18 +739,8 @@ static etcpak_force_inline uint64_t ProcessRGB_SSE( __m128i px0, __m128i px1, __
return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) );
}

static etcpak_force_inline uint64_t ProcessAlpha_SSE( __m128i px0, __m128i px1, __m128i px2, __m128i px3 )
static etcpak_force_inline uint64_t ProcessOneChannel_SSE( __m128i a )
{
__m128i mask = _mm_setr_epi32( 0x0f0b0703, -1, -1, -1 );

__m128i m0 = _mm_shuffle_epi8( px0, mask );
__m128i m1 = _mm_shuffle_epi8( px1, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 3, 0, 3 ) ) );
__m128i m2 = _mm_shuffle_epi8( px2, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 0, 3, 3 ) ) );
__m128i m3 = _mm_shuffle_epi8( px3, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 0, 3, 3, 3 ) ) );
__m128i m4 = _mm_or_si128( m0, m1 );
__m128i m5 = _mm_or_si128( m2, m3 );
__m128i a = _mm_or_si128( m4, m5 );

__m128i solidCmp = _mm_shuffle_epi8( a, _mm_setzero_si128() );
__m128i cmpRes = _mm_cmpeq_epi8( a, solidCmp );
if( _mm_testc_si128( cmpRes, _mm_set1_epi32( -1 ) ) )
Expand Down Expand Up @@ -800,6 +790,21 @@ static etcpak_force_inline uint64_t ProcessAlpha_SSE( __m128i px0, __m128i px1,
}
return (uint64_t)(uint16_t)_mm_cvtsi128_si32( minmax ) | ( data << 16 );
}

// Collects byte 3 of every 32-bit element from four input registers into a
// single 16-byte vector and encodes it with ProcessOneChannel_SSE.
//
// px0..px3 each contribute four bytes: px0 fills lane 0 of the result, px1
// lane 1, px2 lane 2 and px3 lane 3 (a lane being one 32-bit element).
static etcpak_force_inline uint64_t ProcessAlpha_SSE( __m128i px0, __m128i px1, __m128i px2, __m128i px3 )
{
    // Shuffle control for lane 0: pick source bytes 3, 7, 11, 15. The -1 lanes
    // have their high bit set, which makes _mm_shuffle_epi8 write zeros there.
    const __m128i sel0 = _mm_setr_epi32( 0x0f0b0703, -1, -1, -1 );
    // Same control word moved to lane 1, 2 and 3 respectively, so the four
    // shuffles below write to disjoint lanes and can simply be OR-ed together.
    const __m128i sel1 = _mm_shuffle_epi32( sel0, _MM_SHUFFLE( 3, 3, 0, 3 ) );
    const __m128i sel2 = _mm_shuffle_epi32( sel0, _MM_SHUFFLE( 3, 0, 3, 3 ) );
    const __m128i sel3 = _mm_shuffle_epi32( sel0, _MM_SHUFFLE( 0, 3, 3, 3 ) );

    const __m128i lanes01 = _mm_or_si128( _mm_shuffle_epi8( px0, sel0 ), _mm_shuffle_epi8( px1, sel1 ) );
    const __m128i lanes23 = _mm_or_si128( _mm_shuffle_epi8( px2, sel2 ), _mm_shuffle_epi8( px3, sel3 ) );

    return ProcessOneChannel_SSE( _mm_or_si128( lanes01, lanes23 ) );
}
#endif

void CompressDxt1( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
Expand Down Expand Up @@ -954,3 +959,128 @@ void CompressDxt5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t w
}
while( --blocks );
}

// Encodes one channel of a 32-bit-per-pixel image into 64-bit blocks, one
// block per 4x4 pixel tile (BC4).
//
// src    - source pixels; only the low byte of each 32-bit pixel is encoded
//          (upstream names it `r`, presumably red of RGBA8 data).
// dst    - output; exactly one uint64_t is written per tile.
// blocks - number of 4x4 tiles to encode. Zero is a no-op.
// width  - row pitch of src in pixels. Tiles are consumed left to right and
//          width/4 tiles make up one tile row.
void CompressBc4( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
{
    // The do/while below always executes once and `--blocks` would wrap around
    // on zero, so an empty request has to be rejected up front.
    if( blocks == 0 ) return;

    const size_t tilesPerRow = width / 4;
    size_t tileX = 0;
    auto ptr = dst;
    do
    {
#ifdef __SSE4_1__
        // One unaligned 16-byte load per tile row (4 pixels each).
        __m128i px0 = _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 0 ) );
        __m128i px1 = _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 1 ) );
        __m128i px2 = _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 2 ) );
        __m128i px3 = _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 3 ) );

        // Step to the next tile; at the end of a tile row skip the three
        // pixel rows that were already consumed.
        src += 4;
        if( ++tileX == tilesPerRow )
        {
            src += width * 3;
            tileX = 0;
        }

        // Select bytes 0, 4, 8, 12 (low byte of every pixel) into lane 0; the
        // -1 lanes make _mm_shuffle_epi8 emit zeros.
        __m128i mask = _mm_setr_epi32( 0x0c080400, -1, -1, -1 );

        // Each row lands in its own 32-bit lane, so OR-ing merges them.
        __m128i m0 = _mm_shuffle_epi8( px0, mask );
        __m128i m1 = _mm_shuffle_epi8( px1, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 3, 0, 3 ) ) );
        __m128i m2 = _mm_shuffle_epi8( px2, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 0, 3, 3 ) ) );
        __m128i m3 = _mm_shuffle_epi8( px3, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 0, 3, 3, 3 ) ) );
        __m128i m4 = _mm_or_si128( m0, m1 );
        __m128i m5 = _mm_or_si128( m2, m3 );

        *ptr++ = ProcessOneChannel_SSE( _mm_or_si128( m4, m5 ) );
#else
        // Scalar path: copy the low byte of the 16 tile pixels, row by row.
        uint8_t r[4*4];
        auto rgba = src;
        for( int row=0; row<4; row++ )
        {
            r[row*4]   = rgba[0] & 0xff;
            r[row*4+1] = rgba[1] & 0xff;
            r[row*4+2] = rgba[2] & 0xff;
            r[row*4+3] = rgba[3] & 0xff;

            rgba += width;
        }

        // Step to the next tile; at the end of a tile row skip the three
        // pixel rows that were already consumed.
        src += 4;
        if( ++tileX == tilesPerRow )
        {
            src += width * 3;
            tileX = 0;
        }

        *ptr++ = ProcessAlpha( r );
#endif
    } while( --blocks );
}

// Encodes two channels of a 32-bit-per-pixel image into pairs of 64-bit
// blocks, one pair per 4x4 pixel tile (BC5).
//
// src    - source pixels; byte 0 and byte 1 of each 32-bit pixel are encoded
//          (upstream names the buffer `rg`, presumably red/green of RGBA8).
// dst    - output; two uint64_t are written per tile: first the block for
//          byte 0, then the block for byte 1.
// blocks - number of 4x4 tiles to encode. Zero is a no-op.
// width  - row pitch of src in pixels. Tiles are consumed left to right and
//          width/4 tiles make up one tile row.
void CompressBc5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
{
    // The do/while below always executes once and `--blocks` would wrap around
    // on zero, so an empty request has to be rejected up front.
    if( blocks == 0 ) return;

    const size_t tilesPerRow = width / 4;
    size_t tileX = 0;
    auto ptr = dst;
    do
    {
#ifdef __SSE4_1__
        // One unaligned 16-byte load per tile row (4 pixels each).
        __m128i px0 = _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 0 ) );
        __m128i px1 = _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 1 ) );
        __m128i px2 = _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 2 ) );
        __m128i px3 = _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 3 ) );

        // Step to the next tile; at the end of a tile row skip the three
        // pixel rows that were already consumed.
        src += 4;
        if( ++tileX == tilesPerRow )
        {
            src += width*3;
            tileX = 0;
        }

        // First channel: select bytes 0, 4, 8, 12 (byte 0 of every pixel).
        // The -1 lanes make _mm_shuffle_epi8 emit zeros, and each row is
        // routed to its own 32-bit lane so the results can be OR-ed.
        __m128i mask = _mm_setr_epi32( 0x0c080400, -1, -1, -1 );

        __m128i m0 = _mm_shuffle_epi8( px0, mask );
        __m128i m1 = _mm_shuffle_epi8( px1, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 3, 0, 3 ) ) );
        __m128i m2 = _mm_shuffle_epi8( px2, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 0, 3, 3 ) ) );
        __m128i m3 = _mm_shuffle_epi8( px3, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 0, 3, 3, 3 ) ) );
        __m128i m4 = _mm_or_si128( m0, m1 );
        __m128i m5 = _mm_or_si128( m2, m3 );

        *ptr++ = ProcessOneChannel_SSE( _mm_or_si128( m4, m5 ) );

        // Second channel: same gather, but bytes 1, 5, 9, 13 (byte 1 of
        // every pixel).
        mask = _mm_setr_epi32( 0x0d090501, -1, -1, -1 );

        m0 = _mm_shuffle_epi8( px0, mask );
        m1 = _mm_shuffle_epi8( px1, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 3, 0, 3 ) ) );
        m2 = _mm_shuffle_epi8( px2, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 0, 3, 3 ) ) );
        m3 = _mm_shuffle_epi8( px3, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 0, 3, 3, 3 ) ) );
        m4 = _mm_or_si128( m0, m1 );
        m5 = _mm_or_si128( m2, m3 );

        *ptr++ = ProcessOneChannel_SSE( _mm_or_si128( m4, m5 ) );
#else
        // Scalar path: rg[0..15] holds byte 0 of the 16 tile pixels,
        // rg[16..31] holds byte 1, both filled row by row.
        uint8_t rg[4*4*2];
        auto rgba = src;
        for( int row=0; row<4; row++ )
        {
            rg[row*4]   = rgba[0] & 0xff;
            rg[row*4+1] = rgba[1] & 0xff;
            rg[row*4+2] = rgba[2] & 0xff;
            rg[row*4+3] = rgba[3] & 0xff;

            rg[16+row*4]   = (rgba[0] & 0xff00) >> 8;
            rg[16+row*4+1] = (rgba[1] & 0xff00) >> 8;
            rg[16+row*4+2] = (rgba[2] & 0xff00) >> 8;
            rg[16+row*4+3] = (rgba[3] & 0xff00) >> 8;

            rgba += width;
        }

        // Step to the next tile; at the end of a tile row skip the three
        // pixel rows that were already consumed.
        src += 4;
        if( ++tileX == tilesPerRow )
        {
            src += width*3;
            tileX = 0;
        }

        *ptr++ = ProcessAlpha( rg );
        *ptr++ = ProcessAlpha( &rg[16] );
#endif
    } while( --blocks );
}
3 changes: 3 additions & 0 deletions thirdparty/etcpak/ProcessDxtc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,7 @@ void CompressDxt1( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t w
void CompressDxt1Dither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressDxt5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );

void CompressBc4( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressBc5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );

#endif
52 changes: 29 additions & 23 deletions thirdparty/etcpak/ProcessRGB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3266,16 +3266,21 @@ etcpak_force_inline static int16x8_t WidenMultiplier_EAC_NEON( int16x8_t multipl

#endif

template<bool checkSolid = true>
static etcpak_force_inline uint64_t ProcessAlpha_ETC2( const uint8_t* src )
{
#if defined __SSE4_1__
// Check solid
__m128i s = _mm_loadu_si128( (__m128i*)src );
__m128i solidCmp = _mm_set1_epi8( src[0] );
__m128i cmpRes = _mm_cmpeq_epi8( s, solidCmp );
if( _mm_testc_si128( cmpRes, _mm_set1_epi32( -1 ) ) )

if( checkSolid )
{
return src[0];
// Check solid
__m128i solidCmp = _mm_set1_epi8( src[0] );
__m128i cmpRes = _mm_cmpeq_epi8( s, solidCmp );
if( _mm_testc_si128( cmpRes, _mm_set1_epi32( -1 ) ) )
{
return src[0];
}
}

// Calculate min, max
Expand Down Expand Up @@ -3684,12 +3689,15 @@ static etcpak_force_inline uint64_t ProcessAlpha_ETC2( const uint8_t* src )
int srcMid;
uint8x16_t srcAlphaBlock = vld1q_u8( src );
{
uint8_t ref = src[0];
uint8x16_t a0 = vdupq_n_u8( ref );
uint8x16_t r = vceqq_u8( srcAlphaBlock, a0 );
int64x2_t m = vreinterpretq_s64_u8( r );
if( m[0] == -1 && m[1] == -1 )
return ref;
if( checkSolid )
{
uint8_t ref = src[0];
uint8x16_t a0 = vdupq_n_u8( ref );
uint8x16_t r = vceqq_u8( srcAlphaBlock, a0 );
int64x2_t m = vreinterpretq_s64_u8( r );
if( m[0] == -1 && m[1] == -1 )
return ref;
}

// srcRange
#ifdef __aarch64__
Expand Down Expand Up @@ -3759,6 +3767,7 @@ static etcpak_force_inline uint64_t ProcessAlpha_ETC2( const uint8_t* src )
#undef EAC_RECONSTRUCT_VALUE

#else
if( checkSolid )
{
bool solid = true;
const uint8_t* ptr = src + 1;
Expand Down Expand Up @@ -3849,7 +3858,6 @@ static etcpak_force_inline uint64_t ProcessAlpha_ETC2( const uint8_t* src )
#endif
}


void CompressEtc1Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
{
int w = 0;
Expand Down Expand Up @@ -4176,14 +4184,13 @@ void CompressEtc2Rgba( const uint32_t* src, uint64_t* dst, uint32_t blocks, size
src += width * 3;
w = 0;
}
*dst++ = ProcessAlpha_ETC2( alpha );
*dst++ = ProcessAlpha_ETC2<true>( alpha );
*dst++ = ProcessRGB_ETC2( (uint8_t*)rgba, useHeuristics );
}
while( --blocks );
}

// -- GODOT start --
void CompressEtc2R8( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
void CompressEacR( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
{
int w = 0;
uint8_t r[4*4];
Expand Down Expand Up @@ -4239,12 +4246,12 @@ void CompressEtc2R8( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t
src += width * 3;
w = 0;
}
*dst++ = ProcessAlpha_ETC2( r );
*dst++ = ProcessAlpha_ETC2<false>( r );
}
while( --blocks );
}

void CompressEtc2RG8( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
void CompressEacRg( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
{
int w = 0;
uint8_t rg[4*4*2];
Expand Down Expand Up @@ -4300,15 +4307,15 @@ void CompressEtc2RG8( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_
src += width;
v = *src;
*ptrr++ = (v & 0xff0000) >> 16;
*ptrg++ = (v & 0xff00) >> 8;
*ptrg++ = (v & 0xff00) >> 8;
src += width;
v = *src;
*ptrr++ = (v & 0xff0000) >> 16;
*ptrg++ = (v & 0xff00) >> 8;
*ptrg++ = (v & 0xff00) >> 8;
src += width;
v = *src;
*ptrr++ = (v & 0xff0000) >> 16;
*ptrg++ = (v & 0xff00) >> 8;
*ptrg++ = (v & 0xff00) >> 8;
src -= width * 3 - 1;
}
#endif
Expand All @@ -4317,9 +4324,8 @@ void CompressEtc2RG8( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_
src += width * 3;
w = 0;
}
*dst++ = ProcessAlpha_ETC2( rg );
*dst++ = ProcessAlpha_ETC2( &rg[16] );
*dst++ = ProcessAlpha_ETC2<false>( rg );
*dst++ = ProcessAlpha_ETC2<false>( &rg[16] );
}
while( --blocks );
}
// -- GODOT end --
8 changes: 4 additions & 4 deletions thirdparty/etcpak/ProcessRGB.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ void CompressEtc1Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_
void CompressEtc1RgbDither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressEtc2Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, bool useHeuristics );
void CompressEtc2Rgba( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, bool useHeuristics );
// -- GODOT start --
void CompressEtc2R8( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressEtc2RG8( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
// -- GODOT end --

void CompressEacR( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressEacRg( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );

#endif
Loading

0 comments on commit f3a5c61

Please sign in to comment.