Skip to content

Commit

Permalink
Animation optimizations (#571)
Browse files Browse the repository at this point in the history
* debug: Tweak size of level panel

* asset: Minor renaming in gltf loader

* scene: Store animation times normalized

* scene: Store anim times as 16 bits

* core: Add some 16 bit SIMD utils

* geo: Minor optimization to slerp routine

* scene: Minor tweaks to anim_find_frame

* scene: Increase max number of created footstep decals

* asset: Truncate anim channels with identical keys

* assets: Prune identity scale anim channels

* asset: Remove redundant animation frames
  • Loading branch information
BastianBlokland authored Sep 3, 2023
1 parent d03518b commit 66542d2
Show file tree
Hide file tree
Showing 8 changed files with 180 additions and 56 deletions.
2 changes: 1 addition & 1 deletion libs/asset/include/asset_mesh.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ typedef enum {

/**
 * Key-frame data of a single animation channel.
 * 'timeData' and 'valueData' both reference arrays with 'frameCount' entries.
 */
typedef struct {
  u32              frameCount;
  AssetMeshAnimPtr timeData;  // u16[frameCount] (normalized, fractions of the anim duration).
  AssetMeshAnimPtr valueData; // (GeoVector | GeoQuat)[frameCount].
} AssetMeshAnimChannel;

Expand Down
109 changes: 97 additions & 12 deletions libs/asset/src/loader_mesh_gltf.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
*/
#define gltf_skinned_bounds_mult 3.0f

#define gltf_eq_threshold 1e-2f

typedef enum {
GltfLoadPhase_BuffersAcquire,
GltfLoadPhase_BuffersWait,
Expand Down Expand Up @@ -414,7 +416,7 @@ static AssetMeshAnimPtr gltf_anim_data_push_trans(GltfLoad* ld, const GltfTransf
return res;
}

static AssetMeshAnimPtr gltf_anim_data_push_access(GltfLoad* ld, const u32 acc) {
MAYBE_UNUSED static AssetMeshAnimPtr gltf_anim_data_push_access(GltfLoad* ld, const u32 acc) {
const u32 elemSize = gltf_comp_size(ld->access[acc].compType) * ld->access[acc].compCount;
const AssetMeshAnimPtr res = gltf_anim_data_begin(ld, bits_nextpow2(elemSize));
const Mem accessorMem = mem_create(ld->access[acc].data_raw, elemSize * ld->access[acc].count);
Expand Down Expand Up @@ -459,6 +461,19 @@ static AssetMeshAnimPtr gltf_anim_data_push_access_mat(GltfLoad* ld, const u32 a
return res;
}

/**
 * Push the values of a scalar f32 accessor to the animation data as normalized u16 values.
 * Each value is divided by 'refValue' and scaled by 'u16_max' (the fraction is truncated).
 *
 * The normalized value is clamped to the [0, 1] range and a non-positive (or NaN) 'refValue' maps
 * every entry to zero. Without this a zero 'refValue' or a value outside of [0, refValue] would
 * convert an out-of-range (or NaN) float to an integer, which is undefined behavior.
 *
 * Returns the pointer that 'gltf_anim_data_begin' yielded before the values were pushed.
 * Pre-condition: the accessor has component-type f32 and a component-count of 1.
 */
static AssetMeshAnimPtr
gltf_anim_data_push_access_norm16(GltfLoad* ld, const u32 acc, const f32 refValue) {
  diag_assert(ld->access[acc].compType == GltfType_f32);
  diag_assert(ld->access[acc].compCount == 1);

  const bool refValid = refValue > 0.0f; // False for zero, negative and NaN references.

  const AssetMeshAnimPtr res = gltf_anim_data_begin(ld, alignof(u16));
  for (u32 i = 0; i != ld->access[acc].count; ++i) {
    f32 valNorm = refValid ? ld->access[acc].data_f32[i] / refValue : 0.0f;
    if (!(valNorm > 0.0f)) {
      valNorm = 0.0f; // Also catches NaN inputs.
    } else if (valNorm > 1.0f) {
      valNorm = 1.0f;
    }
    *(u16*)dynarray_push(&ld->animData, sizeof(u16)).ptr = (u16)(valNorm * u16_max);
  }
  return res;
}

static bool gltf_accessor_check(const String typeString, u32* outCompCount) {
if (string_eq(typeString, string_lit("SCALAR"))) {
*outCompCount = 1;
Expand Down Expand Up @@ -1143,23 +1158,65 @@ static bool gltf_skeleton_is_topologically_sorted(GltfLoad* ld) {
return true;
}

static void gltf_optimize_anim_channel(
GltfLoad* ld, AssetMeshAnimChannel* ch, const AssetMeshAnimTarget target) {
/**
 * Remove the frame at index 'frame' from the given channel.
 * Decrements the channel's frame-count and shifts the time (u16) and value (GeoVector sized)
 * entries of all later frames one slot towards the front.
 */
static void gltf_process_remove_frame(GltfLoad* ld, AssetMeshAnimChannel* ch, const u32 frame) {
  const usize trailing = --ch->frameCount - frame;
  if (!trailing) {
    return; // The last frame was removed; there is nothing to shift.
  }

  // Shift the trailing time entries.
  const usize timeStride = sizeof(u16);
  const Mem   timeDst =
      dynarray_at(&ld->animData, ch->timeData + frame * timeStride, trailing * timeStride);
  const Mem timeSrc =
      dynarray_at(&ld->animData, ch->timeData + (frame + 1) * timeStride, trailing * timeStride);
  mem_move(timeDst, timeSrc);

  // Shift the trailing value entries.
  const usize valueStride = sizeof(GeoVector);
  const Mem   valueDst =
      dynarray_at(&ld->animData, ch->valueData + frame * valueStride, trailing * valueStride);
  const Mem valueSrc =
      dynarray_at(&ld->animData, ch->valueData + (frame + 1) * valueStride, trailing * valueStride);
  mem_move(valueDst, valueSrc);
}

/**
 * Reduce the amount of frames stored for an animation channel:
 * - A channel where every frame equals the first frame is truncated to a single frame.
 * - Frames that equal both their previous and their next frame are removed.
 *
 * Rotation channels are compared using 'geo_vector_equal', all other targets using
 * 'geo_vector_equal3'; both with 'gltf_eq_threshold' as the threshold.
 */
static void gltf_process_anim_channel(
    GltfLoad* ld, AssetMeshAnimChannel* ch, const AssetMeshAnimTarget target) {

  typedef bool (*EqFunc)(GeoVector, GeoVector, f32);
  const EqFunc eq = target == AssetMeshAnimTarget_Rotation ? geo_vector_equal : geo_vector_equal3;

  GeoVector* data = dynarray_at(&ld->animData, ch->valueData, sizeof(GeoVector)).ptr;

  /**
   * If a channel consists of all identical frames we can skip the interpolation.
   * TODO: Instead of just truncating the frame count we should avoid including data for the removed
   * frames at all.
   */
  if (ch->frameCount > 1) {
    bool allEq = true;
    for (u32 i = 1; i != ch->frameCount; ++i) {
      if (!eq(data[0], data[i], gltf_eq_threshold)) {
        allEq = false;
        break;
      }
    }
    if (allEq) {
      ch->frameCount = 1;
    }
  }

  /**
   * Remove redundant frames (frames that are the same as the previous and the next).
   * NOTE: After a removal the later frames have shifted down, so the same index is inspected again
   * instead of advancing; otherwise runs of four or more equal frames keep redundant frames.
   */
  for (u32 i = 1; (i + 1) < ch->frameCount;) {
    if (eq(data[i], data[i - 1], gltf_eq_threshold) &&
        eq(data[i], data[i + 1], gltf_eq_threshold)) {
      gltf_process_remove_frame(ld, ch, i);
      continue;
    }
    ++i;
  }
}

static void gltf_optimize_anim_channel_rot(GltfLoad* ld, const AssetMeshAnimChannel* ch) {
static void gltf_process_anim_channel_rot(GltfLoad* ld, const AssetMeshAnimChannel* ch) {
GeoQuat* rotPoses = dynarray_at(&ld->animData, ch->valueData, sizeof(GeoQuat)).ptr;

/**
Expand All @@ -1176,6 +1233,24 @@ static void gltf_optimize_anim_channel_rot(GltfLoad* ld, const AssetMeshAnimChan
}
}

/**
 * Test if any scale channel (of any joint in any animation) contains a frame that differs from
 * the (1, 1, 1) scale by more than 'gltf_eq_threshold'.
 * Returns true as soon as such a frame is found, false if none exists.
 */
static bool gtlf_process_any_joint_scaled(GltfLoad* ld, const AssetMeshAnim* anims) {
  static const GeoVector g_one = {.x = 1, .y = 1, .z = 1};

  for (u32 a = 0; a != ld->animCount; ++a) {
    const AssetMeshAnim* anim = &anims[a];
    for (u32 j = 0; j != ld->jointCount; ++j) {
      const AssetMeshAnimChannel* channel = &anim->joints[j][AssetMeshAnimTarget_Scale];

      const Mem        firstValue = dynarray_at(&ld->animData, channel->valueData, sizeof(GeoVector));
      const GeoVector* values     = firstValue.ptr;

      for (u32 f = 0; f != channel->frameCount; ++f) {
        const bool isIdentity = geo_vector_equal3(values[f], g_one, gltf_eq_threshold);
        if (!isIdentity) {
          return true;
        }
      }
    }
  }
  return false;
}

static void gltf_build_skeleton(GltfLoad* ld, AssetMeshSkeletonComp* out, GltfError* err) {
diag_assert(ld->jointCount);

Expand Down Expand Up @@ -1249,13 +1324,13 @@ static void gltf_build_skeleton(GltfLoad* ld, AssetMeshSkeletonComp* out, GltfEr

*resChannel = (AssetMeshAnimChannel){
.frameCount = ld->access[srcChannel->accInput].count,
.timeData = gltf_anim_data_push_access(ld, srcChannel->accInput),
.timeData = gltf_anim_data_push_access_norm16(ld, srcChannel->accInput, duration),
.valueData = gltf_anim_data_push_access_vec(ld, srcChannel->accOutput),
};
gltf_optimize_anim_channel(ld, resChannel, target);
if (target == AssetMeshAnimTarget_Rotation) {
gltf_optimize_anim_channel_rot(ld, resChannel);
gltf_process_anim_channel_rot(ld, resChannel);
}
gltf_process_anim_channel(ld, resChannel, target);
} else {
*resChannel = (AssetMeshAnimChannel){0};
}
Expand All @@ -1264,6 +1339,16 @@ static void gltf_build_skeleton(GltfLoad* ld, AssetMeshSkeletonComp* out, GltfEr
resAnims[animIndex].duration = duration;
}

// Remove all scale channels if all of the channels use the identity scale.
// TODO: Instead of truncating the frameCount to zero we should skip all of the channel data.
if (!gtlf_process_any_joint_scaled(ld, resAnims)) {
for (u32 animIndex = 0; animIndex != ld->animCount; ++animIndex) {
for (u32 jointIndex = 0; jointIndex != ld->jointCount; ++jointIndex) {
resAnims[animIndex].joints[jointIndex][AssetMeshAnimTarget_Scale].frameCount = 0;
}
}
}

// Create the default pose output.
AssetMeshAnimPtr resDefaultPose = gltf_anim_data_begin(ld, alignof(GeoVector));
for (const GltfJoint* joint = ld->joints; joint != ld->joints + ld->jointCount; ++joint) {
Expand Down
28 changes: 24 additions & 4 deletions libs/core/include/core_simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ MAYBE_UNUSED INLINE_HINT static SimdVec simd_vec_load(const f32 values[PARAM_ARR
return _mm_load_ps(values);
}

/**
* Load 8 (128 bit aligned) u16 values into a Simd vector.
* Pre-condition: bits_aligned_ptr(values, 16)
*/
MAYBE_UNUSED INLINE_HINT static SimdVec simd_vec_load_u16(const u16 values[PARAM_ARRAY_SIZE(8)]) {
return _mm_load_ps((const f32*)values);
}

/**
* Store a Simd vector to 4 (128 bit aligned) float values.
* Pre-condition: bits_aligned_ptr(values, 16)
Expand All @@ -50,6 +58,10 @@ MAYBE_UNUSED INLINE_HINT static SimdVec simd_vec_broadcast(const f32 value) {
return _mm_set1_ps(value);
}

/**
 * Create a Simd vector where each of the eight 16 bit lanes contains the given value.
 */
MAYBE_UNUSED INLINE_HINT static SimdVec simd_vec_broadcast_u16(const u16 value) {
  const __m128i lanes = _mm_set1_epi16(value);
  return _mm_castsi128_ps(lanes); // Bit-cast only; the lanes are not converted to floats.
}

MAYBE_UNUSED INLINE_HINT static SimdVec simd_vec_sign_mask(void) {
return simd_vec_set(-0.0f, -0.0f, -0.0f, -0.0f);
}
Expand Down Expand Up @@ -113,20 +125,28 @@ MAYBE_UNUSED INLINE_HINT static SimdVec simd_vec_and(const SimdVec a, const Simd
return _mm_and_ps(a, b);
}

/**
 * Create a 4 bit mask from the most significant bit of each of the four 32 bit lanes.
 */
MAYBE_UNUSED INLINE_HINT static u32 simd_vec_mask_u32(const SimdVec a) {
  const int laneBits = _mm_movemask_ps(a);
  return laneBits;
}

/**
 * Create a 16 bit mask from the most significant bit of each of the sixteen 8 bit lanes.
 */
MAYBE_UNUSED INLINE_HINT static u32 simd_vec_mask_u8(const SimdVec a) {
  const __m128i asInts   = _mm_castps_si128(a);
  const int     byteBits = _mm_movemask_epi8(asInts);
  return byteBits;
}

/**
 * Check if at least one of the four 32 bit lanes has its most significant bit set.
 */
MAYBE_UNUSED INLINE_HINT static bool simd_vec_any_true(const SimdVec a) {
  return simd_vec_mask_u32(a) != 0b0000;
}

/**
 * Check if at least one of the four 32 bit lanes has its most significant bit cleared.
 */
MAYBE_UNUSED INLINE_HINT static bool simd_vec_any_false(const SimdVec a) {
  return simd_vec_mask_u32(a) != 0b1111;
}

/**
 * Check if all of the four 32 bit lanes have their most significant bit set.
 */
MAYBE_UNUSED INLINE_HINT static bool simd_vec_all_true(const SimdVec a) {
  return simd_vec_mask_u32(a) == 0b1111;
}

/**
 * Check if all of the four 32 bit lanes have their most significant bit cleared.
 */
MAYBE_UNUSED INLINE_HINT static bool simd_vec_all_false(const SimdVec a) {
  return simd_vec_mask_u32(a) == 0b0000;
}

MAYBE_UNUSED INLINE_HINT static SimdVec
Expand Down
4 changes: 2 additions & 2 deletions libs/debug/src/level.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ static void level_panel_draw(UiCanvasComp* canvas, DebugLevelContext* ctx, EcsVi
}

UiTable table = ui_table(.spacing = ui_vector(10, 5));
ui_table_add_column(&table, UiTableColumn_Fixed, 200);
ui_table_add_column(&table, UiTableColumn_Fixed, 275);
ui_table_add_column(&table, UiTableColumn_Flexible, 0);

ui_table_draw_header(
Expand Down Expand Up @@ -264,6 +264,6 @@ EcsEntityId debug_level_panel_open(EcsWorld* world, const EcsEntityId window) {
.flags = DebugLevelFlags_Default,
.idFilter = dynstring_create(g_alloc_heap, 32),
.levelAssets = dynarray_create_t(g_alloc_heap, EcsEntityId, 8),
.panel = ui_panel(.position = ui_vector(0.75f, 0.5f), .size = ui_vector(375, 250)));
.panel = ui_panel(.position = ui_vector(0.75f, 0.5f), .size = ui_vector(400, 250)));
return panelEntity;
}
17 changes: 16 additions & 1 deletion libs/geo/src/quat.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,15 @@ GeoQuat geo_quat_slerp(const GeoQuat a, const GeoQuat b, const f32 t) {
* https://www.euclideanspace.com/maths/algebra/realNormedAlgebra/quaternions/slerp
*/

#if geo_quat_simd_enable
const SimdVec aVec = simd_vec_load(a.comps);
const SimdVec bVec = simd_vec_load(b.comps);
const f32 dot = simd_vec_x(simd_vec_dot4(aVec, bVec));
#else
const f32 dot = geo_quat_dot(a, b);
f32 tA, tB;
#endif

f32 tA, tB;
if (math_abs(dot) < 0.99999f) {
const f32 x = intrinsic_acos_f32(dot);
const f32 y = 1.0f / intrinsic_sin_f32(x);
Expand All @@ -188,12 +194,21 @@ GeoQuat geo_quat_slerp(const GeoQuat a, const GeoQuat b, const f32 t) {
tB = t;
}

#if geo_quat_simd_enable
const SimdVec tAVec = simd_vec_broadcast(tA);
const SimdVec tBVec = simd_vec_broadcast(tB);

GeoQuat res;
simd_vec_store(simd_vec_add(simd_vec_mul(aVec, tAVec), simd_vec_mul(bVec, tBVec)), res.comps);
return res;
#else
return (GeoQuat){
a.x * tA + b.x * tB,
a.y * tA + b.y * tB,
a.z * tA + b.z * tB,
a.w * tA + b.w * tB,
};
#endif
}

bool geo_quat_towards(GeoQuat* q, const GeoQuat target, const f32 maxAngle) {
Expand Down
2 changes: 1 addition & 1 deletion libs/scene/src/footstep.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

#define scene_footstep_lift_threshold 0.05f
#define scene_footstep_decal_lifetime time_seconds(2)
// NOTE(review): presumably the maximum number of footstep decals created in a single tick; confirm
// against the usage further down in this file.
#define scene_footstep_max_per_tick 150

ASSERT(scene_footstep_feet_max <= 8, "Feet state needs to be representable with 8 bits")

Expand Down
Loading

0 comments on commit 66542d2

Please sign in to comment.