Animation optimizations (#571)

* debug: Tweak size of level panel
* asset: Minor renaming in gltf loader
* scene: Store animation times normalized
* scene: Store anim times as 16 bits
* core: Add some 16 bit SIMD utils
* geo: Minor optimization to slerp routine
* scene: Minor tweaks to anim_find_frame
* scene: Increase max number of created footstep decals
* asset: Truncate anim channels with identical keys
* assets: Prune identity scale anim channels
* asset: Remove redundant animation frames
BastianBlokland · Sep 3, 2023 · 66542d2 · 66542d2
1 parent d03518b
commit 66542d2
Show file tree

Hide file tree

Showing 8 changed files with 180 additions and 56 deletions.
diff --git a/libs/asset/include/asset_mesh.h b/libs/asset/include/asset_mesh.h
@@ -44,7 +44,7 @@ typedef enum {
 
 typedef struct {
   u32              frameCount;
-  AssetMeshAnimPtr timeData;  // f32[frameCount].
+  AssetMeshAnimPtr timeData;  // u16[frameCount] (normalized, fractions of the anim duration).
   AssetMeshAnimPtr valueData; // (GeoVector | GeoQuat)[frameCount].
 } AssetMeshAnimChannel;
 

diff --git a/libs/asset/src/loader_mesh_gltf.c b/libs/asset/src/loader_mesh_gltf.c
@@ -35,6 +35,8 @@
  */
 #define gltf_skinned_bounds_mult 3.0f
 
+#define gltf_eq_threshold 1e-2f
+
 typedef enum {
   GltfLoadPhase_BuffersAcquire,
   GltfLoadPhase_BuffersWait,
@@ -414,7 +416,7 @@ static AssetMeshAnimPtr gltf_anim_data_push_trans(GltfLoad* ld, const GltfTransf
   return res;
 }
 
-static AssetMeshAnimPtr gltf_anim_data_push_access(GltfLoad* ld, const u32 acc) {
+MAYBE_UNUSED static AssetMeshAnimPtr gltf_anim_data_push_access(GltfLoad* ld, const u32 acc) {
   const u32 elemSize         = gltf_comp_size(ld->access[acc].compType) * ld->access[acc].compCount;
   const AssetMeshAnimPtr res = gltf_anim_data_begin(ld, bits_nextpow2(elemSize));
   const Mem accessorMem = mem_create(ld->access[acc].data_raw, elemSize * ld->access[acc].count);
@@ -459,6 +461,19 @@ static AssetMeshAnimPtr gltf_anim_data_push_access_mat(GltfLoad* ld, const u32 a
   return res;
 }
 
+/**
+ * Push the f32 scalar values of the given accessor to the animation data as normalized u16's.
+ * Values are stored as fractions of 'refValue': 0 maps to 0 and 'refValue' maps to u16_max.
+ * NOTE: Fractions are clamped to the 0 - 1 range (and a non-positive 'refValue' yields all zeroes)
+ * as converting an out-of-range (or NaN) float to an integer is undefined behavior.
+ */
+static AssetMeshAnimPtr
+gltf_anim_data_push_access_norm16(GltfLoad* ld, const u32 acc, const f32 refValue) {
+  diag_assert(ld->access[acc].compType == GltfType_f32);
+  diag_assert(ld->access[acc].compCount == 1);
+
+  const AssetMeshAnimPtr res = gltf_anim_data_begin(ld, alignof(u16));
+  for (u32 i = 0; i != ld->access[acc].count; ++i) {
+    // Avoid dividing by zero for animations without any duration (for example a single key at 0).
+    f32 valNorm = 0.0f;
+    if (refValue > 0.0f) {
+      valNorm = ld->access[acc].data_f32[i] / refValue;
+    }
+    // Clamp to 0 - 1; the first comparison is written so that a NaN ends up as zero.
+    if (!(valNorm > 0.0f)) {
+      valNorm = 0.0f;
+    } else if (valNorm > 1.0f) {
+      valNorm = 1.0f;
+    }
+    *(u16*)dynarray_push(&ld->animData, sizeof(u16)).ptr = (u16)(valNorm * u16_max);
+  }
+  return res;
+}
+
 static bool gltf_accessor_check(const String typeString, u32* outCompCount) {
   if (string_eq(typeString, string_lit("SCALAR"))) {
     *outCompCount = 1;
@@ -1143,23 +1158,65 @@ static bool gltf_skeleton_is_topologically_sorted(GltfLoad* ld) {
   return true;
 }
 
-static void gltf_optimize_anim_channel(
-    GltfLoad* ld, AssetMeshAnimChannel* ch, const AssetMeshAnimTarget target) {
+/**
+ * Remove a single frame from the given channel.
+ * The time and value entries of all later frames are moved one slot towards the front.
+ * NOTE: Only the frame count of the channel is lowered, no animation data is released.
+ * Expects 'frame' to be a valid frame index of the channel.
+ */
+static void gltf_process_remove_frame(GltfLoad* ld, AssetMeshAnimChannel* ch, const u32 frame) {
+  ch->frameCount -= 1;
+  const usize trailing = ch->frameCount - frame;
+  if (!trailing) {
+    return; // The last frame was removed; nothing has to move.
+  }
+
+  // Close the gap in the time data (one u16 per frame).
+  const usize timeSize  = sizeof(u16);
+  const usize timeBytes = trailing * timeSize;
+  const Mem   timeDst   = dynarray_at(&ld->animData, ch->timeData + frame * timeSize, timeBytes);
+  const Mem timeSrc = dynarray_at(&ld->animData, ch->timeData + (frame + 1) * timeSize, timeBytes);
+  mem_move(timeDst, timeSrc);
+
+  // Close the gap in the value data (one GeoVector sized entry per frame).
+  const usize valSize  = sizeof(GeoVector);
+  const usize valBytes = trailing * valSize;
+  const Mem   valDst   = dynarray_at(&ld->animData, ch->valueData + frame * valSize, valBytes);
+  const Mem valSrc = dynarray_at(&ld->animData, ch->valueData + (frame + 1) * valSize, valBytes);
+  mem_move(valDst, valSrc);
+}
 
-  /**
-   * If a channel consist of only two frames and both are identical we can skip the interpolation.
-   */
+/**
+ * Reduce the amount of frames in the given channel where that does not change the animation.
+ * Frames are compared using 'gltf_eq_threshold' as the tolerance.
+ * NOTE: Only the frame count (and the placement of the remaining keys) is updated; the animation
+ * data of the dropped frames is not released.
+ */
+static void gltf_process_anim_channel(
+    GltfLoad* ld, AssetMeshAnimChannel* ch, const AssetMeshAnimTarget target) {
 
   typedef bool (*EqFunc)(GeoVector, GeoVector, f32);
   const EqFunc eq = target == AssetMeshAnimTarget_Rotation ? geo_vector_equal : geo_vector_equal3;
 
+  /**
+   * If a channel consists of all identical frames we can skip the interpolation.
+   * TODO: Instead of just truncating the frame count we should avoid including data for the removed
+   * frames at all.
+   */
   GeoVector* data = dynarray_at(&ld->animData, ch->valueData, sizeof(GeoVector)).ptr;
-  if (ch->frameCount == 2 && eq(data[0], data[1], 1e-4f)) {
-    ch->frameCount = 1;
+  if (ch->frameCount > 1) {
+    bool allEq = true;
+    for (u32 i = 1; i != ch->frameCount; ++i) {
+      if (!eq(data[0], data[i], gltf_eq_threshold)) {
+        allEq = false;
+        break;
+      }
+    }
+    if (allEq) {
+      ch->frameCount = 1;
+    }
+  }
+
+  /**
+   * Remove redundant frames (frames that are the same as the previous and the next).
+   * NOTE: After a removal the following frame has moved into slot 'i'; it is tested against its
+   * new neighbors before advancing, otherwise every other frame in a run of equal frames is kept.
+   */
+  if (ch->frameCount > 2) {
+    for (u32 i = 1; i < (ch->frameCount - 1);) {
+      if (eq(data[i], data[i - 1], gltf_eq_threshold) &&
+          eq(data[i], data[i + 1], gltf_eq_threshold)) {
+        gltf_process_remove_frame(ld, ch, i);
+        continue; // Re-test the same slot, it now holds the next frame.
+      }
+      ++i;
+    }
   }
 }
 
-static void gltf_optimize_anim_channel_rot(GltfLoad* ld, const AssetMeshAnimChannel* ch) {
+static void gltf_process_anim_channel_rot(GltfLoad* ld, const AssetMeshAnimChannel* ch) {
   GeoQuat* rotPoses = dynarray_at(&ld->animData, ch->valueData, sizeof(GeoQuat)).ptr;
 
   /**
@@ -1176,6 +1233,24 @@ static void gltf_optimize_anim_channel_rot(GltfLoad* ld, const AssetMeshAnimChan
   }
 }
 
+/**
+ * Test if any animation contains a scale key (for any joint) that differs from the identity scale.
+ * Keys are compared against (1, 1, 1) using 'gltf_eq_threshold' as the tolerance.
+ */
+static bool gtlf_process_any_joint_scaled(GltfLoad* ld, const AssetMeshAnim* anims) {
+  static const GeoVector g_one = {.x = 1, .y = 1, .z = 1};
+
+  for (u32 animIndex = 0; animIndex != ld->animCount; ++animIndex) {
+    const AssetMeshAnim* anim = &anims[animIndex];
+    for (u32 jointIndex = 0; jointIndex != ld->jointCount; ++jointIndex) {
+      const AssetMeshAnimChannel* scaleCh = &anim->joints[jointIndex][AssetMeshAnimTarget_Scale];
+      const GeoVector*            keys =
+          dynarray_at(&ld->animData, scaleCh->valueData, sizeof(GeoVector)).ptr;
+      for (u32 frame = 0; frame != scaleCh->frameCount; ++frame) {
+        if (geo_vector_equal3(keys[frame], g_one, gltf_eq_threshold)) {
+          continue; // Identity scale; keep looking.
+        }
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 static void gltf_build_skeleton(GltfLoad* ld, AssetMeshSkeletonComp* out, GltfError* err) {
   diag_assert(ld->jointCount);
 
@@ -1249,13 +1324,13 @@ static void gltf_build_skeleton(GltfLoad* ld, AssetMeshSkeletonComp* out, GltfEr
 
           *resChannel = (AssetMeshAnimChannel){
               .frameCount = ld->access[srcChannel->accInput].count,
-              .timeData   = gltf_anim_data_push_access(ld, srcChannel->accInput),
+              .timeData   = gltf_anim_data_push_access_norm16(ld, srcChannel->accInput, duration),
               .valueData  = gltf_anim_data_push_access_vec(ld, srcChannel->accOutput),
           };
-          gltf_optimize_anim_channel(ld, resChannel, target);
           if (target == AssetMeshAnimTarget_Rotation) {
-            gltf_optimize_anim_channel_rot(ld, resChannel);
+            gltf_process_anim_channel_rot(ld, resChannel);
           }
+          gltf_process_anim_channel(ld, resChannel, target);
         } else {
           *resChannel = (AssetMeshAnimChannel){0};
         }
@@ -1264,6 +1339,16 @@ static void gltf_build_skeleton(GltfLoad* ld, AssetMeshSkeletonComp* out, GltfEr
     resAnims[animIndex].duration = duration;
   }
 
+  // Remove all scale channels if all of the channels use the identity scale.
+  // TODO: Instead of truncating the frameCount to zero we should skip all the channel data.
+  if (!gtlf_process_any_joint_scaled(ld, resAnims)) {
+    for (u32 animIndex = 0; animIndex != ld->animCount; ++animIndex) {
+      for (u32 jointIndex = 0; jointIndex != ld->jointCount; ++jointIndex) {
+        resAnims[animIndex].joints[jointIndex][AssetMeshAnimTarget_Scale].frameCount = 0;
+      }
+    }
+  }
+
   // Create the default pose output.
   AssetMeshAnimPtr resDefaultPose = gltf_anim_data_begin(ld, alignof(GeoVector));
   for (const GltfJoint* joint = ld->joints; joint != ld->joints + ld->jointCount; ++joint) {

diff --git a/libs/core/include/core_simd.h b/libs/core/include/core_simd.h
@@ -28,6 +28,14 @@ MAYBE_UNUSED INLINE_HINT static SimdVec simd_vec_load(const f32 values[PARAM_ARR
   return _mm_load_ps(values);
 }
 
+/**
+ * Read 8 consecutive u16 values (128 bits in total) from memory into a single Simd vector.
+ * NOTE: The values are not converted, the vector holds the raw bits of the input.
+ * Pre-condition: bits_aligned_ptr(values, 16)
+ */
+MAYBE_UNUSED INLINE_HINT static SimdVec simd_vec_load_u16(const u16 values[PARAM_ARRAY_SIZE(8)]) {
+  const f32* rawBits = (const f32*)values;
+  return _mm_load_ps(rawBits);
+}
+
 /**
  * Store a Simd vector to 4 (128 bit aligned) float values.
  * Pre-condition: bits_aligned_ptr(values, 16)
@@ -50,6 +58,10 @@ MAYBE_UNUSED INLINE_HINT static SimdVec simd_vec_broadcast(const f32 value) {
   return _mm_set1_ps(value);
 }
 
+/**
+ * Create a Simd vector with the given u16 value replicated into all eight 16 bit lanes.
+ */
+MAYBE_UNUSED INLINE_HINT static SimdVec simd_vec_broadcast_u16(const u16 value) {
+  const __m128i lanes = _mm_set1_epi16(value);
+  return _mm_castsi128_ps(lanes);
+}
+
 MAYBE_UNUSED INLINE_HINT static SimdVec simd_vec_sign_mask(void) {
   return simd_vec_set(-0.0f, -0.0f, -0.0f, -0.0f);
 }
@@ -113,20 +125,28 @@ MAYBE_UNUSED INLINE_HINT static SimdVec simd_vec_and(const SimdVec a, const Simd
   return _mm_and_ps(a, b);
 }
 
+/**
+ * Gather the most significant bit of each of the four 32 bit lanes into the low 4 bits of the
+ * result.
+ */
+MAYBE_UNUSED INLINE_HINT static u32 simd_vec_mask_u32(const SimdVec a) {
+  const int laneBits = _mm_movemask_ps(a);
+  return laneBits;
+}
+
+/**
+ * Gather the most significant bit of each of the sixteen bytes into the low 16 bits of the result.
+ */
+MAYBE_UNUSED INLINE_HINT static u32 simd_vec_mask_u8(const SimdVec a) {
+  const __m128i asBytes  = _mm_castps_si128(a);
+  const int     byteBits = _mm_movemask_epi8(asBytes);
+  return byteBits;
+}
+
 MAYBE_UNUSED INLINE_HINT static bool simd_vec_any_true(const SimdVec a) {
-  return _mm_movemask_ps(a) != 0b0000;
+  return simd_vec_mask_u32(a) != 0b0000;
 }
 
 MAYBE_UNUSED INLINE_HINT static bool simd_vec_any_false(const SimdVec a) {
-  return _mm_movemask_ps(a) != 0b1111;
+  return simd_vec_mask_u32(a) != 0b1111;
 }
 
 MAYBE_UNUSED INLINE_HINT static bool simd_vec_all_true(const SimdVec a) {
-  return _mm_movemask_ps(a) == 0b1111;
+  return simd_vec_mask_u32(a) == 0b1111;
 }
 
 MAYBE_UNUSED INLINE_HINT static bool simd_vec_all_false(const SimdVec a) {
-  return _mm_movemask_ps(a) == 0b0000;
+  return simd_vec_mask_u32(a) == 0b0000;
 }
 
 MAYBE_UNUSED INLINE_HINT static SimdVec

diff --git a/libs/debug/src/level.c b/libs/debug/src/level.c
@@ -121,7 +121,7 @@ static void level_panel_draw(UiCanvasComp* canvas, DebugLevelContext* ctx, EcsVi
   }
 
   UiTable table = ui_table(.spacing = ui_vector(10, 5));
-  ui_table_add_column(&table, UiTableColumn_Fixed, 200);
+  ui_table_add_column(&table, UiTableColumn_Fixed, 275);
   ui_table_add_column(&table, UiTableColumn_Flexible, 0);
 
   ui_table_draw_header(
@@ -264,6 +264,6 @@ EcsEntityId debug_level_panel_open(EcsWorld* world, const EcsEntityId window) {
       .flags       = DebugLevelFlags_Default,
       .idFilter    = dynstring_create(g_alloc_heap, 32),
       .levelAssets = dynarray_create_t(g_alloc_heap, EcsEntityId, 8),
-      .panel       = ui_panel(.position = ui_vector(0.75f, 0.5f), .size = ui_vector(375, 250)));
+      .panel       = ui_panel(.position = ui_vector(0.75f, 0.5f), .size = ui_vector(400, 250)));
   return panelEntity;
 }
diff --git a/libs/geo/src/quat.c b/libs/geo/src/quat.c
@@ -174,9 +174,15 @@ GeoQuat geo_quat_slerp(const GeoQuat a, const GeoQuat b, const f32 t) {
    * https://www.euclideanspace.com/maths/algebra/realNormedAlgebra/quaternions/slerp
    */
 
+#if geo_quat_simd_enable
+  const SimdVec aVec = simd_vec_load(a.comps);
+  const SimdVec bVec = simd_vec_load(b.comps);
+  const f32     dot  = simd_vec_x(simd_vec_dot4(aVec, bVec));
+#else
   const f32 dot = geo_quat_dot(a, b);
-  f32       tA, tB;
+#endif
 
+  f32 tA, tB;
   if (math_abs(dot) < 0.99999f) {
     const f32 x = intrinsic_acos_f32(dot);
     const f32 y = 1.0f / intrinsic_sin_f32(x);
@@ -188,12 +194,21 @@ GeoQuat geo_quat_slerp(const GeoQuat a, const GeoQuat b, const f32 t) {
     tB = t;
   }
 
+#if geo_quat_simd_enable
+  const SimdVec tAVec = simd_vec_broadcast(tA);
+  const SimdVec tBVec = simd_vec_broadcast(tB);
+
+  GeoQuat res;
+  simd_vec_store(simd_vec_add(simd_vec_mul(aVec, tAVec), simd_vec_mul(bVec, tBVec)), res.comps);
+  return res;
+#else
   return (GeoQuat){
       a.x * tA + b.x * tB,
       a.y * tA + b.y * tB,
       a.z * tA + b.z * tB,
       a.w * tA + b.w * tB,
   };
+#endif
 }
 
 bool geo_quat_towards(GeoQuat* q, const GeoQuat target, const f32 maxAngle) {

diff --git a/libs/scene/src/footstep.c b/libs/scene/src/footstep.c
@@ -13,7 +13,7 @@
 
 #define scene_footstep_lift_threshold 0.05f
 #define scene_footstep_decal_lifetime time_seconds(2)
-#define scene_footstep_max_per_tick 100
+#define scene_footstep_max_per_tick 150
 
 ASSERT(scene_footstep_feet_max <= 8, "Feet state needs to be representable with 8 bits")