Updated meshoptimizer.
bkaradzic committed Aug 31, 2024
1 parent 8fc9516 commit 69eb4a5
Showing 6 changed files with 287 additions and 97 deletions.
97 changes: 97 additions & 0 deletions 3rdparty/meshoptimizer/src/indexgenerator.cpp
@@ -6,6 +6,7 @@

// This work is based on:
// John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010
// John Hable. Variable Rate Shading with Visibility Buffer Rendering. 2024
namespace meshopt
{

@@ -576,3 +577,99 @@ void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const un
memcpy(destination + i * 4, patch, sizeof(patch));
}
}

size_t meshopt_generateProvokingIndexBuffer(unsigned int* destination, unsigned int* reorder, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
    assert(index_count % 3 == 0);

    meshopt_Allocator allocator;

    unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
    memset(remap, -1, vertex_count * sizeof(unsigned int));

    // compute vertex valence; this is used to prioritize least used corner
    // note: we use 8-bit counters for performance; for outlier vertices the valence is incorrect but that just affects the heuristic
    unsigned char* valence = allocator.allocate<unsigned char>(vertex_count);
    memset(valence, 0, vertex_count);

    for (size_t i = 0; i < index_count; ++i)
    {
        unsigned int index = indices[i];
        assert(index < vertex_count);

        valence[index]++;
    }

    unsigned int reorder_offset = 0;

    // assign provoking vertices; leave the rest for the next pass
    for (size_t i = 0; i < index_count; i += 3)
    {
        unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
        assert(a < vertex_count && b < vertex_count && c < vertex_count);

        // try to rotate triangle such that provoking vertex hasn't been seen before
        // if multiple vertices are new, prioritize the one with least valence
        // this reduces the risk that a future triangle will have all three vertices seen
        unsigned int va = remap[a] == ~0u ? valence[a] : ~0u;
        unsigned int vb = remap[b] == ~0u ? valence[b] : ~0u;
        unsigned int vc = remap[c] == ~0u ? valence[c] : ~0u;

        if (vb != ~0u && vb <= va && vb <= vc)
        {
            // abc -> bca
            unsigned int t = a;
            a = b, b = c, c = t;
        }
        else if (vc != ~0u && vc <= va && vc <= vb)
        {
            // abc -> cab
            unsigned int t = c;
            c = b, b = a, a = t;
        }

        unsigned int newidx = reorder_offset;

        // now remap[a] = ~0u or all three vertices are old
        // recording remap[a] makes it possible to remap future references to the same index, conserving space
        if (remap[a] == ~0u)
            remap[a] = newidx;

        // we need to clone the provoking vertex to get a unique index
        // if all three are used the choice is arbitrary since no future triangle will be able to reuse any of these
        reorder[reorder_offset++] = a;

        // note: first vertex is final, the other two will be fixed up in next pass
        destination[i + 0] = newidx;
        destination[i + 1] = b;
        destination[i + 2] = c;

        // update vertex valences for corner heuristic
        valence[a]--;
        valence[b]--;
        valence[c]--;
    }

    // remap or clone non-provoking vertices (iterating to skip provoking vertices)
    int step = 1;

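    // step alternates between 1 and 2 (1 ^ 3 == 2, 2 ^ 3 == 1), so i visits 1, 2, 4, 5, 7, 8, ... and never lands on a provoking corner (multiples of 3)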
    for (size_t i = 1; i < index_count; i += step, step ^= 3)
    {
        unsigned int index = destination[i];

        if (remap[index] == ~0u)
        {
            // we haven't seen the vertex before as a provoking vertex
            // to maintain the reference to the original vertex we need to clone it
            unsigned int newidx = reorder_offset;

            remap[index] = newidx;
            reorder[reorder_offset++] = index;
        }

        destination[i] = remap[index];
    }

    assert(reorder_offset <= vertex_count + index_count / 3);
    return reorder_offset;
}
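To make the two passes concrete, here is a hand-traced run on a small hypothetical input (not part of this commit); the provoking index of each output triangle equals its primitive id, and reorder maps the new indices back to original vertices:

// hypothetical input: two triangles sharing an edge
// indices      = {0, 1, 2,  2, 1, 3}, vertex_count = 4
// pass 1 assigns provoking slots 0 and 1 (the second triangle is rotated from 2,1,3 to 1,3,2);
// pass 2 appends vertices 2 and 3 to the reorder table and remaps the remaining corners:
// destination  = {0, 1, 2,  1, 3, 2}   // destination[3*t + 0] == t for triangle t
// reorder      = {0, 1, 2, 3}          // new index -> original vertex id
// return value = 4                     // no provoking vertex had to be cloned in this case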
59 changes: 50 additions & 9 deletions 3rdparty/meshoptimizer/src/meshoptimizer.h
@@ -29,7 +29,9 @@
#endif

/* Experimental APIs have unstable interface and might have implementation that's not fully tested or optimized */
#ifndef MESHOPTIMIZER_EXPERIMENTAL
#define MESHOPTIMIZER_EXPERIMENTAL MESHOPTIMIZER_API
#endif

/* C interface */
#ifdef __cplusplus
@@ -137,6 +139,19 @@ MESHOPTIMIZER_API void meshopt_generateAdjacencyIndexBuffer(unsigned int* destin
*/
MESHOPTIMIZER_API void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);

/**
* Experimental: Generates an index buffer that can be used for visibility buffer rendering and returns the size of the reorder table
* Each triangle's provoking vertex index is equal to its primitive id; this allows passing it to the fragment shader using a nointerpolation attribute.
* This is important for performance on hardware where the primitive id can't be accessed efficiently in the fragment shader.
* The reorder table stores the original vertex id for each vertex in the new index buffer, and should be used in the vertex shader to load vertex data.
* The provoking vertex is assumed to be the first vertex in the triangle; if this is not the case (OpenGL), rotate each triangle (abc -> bca) before rendering.
* For maximum efficiency the input index buffer should be optimized for vertex cache first.
*
* destination must contain enough space for the resulting index buffer (index_count elements)
* reorder must contain enough space for the worst case reorder table (vertex_count + index_count/3 elements)
*/
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_generateProvokingIndexBuffer(unsigned int* destination, unsigned int* reorder, const unsigned int* indices, size_t index_count, size_t vertex_count);
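A minimal caller-side sketch (not part of the header; the Vertex type, function name, and std::vector usage are illustrative assumptions):

#include <vector>
#include "meshoptimizer.h"

struct Vertex { float px, py, pz; /* ...other attributes... */ };

void buildProvokingBuffers(const std::vector<unsigned int>& indices, const std::vector<Vertex>& vertices)
{
    std::vector<unsigned int> provoking(indices.size());
    // worst case: every triangle clones its provoking vertex
    std::vector<unsigned int> reorder(vertices.size() + indices.size() / 3);

    size_t new_vertex_count = meshopt_generateProvokingIndexBuffer(
        provoking.data(), reorder.data(), indices.data(), indices.size(), vertices.size());
    reorder.resize(new_vertex_count);

    // either gather a new vertex buffer on the CPU...
    std::vector<Vertex> gathered(new_vertex_count);
    for (size_t i = 0; i < new_vertex_count; ++i)
        gathered[i] = vertices[reorder[i]];
    // ...or upload reorder as-is and fetch vertices[reorder[index]] in the vertex shader;
    // either way, the provoking vertex index (read via a flat/nointerpolation attribute) is the triangle id
}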

/**
* Vertex transform cache optimizer
* Reorders indices to reduce the number of GPU vertex shader invocations
@@ -340,7 +355,7 @@ enum
* Mesh simplifier
* Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible
* The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error.
* If not all attributes from the input mesh are required, it's recommended to reindex the mesh using meshopt_generateShadowIndexBuffer prior to simplification.
* If not all attributes from the input mesh are required, it's recommended to reindex the mesh without them prior to simplification.
* Returns the number of indices after simplification, with destination containing new index data
* The resulting index buffer references vertices from the original vertex buffer.
* If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
@@ -360,9 +375,8 @@ MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsig
*
* vertex_attributes should have attribute_count floats for each vertex
* attribute_weights should have attribute_count floats in total; the weights determine relative priority of attributes between each other and wrt position. The recommended weight range is [1e-3..1e-1], assuming attribute data is in [0..1] range.
* attribute_count must be <= 16
* attribute_count must be <= 32
* vertex_lock can be NULL; when it's not NULL, it should have a value for each vertex; 1 denotes vertices that can't be moved
* TODO target_error/result_error currently use combined distance+attribute error; this may change in the future
*/
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* result_error);
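For illustration, a sketch of a call that also weighs per-vertex normals (the buffer and variable names are assumptions, not from this commit; positions and normals are float3 per vertex):

float normal_weights[3] = {1e-2f, 1e-2f, 1e-2f};
float result_error = 0.f;

size_t new_index_count = meshopt_simplifyWithAttributes(
    destination, indices, index_count,
    positions, vertex_count, sizeof(float) * 3,
    normals, sizeof(float) * 3,
    normal_weights, 3,
    NULL /* vertex_lock */,
    target_index_count, target_error, /* options= */ 0, &result_error);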

@@ -469,6 +483,13 @@ struct meshopt_VertexFetchStatistics
*/
MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size);

/**
* Meshlet is a small mesh cluster (subset) that consists of:
* - triangles, an 8-bit micro triangle (index) buffer, that for each triangle specifies three local vertices to use;
* - vertices, a 32-bit vertex indirection buffer, that for each local vertex specifies which mesh vertex to fetch vertex attributes from.
*
* For efficiency, meshlet triangles and vertices are packed into two large arrays; this structure contains offsets and counts to access the data.
*/
struct meshopt_Meshlet
{
/* offsets within meshlet_vertices and meshlet_triangles arrays with meshlet data */
@@ -484,6 +505,7 @@
* Meshlet builder
* Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer
* The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers.
* When targeting mesh shading hardware, for maximum efficiency meshlets should be further optimized using meshopt_optimizeMeshlet.
* When using buildMeshlets, vertex positions need to be provided to minimize the size of the resulting clusters.
* When using buildMeshletsScan, for maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
*
@@ -544,7 +566,8 @@ struct meshopt_Bounds
* Real-Time Rendering 4th Edition, section 19.3).
*
* vertex_positions should have float3 position in the first 12 bytes of each vertex
* index_count/3 should be less than or equal to 512 (the function assumes clusters of limited size)
* vertex_count should specify the number of vertices in the entire mesh, not cluster or meshlet
* index_count/3 and triangle_count must not exceed implementation limits (<= 512)
*/
MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
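For reference, a typical per-meshlet call might look like the following sketch (m, meshlet_vertices, meshlet_triangles, and positions are assumed caller variables; the meshopt_Meshlet offset/count field names are assumptions based on the struct documented above):

// compute bounds for one meshlet m produced by the meshlet builder;
// vertex_count and positions refer to the entire mesh, as noted above
meshopt_Bounds bounds = meshopt_computeMeshletBounds(
    &meshlet_vertices[m.vertex_offset], &meshlet_triangles[m.triangle_offset], m.triangle_count,
    positions, vertex_count, sizeof(float) * 3);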
@@ -642,6 +665,8 @@ inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indice
template <typename T>
inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
template <typename T>
inline size_t meshopt_generateProvokingIndexBuffer(T* destination, unsigned int* reorder, const T* indices, size_t index_count, size_t vertex_count);
template <typename T>
inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count);
template <typename T>
inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count);
@@ -727,8 +752,8 @@ class meshopt_Allocator
typedef StorageT<void> Storage;

meshopt_Allocator()
: blocks()
, count(0)
: blocks()
, count(0)
{
}

@@ -738,7 +763,8 @@
Storage::deallocate(blocks[i - 1]);
}

template <typename T> T* allocate(size_t size)
template <typename T>
T* allocate(size_t size)
{
assert(count < sizeof(blocks) / sizeof(blocks[0]));
T* result = static_cast<T*>(Storage::allocate(size > size_t(-1) / sizeof(T) ? size_t(-1) : size * sizeof(T)));
@@ -759,8 +785,10 @@
};

// This makes sure that allocate/deallocate are lazily generated in translation units that need them and are deduplicated by the linker
template <typename T> void* (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT<T>::allocate)(size_t) = operator new;
template <typename T> void (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT<T>::deallocate)(void*) = operator delete;
template <typename T>
void* (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT<T>::allocate)(size_t) = operator new;
template <typename T>
void (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT<T>::deallocate)(void*) = operator delete;
#endif

/* Inline implementation for C++ templated wrappers */
@@ -875,6 +903,19 @@ inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* ind
meshopt_generateTessellationIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
}

template <typename T>
inline size_t meshopt_generateProvokingIndexBuffer(T* destination, unsigned int* reorder, const T* indices, size_t index_count, size_t vertex_count)
{
    meshopt_IndexAdapter<T> in(NULL, indices, index_count);
    meshopt_IndexAdapter<T> out(destination, NULL, index_count);

    size_t bound = vertex_count + (index_count / 3);
    assert(size_t(T(bound - 1)) == bound - 1); // bound - 1 must fit in T
    (void)bound;

    return meshopt_generateProvokingIndexBuffer(out.data, reorder, in.data, index_count, vertex_count);
}

template <typename T>
inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count)
{
