From 0f434832369668cef2fee80489f707c965d6ab2d Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine
Date: Mon, 30 Sep 2024 13:43:56 -0700
Subject: [PATCH 1/6] demo: Implement manual boundary locking for clusters

While automatic border locking usually works very well, it prevents
simplifying open geometric borders that the source mesh might contain.
When a mesh has many such edges, this negatively affects DAG quality by
locking edges contained within a single cluster group.

Instead, we can simply mark vertices that are shared by more than one
group after a partition and feed that data to the simplifier.

The effect of this is situational; it's beneficial overall, but because
the rest of the pipeline has odd issues with partitioning (even when
using METIS for everything), it sometimes ends up constraining the
build process a little more for unclear reasons. Theoretically this
should be strictly better, and on some meshes like kitten.obj it does
measurably help; however, it needs fixes for attribute seams before it
can stay enabled.
---
 demo/nanite.cpp | 41 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/demo/nanite.cpp b/demo/nanite.cpp
index a52a61a39..45e7ca694 100644
--- a/demo/nanite.cpp
+++ b/demo/nanite.cpp
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
@@ -50,6 +51,7 @@ struct Cluster
 };
 
 const size_t kClusterSize = 128;
+const bool kUseLocks = false;
 
 static LODBounds bounds(const std::vector<Vertex>& vertices, const std::vector<unsigned int>& indices, float error)
 {
@@ -386,15 +388,40 @@ static std::vector<std::vector<int> > partition(const std::vector<Cluster>& clus
 	return result;
 }
 
-static std::vector<unsigned int> simplify(const std::vector<Vertex>& vertices, const std::vector<unsigned int>& indices, size_t target_count, float* error = NULL)
+static void lockBoundary(std::vector<unsigned char>& locks, const std::vector<std::vector<int> >& groups, const std::vector<Cluster>& clusters)
+{
+	std::vector<int> groupmap(locks.size(), -1);
+
+	memset(&locks[0], 0, locks.size());
+
+	for (size_t i = 0; i < groups.size(); ++i)
+		for (size_t j = 0; j < groups[i].size(); ++j)
+		{
+			const Cluster& cluster = clusters[groups[i][j]];
+
+			for (size_t k = 0; k < cluster.indices.size(); ++k)
+			{
+				unsigned int v = cluster.indices[k];
+
+				if (groupmap[v] == -1 || groupmap[v] == int(i))
+					groupmap[v] = int(i);
+				else
+					locks[v] = 1;
+			}
+		}
+}
+
+static std::vector<unsigned int> simplify(const std::vector<Vertex>& vertices, const std::vector<unsigned int>& indices, const std::vector<unsigned char>* locks, size_t target_count, float* error = NULL)
 {
 	if (target_count > indices.size())
 		return indices;
 
 	std::vector<unsigned int> lod(indices.size());
-	unsigned int options = meshopt_SimplifyLockBorder | meshopt_SimplifySparse | meshopt_SimplifyErrorAbsolute;
-	lod.resize(meshopt_simplify(&lod[0], &indices[0], indices.size(), &vertices[0].px, vertices.size(), sizeof(Vertex), target_count, FLT_MAX, options, error));
-
+	unsigned int options = meshopt_SimplifySparse | meshopt_SimplifyErrorAbsolute;
+	if (locks)
+		lod.resize(meshopt_simplifyWithAttributes(&lod[0], &indices[0], indices.size(), &vertices[0].px, vertices.size(), sizeof(Vertex), NULL, 0, NULL, 0, &(*locks)[0], target_count, FLT_MAX, options, error));
+	else
+		lod.resize(meshopt_simplify(&lod[0], &indices[0], indices.size(), &vertices[0].px, vertices.size(), sizeof(Vertex), target_count, FLT_MAX, options | meshopt_SimplifyLockBorder, error));
 	return lod;
 }
 
@@ -431,6 +458,7 @@ void nanite(const std::vector<Vertex>& vertices, const std::vector<unsigned int>
 #endif
 
 	int depth = 0;
+	std::vector<unsigned char> locks(vertices.size());
 
 	// merge and simplify clusters until we can't merge anymore
 	while (pending.size() > 1)
@@ -449,6 +477,9 @@ void nanite(const std::vector<Vertex>& vertices, const std::vector<unsigned int>
 		if (dump && depth == atoi(dump))
 			dumpObj(vertices, std::vector<unsigned int>());
 
+		if (kUseLocks)
+			lockBoundary(locks, groups, clusters);
+
 		// every group needs to be simplified now
 		for (size_t i = 0; i < groups.size(); ++i)
 		{
@@ -479,7 +510,7 @@ void nanite(const std::vector<Vertex>& vertices, const std::vector<unsigned int>
 			dumpObj("group", merged);
 
 		float error = 0.f;
-		std::vector<unsigned int> simplified = simplify(vertices, merged, kClusterSize * 2 * 3, &error);
+		std::vector<unsigned int> simplified = simplify(vertices, merged, kUseLocks ? &locks : NULL, kClusterSize * 2 * 3, &error);
 		if (simplified.size() > merged.size() * 0.85f || simplified.size() > kClusterSize * 3 * 3)
 		{
 #if TRACE

From 43dac0d016c74e524a7184d3c797d96fdd7e8a7b Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine
Date: Tue, 1 Oct 2024 18:07:28 -0700
Subject: [PATCH 2/6] demo: Add total triangle/cluster counter

This is helpful to estimate the memory impact of the full LOD chain and
makes it easier to compare stats with UE, which reports them in its log
file.
---
 demo/nanite.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/demo/nanite.cpp b/demo/nanite.cpp
index 45e7ca694..b238195ad 100644
--- a/demo/nanite.cpp
+++ b/demo/nanite.cpp
@@ -567,11 +567,16 @@ void nanite(const std::vector<Vertex>& vertices, const std::vector<unsigned int>
 		pending.insert(pending.end(), retry.begin(), retry.end());
 	}
 
+	size_t total_triangles = 0;
 	size_t lowest_triangles = 0;
 	for (size_t i = 0; i < clusters.size(); ++i)
+	{
+		total_triangles += clusters[i].indices.size() / 3;
 		if (clusters[i].parent.error == FLT_MAX)
 			lowest_triangles += clusters[i].indices.size() / 3;
+	}
 
+	printf("total: %d triangles in %d clusters\n", int(total_triangles), int(clusters.size()));
 	printf("lowest lod: %d triangles\n", int(lowest_triangles));
 
 	// for testing purposes, we can compute a DAG cut from a given viewpoint and dump it as an OBJ

From a7f7198b72ccbae08624e0eb62add5d6e7272d9d Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine
Date: Tue, 1 Oct 2024 18:35:37 -0700
Subject: [PATCH 3/6] demo: Add individual clusters to debug mesh

It is pretty difficult to debug the visualized data with just groups,
without a sense of where the cluster boundaries lie; this outputs
duplicate geometry but makes it easier to work with in Blender.
---
 demo/nanite.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/demo/nanite.cpp b/demo/nanite.cpp
index b238195ad..f05b9304f 100644
--- a/demo/nanite.cpp
+++ b/demo/nanite.cpp
@@ -507,7 +507,12 @@ void nanite(const std::vector<Vertex>& vertices, const std::vector<unsigned int>
 			merged.insert(merged.end(), clusters[groups[i][j]].indices.begin(), clusters[groups[i][j]].indices.end());
 
 		if (dump && depth == atoi(dump))
+		{
+			for (size_t j = 0; j < groups[i].size(); ++j)
+				dumpObj("cluster", clusters[groups[i][j]].indices);
+
 			dumpObj("group", merged);
+		}
 
 		float error = 0.f;
 		std::vector<unsigned int> simplified = simplify(vertices, merged, kUseLocks ? &locks : NULL, kClusterSize * 2 * 3, &error);

From b35474e3298e654130568d50d29c1fbee52e5074 Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine
Date: Tue, 1 Oct 2024 18:51:20 -0700
Subject: [PATCH 4/6] demo: Use position remap for locking and partitioning

Previously, when we built adjacency between groups, we would not create
edges between clusters when they shared the position of a vertex but
not its attributes. This meant that when the cluster boundary contained
a crease, it would not be reflected correctly in the adjacency.
Similarly, but more importantly, we need this information when locking
the cluster boundary for the same reason; before, lockBoundary could
result in gaps because the locking information on the boundary was
inconsistent.

This allows us to enable kUseLocks by default.
---
 demo/nanite.cpp | 43 +++++++++++++++++++++++++++++--------------
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/demo/nanite.cpp b/demo/nanite.cpp
index f05b9304f..6ec748104 100644
--- a/demo/nanite.cpp
+++ b/demo/nanite.cpp
@@ -51,7 +51,7 @@ struct Cluster
 };
 
 const size_t kClusterSize = 128;
-const bool kUseLocks = false;
+const bool kUseLocks = true;
 
 static LODBounds bounds(const std::vector<Vertex>& vertices, const std::vector<unsigned int>& indices, float error)
 {
@@ -276,7 +276,7 @@ static std::vector<Cluster> clusterize(const std::vector<Vertex>& vertices, cons
 }
 
 #ifdef METIS
-static std::vector<std::vector<int> > partitionMetis(const std::vector<Cluster>& clusters, const std::vector<int>& pending)
+static std::vector<std::vector<int> > partitionMetis(const std::vector<Cluster>& clusters, const std::vector<int>& pending, const std::vector<unsigned int>& remap)
 {
 	std::vector<std::vector<int> > result;
 
@@ -288,8 +288,8 @@ static std::vector<std::vector<int> > partitionMetis(const std::vector<Cluster>&
 
 		for (size_t j = 0; j < cluster.indices.size(); ++j)
 		{
-			int v0 = cluster.indices[j + 0];
-			int v1 = cluster.indices[j + (j % 3 == 2 ? -2 : 1)];
+			int v0 = remap[cluster.indices[j + 0]];
+			int v1 = remap[cluster.indices[j + (j % 3 == 2 ? -2 : 1)]];
 
 			std::vector<int>& list = edges[std::make_pair(std::min(v0, v1), std::max(v0, v1))];
 			if (list.empty() || list.back() != int(i))
@@ -360,14 +360,16 @@ static std::vector<std::vector<int> > partitionMetis(const std::vector<Cluster>&
 }
 #endif
 
-static std::vector<std::vector<int> > partition(const std::vector<Cluster>& clusters, const std::vector<int>& pending)
+static std::vector<std::vector<int> > partition(const std::vector<Cluster>& clusters, const std::vector<int>& pending, const std::vector<unsigned int>& remap)
 {
 #ifdef METIS
 	static const char* metis = getenv("METIS");
 	if (metis && atoi(metis) >= 1)
-		return partitionMetis(clusters, pending);
+		return partitionMetis(clusters, pending, remap);
 #endif
 
+	(void)remap;
+
 	std::vector<std::vector<int> > result;
 
 	size_t last_indices = 0;
@@ -388,12 +390,10 @@ static std::vector<std::vector<int> > partition(const std::vector<Cluster>& clus
 	return result;
 }
 
-static void lockBoundary(std::vector<unsigned char>& locks, const std::vector<std::vector<int> >& groups, const std::vector<Cluster>& clusters)
+static void lockBoundary(std::vector<unsigned char>& locks, const std::vector<std::vector<int> >& groups, const std::vector<Cluster>& clusters, const std::vector<unsigned int>& remap)
 {
 	std::vector<int> groupmap(locks.size(), -1);
 
-	memset(&locks[0], 0, locks.size());
-
 	for (size_t i = 0; i < groups.size(); ++i)
 		for (size_t j = 0; j < groups[i].size(); ++j)
 		{
@@ -402,13 +402,22 @@ static void lockBoundary(std::vector<unsigned char>& locks, const std::vector<std::vector<int> >&
 			for (size_t k = 0; k < cluster.indices.size(); ++k)
 			{
 				unsigned int v = cluster.indices[k];
+				unsigned int r = remap[v];
 
-				if (groupmap[v] == -1 || groupmap[v] == int(i))
-					groupmap[v] = int(i);
+				if (groupmap[r] == -1 || groupmap[r] == int(i))
+					groupmap[r] = int(i);
 				else
-					locks[v] = 1;
+					groupmap[r] = -2;
 			}
 		}
+
+	// lock all vertices that are shared between groups; going through the remap keeps all copies of a position consistent
+	for (size_t i = 0; i < locks.size(); ++i)
+	{
+		unsigned int r = remap[i];
+
+		locks[i] = groupmap[r] == -2;
+	}
 }
 
 static std::vector<unsigned int> simplify(const std::vector<Vertex>& vertices, const std::vector<unsigned int>& indices, const std::vector<unsigned char>* locks, size_t target_count, float* error = NULL)
@@ -460,10 +469,16 @@ void nanite(const std::vector<Vertex>& vertices, const std::vector<unsigned int>
 	int depth = 0;
 	std::vector<unsigned char> locks(vertices.size());
 
+	// for cluster connectivity, we need a position-only remap that maps vertices with the same position to the same index
+	// it's more efficient to build it once; unfortunately, meshopt_generateVertexRemap doesn't support stride so we need to use *Multi version
+	std::vector<unsigned int> remap(vertices.size());
+	meshopt_Stream position = {&vertices[0].px, sizeof(float) * 3, sizeof(Vertex)};
+	meshopt_generateVertexRemapMulti(&remap[0], &indices[0], indices.size(), vertices.size(), &position, 1);
+
 	// merge and simplify clusters until we can't merge anymore
 	while (pending.size() > 1)
 	{
-		std::vector<std::vector<int> > groups = partition(clusters, pending);
+		std::vector<std::vector<int> > groups = partition(clusters, pending, remap);
 		pending.clear();
 
 		std::vector<int> retry;
@@ -478,7 +493,7 @@ void nanite(const std::vector<Vertex>& vertices, const std::vector<unsigned int>
 			dumpObj(vertices, std::vector<unsigned int>());
 
 		if (kUseLocks)
-			lockBoundary(locks, groups, clusters);
+			lockBoundary(locks, groups, clusters, remap);
 
 		// every group needs to be simplified now
 		for (size_t i = 0; i < groups.size(); ++i)

From 8a0beac45266571a450935b1b669332b19671efb Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine
Date: Tue, 1 Oct 2024 19:03:24 -0700
Subject: [PATCH 5/6] demo: Use vertex adjacency for partitioning

Instead of using shared edges to provide adjacency, we now use shared
vertices; this works more or less the same way but is easier to
compute. For some reason this also results in fewer stuck triangles
when using the full METIS pipeline.
---
 demo/nanite.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/demo/nanite.cpp b/demo/nanite.cpp
index 6ec748104..13f7c2167 100644
--- a/demo/nanite.cpp
+++ b/demo/nanite.cpp
@@ -279,8 +279,7 @@ static std::vector<Cluster> clusterize(const std::vector<Vertex>& vertices, cons
 static std::vector<std::vector<int> > partitionMetis(const std::vector<Cluster>& clusters, const std::vector<int>& pending, const std::vector<unsigned int>& remap)
 {
 	std::vector<std::vector<int> > result;
-
-	std::map<std::pair<int, int>, std::vector<int> > edges;
+	std::vector<std::vector<int> > vertices(remap.size());
 
 	for (size_t i = 0; i < pending.size(); ++i)
 	{
@@ -288,10 +287,9 @@ static std::vector<std::vector<int> > partitionMetis(const std::vector<Cluster>&
 
 		for (size_t j = 0; j < cluster.indices.size(); ++j)
 		{
-			int v0 = remap[cluster.indices[j + 0]];
-			int v1 = remap[cluster.indices[j + (j % 3 == 2 ? -2 : 1)]];
+			int v = remap[cluster.indices[j]];
 
-			std::vector<int>& list = edges[std::make_pair(std::min(v0, v1), std::max(v0, v1))];
+			std::vector<int>& list = vertices[v];
 			if (list.empty() || list.back() != int(i))
 				list.push_back(int(i));
 		}
@@ -299,9 +297,9 @@ static std::vector<std::vector<int> > partitionMetis(const std::vector<Cluster>&
 
 	std::map<std::pair<int, int>, int> adjacency;
 
-	for (std::map<std::pair<int, int>, std::vector<int> >::iterator it = edges.begin(); it != edges.end(); ++it)
+	for (size_t v = 0; v < vertices.size(); ++v)
 	{
-		const std::vector<int>& list = it->second;
+		const std::vector<int>& list = vertices[v];
 
 		for (size_t i = 0; i < list.size(); ++i)
 			for (size_t j = i + 1; j < list.size(); ++j)

From a2f74c6a0e8f36db30be6f3b2904135167e9711f Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine
Date: Tue, 1 Oct 2024 19:45:31 -0700
Subject: [PATCH 6/6] demo: Use a flexible (and larger) cluster group size

Using 4 clusters per group is fairly limiting with respect to partition
quality, as it demands very careful grouping that METIS is not willing
to provide. Instead, use a larger group size (8 at the moment), and
adjust the simplification condition to be adaptive to the group size so
that partitioning is free to produce groups of varying sizes.
---
 demo/nanite.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/demo/nanite.cpp b/demo/nanite.cpp
index 13f7c2167..868bc1c2a 100644
--- a/demo/nanite.cpp
+++ b/demo/nanite.cpp
@@ -51,6 +51,7 @@ struct Cluster
 };
 
 const size_t kClusterSize = 128;
+const size_t kGroupSize = 8;
 const bool kUseLocks = true;
 
 static LODBounds bounds(const std::vector<Vertex>& vertices, const std::vector<unsigned int>& indices, float error)
@@ -335,7 +336,7 @@ static std::vector<std::vector<int> > partitionMetis(const std::vector<Cluster>&
 
 	int nvtxs = int(pending.size());
 	int ncon = 1;
-	int nparts = int(pending.size() + 3) / 4;
+	int nparts = int(pending.size() + kGroupSize - 1) / kGroupSize;
 	int edgecut = 0;
 
 	if (nparts <= 1)
@@ -375,7 +376,7 @@ static std::vector<std::vector<int> > partition(const std::vector<Cluster>& clus
 	// rough merge; while clusters are approximately spatially ordered, this should use a proper partitioning algorithm
 	for (size_t i = 0; i < pending.size(); ++i)
 	{
-		if (result.empty() || last_indices + clusters[pending[i]].indices.size() > kClusterSize * 4 * 3)
+		if (result.empty() || last_indices + clusters[pending[i]].indices.size() > kClusterSize * kGroupSize * 3)
 		{
 			result.push_back(std::vector<int>());
 			last_indices = 0;
@@ -527,9 +528,10 @@ void nanite(const std::vector<Vertex>& vertices, const std::vector<unsigned int>
 			dumpObj("group", merged);
 		}
 
+		size_t target_size = ((groups[i].size() + 1) / 2) * kClusterSize * 3;
 		float error = 0.f;
-		std::vector<unsigned int> simplified = simplify(vertices, merged, kUseLocks ? &locks : NULL, kClusterSize * 2 * 3, &error);
-		if (simplified.size() > merged.size() * 0.85f || simplified.size() > kClusterSize * 3 * 3)
+		std::vector<unsigned int> simplified = simplify(vertices, merged, kUseLocks ? &locks : NULL, target_size, &error);
+		if (simplified.size() > merged.size() * 0.85f || simplified.size() / (kClusterSize * 3) >= merged.size() / (kClusterSize * 3))
 		{
 #if TRACE
 			printf("stuck cluster: simplified %d => %d over threshold\n", int(merged.size() / 3), int(simplified.size() / 3));
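
For readers following the series outside the demo, here is a small self-contained sketch of the boundary-locking approach the patches converge on: build a position-only remap once, mark position slots referenced by more than one cluster group, propagate the lock to every vertex copy so attribute seams stay consistent, and feed the flags to the simplifier. This is an illustration only, not code from the patches; the Vertex layout, the group_indices representation, the include path, and the helper names lockSharedBoundaries/simplifyGroup are assumptions made for the example.

// Illustrative sketch; mirrors the idea of lockBoundary + simplify above.
// The types, names, and include path here are assumptions, not the demo's actual code.
#include <float.h>
#include <vector>
#include "meshoptimizer.h" // assumed include path

struct Vertex { float px, py, pz, nx, ny, nz, tx, ty; }; // assumed layout: position first

// Lock every vertex whose *position* is referenced by more than one cluster group.
static std::vector<unsigned char> lockSharedBoundaries(const std::vector<Vertex>& vertices,
    const std::vector<std::vector<unsigned int> >& group_indices)
{
	// Gather all indices so the remap covers every vertex referenced by any group.
	std::vector<unsigned int> all_indices;
	for (size_t g = 0; g < group_indices.size(); ++g)
		all_indices.insert(all_indices.end(), group_indices[g].begin(), group_indices[g].end());

	// Position-only remap: copies that differ only in attributes map to the same slot.
	std::vector<unsigned int> remap(vertices.size());
	meshopt_Stream position = {&vertices[0].px, sizeof(float) * 3, sizeof(Vertex)};
	meshopt_generateVertexRemapMulti(&remap[0], &all_indices[0], all_indices.size(), vertices.size(), &position, 1);

	// owner[r] = the single group that uses position slot r, or -2 once a second group shows up.
	std::vector<int> owner(vertices.size(), -1);
	for (size_t g = 0; g < group_indices.size(); ++g)
		for (size_t k = 0; k < group_indices[g].size(); ++k)
		{
			unsigned int r = remap[group_indices[g][k]];
			owner[r] = (owner[r] == -1 || owner[r] == int(g)) ? int(g) : -2;
		}

	// Propagate the lock to every copy of a shared position so seams stay watertight.
	std::vector<unsigned char> locks(vertices.size());
	for (size_t i = 0; i < vertices.size(); ++i)
		locks[i] = owner[remap[i]] == -2;

	return locks;
}

// Simplify one merged group while keeping inter-group (locked) vertices in place.
static std::vector<unsigned int> simplifyGroup(const std::vector<Vertex>& vertices,
    const std::vector<unsigned int>& indices, const std::vector<unsigned char>& locks,
    size_t target_index_count, float* out_error)
{
	std::vector<unsigned int> lod(indices.size());
	lod.resize(meshopt_simplifyWithAttributes(&lod[0], &indices[0], indices.size(),
	    &vertices[0].px, vertices.size(), sizeof(Vertex),
	    NULL, 0, NULL, 0, &locks[0],
	    target_index_count, FLT_MAX,
	    meshopt_SimplifySparse | meshopt_SimplifyErrorAbsolute, out_error));
	return lod;
}

Counting groups per remapped position rather than per raw vertex index is what keeps the lock flags consistent across attribute seams, which is the inconsistency patch 4 describes fixing.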