From 39a95e24c352cc3bc2da9c76fdef3a2d03699434 Mon Sep 17 00:00:00 2001
From: Roz K
Date: Tue, 3 Jan 2023 21:31:36 +0100
Subject: [PATCH] Switch to buckets sorting.

---
Review notes (text between the three-dash line and the diff is not part of
the commit):
* Layout: one diff line per physical line again; the line breaks were placed
  so that every hunk matches the counts in its "@@" header. Indentation is
  reconstructed (four spaces) and must be checked against the repository
  before applying.
* Text in angle brackets looks lost in extraction (template arguments of
  reinterpret_cast/static_cast, the license URL, the author address). It is
  left as found and has to be restored from the repository.
* rk_sort_batch(): "modified" only followed the index copy and the instance
  total, yet it gates the upload of batch.commands. An instance that moves to
  another mesh can leave the sorted indices identical while the command
  changes, so each command slot is now compared as well. This assumes
  rk_cmp_memcpy() reports whether the destination changed - confirm.
* rk_buckets_alloc(): total_size is unsigned, so it is printed with "%u".
* Only added lines were touched; the hunk headers and the totals below were
  adjusted, the graph and the post-image id on the "index" line were not.
* Still open: the results of malloc()/realloc() are not checked.

 cpp/render/render_opengles.cpp | 236 ++++++++++++---------------------
 1 file changed, 91 insertions(+), 145 deletions(-)

diff --git a/cpp/render/render_opengles.cpp b/cpp/render/render_opengles.cpp
index 15d5d24..ce16bda 100644
--- a/cpp/render/render_opengles.cpp
+++ b/cpp/render/render_opengles.cpp
@@ -13,8 +13,6 @@
 // You should have received a copy of the GNU Affero General Public License
 // along with this program. If not, see .
 
-#define RK_BUCKETS_SORT
-
 #include "../render.hpp"
 #include "render_opengles.hpp"
 #include "../display/display_glx.hpp"
@@ -29,8 +27,6 @@ typedef void (*rk_MultiDrawElementsIndirectFunc)(rk_uint, rk_uint, const void *,
 static rk_DrawElementsInstancedBaseInstanceFunc rk_DrawElementsInstancedBaseInstance = nullptr;
 static rk_MultiDrawElementsIndirectFunc rk_MultiDrawElementsIndirect = nullptr;
 
-#ifdef RK_BUCKETS_SORT
-
 struct rk_bucket {
     unsigned size;
     unsigned count;
@@ -40,104 +36,6 @@ struct rk_bucket {
 static unsigned rk_nbuckets = 0;
 static rk_bucket * rk_buckets = nullptr;
 
-static void rk_buckets_alloc(
-    rk_batch const & batch) {
-    unsigned const count = batch.vertices->nmeshes;
-    unsigned const size = batch.max_size / count;
-    if (!rk_nbuckets) {
-        rk_nbuckets = count;
-        rk_buckets = reinterpret_cast(malloc(count * sizeof(rk_bucket)));
-        for (unsigned index = 0; index < count; ++index) {
-            rk_bucket & bucket = rk_buckets[index];
-            bucket.size = size;
-            bucket.indices = reinterpret_cast(malloc(size * sizeof(rk_ushort)));
-        }
-    }
-    else if (count <= rk_nbuckets) {
-        for (unsigned index = 0; index < count; ++index) {
-            rk_bucket & bucket = rk_buckets[index];
-            if (bucket.size < size) {
-                bucket.size = size;
-                bucket.indices = reinterpret_cast(realloc(bucket.indices, size * sizeof(rk_ushort)));
-            }
-        }
-    }
-    else {
-        rk_buckets = reinterpret_cast(realloc(rk_buckets, count * sizeof(rk_bucket)));
-        for (unsigned index = 0; index < rk_nbuckets; ++index) {
-            rk_bucket & bucket = rk_buckets[index];
-            if (bucket.size < size) {
-                bucket.size = size;
-                bucket.indices = reinterpret_cast(realloc(bucket.indices, size * sizeof(rk_ushort)));
-            }
-        }
-        for (unsigned index = rk_nbuckets; index < count; ++index) {
-            rk_bucket & bucket = rk_buckets[index];
-            bucket.size = size;
-            bucket.indices = reinterpret_cast(malloc(size * sizeof(rk_ushort)));
-        }
-        rk_nbuckets = count;
-    }
-    unsigned total_size = rk_nbuckets * sizeof(rk_bucket);
-    for (unsigned index = 0; index < rk_nbuckets; ++index) {
-        rk_bucket const & bucket = rk_buckets[index];
-        total_size += bucket.size * sizeof(rk_ushort);
-    }
-    printf("[RK] rk_buckets_alloc() -> %d KiB\n", total_size / 1024);
-}
-
-static bool rk_buckets_sort(
-    rk_batch const & batch) {
-    bool reallocated = false;
-    rk_bucket const * const last_bucket = rk_buckets + batch.vertices->nmeshes;
-    for (rk_bucket * bucket = rk_buckets; bucket < last_bucket; ++bucket) {
-        bucket->count = 0;
-    }
-    rk_instance_flags const * flags = batch.flags;
-    rk_ushort const * mesh_index = batch.meshes;
-    for (unsigned index = 0; index < batch.count; ++index, ++flags, ++mesh_index) {
-        if ((*flags & RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) == RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) {
-            rk_bucket & bucket = rk_buckets[*mesh_index];
-            if (bucket.count == bucket.size) {
-                bucket.size *= 2;
-                bucket.indices = reinterpret_cast(
-                    realloc(bucket.indices, bucket.size * sizeof(rk_ushort)));
-                reallocated = true;
-            }
-            bucket.indices[bucket.count++] = index;
-        }
-    }
-    bool modified = false;
-    rk_ushort * indices = batch.indices;
-    rk_command * command = batch.commands;
-    rk_mesh const * mesh = batch.vertices->meshes;
-    for (rk_bucket const * bucket = rk_buckets; bucket < last_bucket; ++bucket, ++mesh) {
-        if (bucket->count) {
-            command->nvertices = static_cast(mesh->ntriangles) * 3;
-            command->ninstances = bucket->count;
-            command->base_index = mesh->base_index;
-            command->base_vertex = 0;
-            command->base_instance = indices - batch.indices;
-            modified |= rk_cmp_memcpy(indices, bucket->indices, bucket->count * sizeof(rk_ushort));
-            indices += bucket->count;
-            ++command;
-        }
-    }
-    batch.ninstances = indices - batch.indices;
-    batch.ncommands = command - batch.commands;
-    if (reallocated) {
-        unsigned total_size = rk_nbuckets * sizeof(rk_bucket);
-        for (unsigned index = 0; index < rk_nbuckets; ++index) {
-            rk_bucket const & bucket = rk_buckets[index];
-            total_size += bucket.size * sizeof(rk_ushort);
-        }
-        printf("[RK] rk_buckets_sort() -> %d KiB\n", total_size / 1024);
-    }
-    return modified;
-}
-
-#endif // RK_BUCKETS_SORT
-
 static void rk_gl_printf(char const * message) {
     printf("[GL] %s\n", message);
 }
@@ -474,6 +372,60 @@ rk_vertices_t rk_create_vertices(
     return reinterpret_cast(vertices);
 }
 
+// Makes sure the file-scope bucket table has one bucket per mesh of this batch,
+// each with room for batch.max_size instance indices; logs the total on growth.
+static void rk_buckets_alloc(
+    rk_batch const & batch) {
+    unsigned const count = batch.vertices->nmeshes;
+    unsigned const size = batch.max_size;
+    bool reallocated = false;
+    if (!rk_nbuckets) {
+        rk_nbuckets = count;
+        rk_buckets = reinterpret_cast(malloc(count * sizeof(rk_bucket)));
+        for (unsigned index = 0; index < count; ++index) {
+            rk_bucket & bucket = rk_buckets[index];
+            bucket.size = size;
+            bucket.indices = reinterpret_cast(malloc(size * sizeof(rk_ushort)));
+        }
+        reallocated = true;
+    }
+    else if (count <= rk_nbuckets) {
+        for (unsigned index = 0; index < count; ++index) {
+            rk_bucket & bucket = rk_buckets[index];
+            if (bucket.size < size) {
+                bucket.size = size;
+                bucket.indices = reinterpret_cast(realloc(bucket.indices, size * sizeof(rk_ushort)));
+                reallocated = true;
+            }
+        }
+    }
+    else {
+        rk_buckets = reinterpret_cast(realloc(rk_buckets, count * sizeof(rk_bucket)));
+        for (unsigned index = 0; index < rk_nbuckets; ++index) {
+            rk_bucket & bucket = rk_buckets[index];
+            if (bucket.size < size) {
+                bucket.size = size;
+                bucket.indices = reinterpret_cast(realloc(bucket.indices, size * sizeof(rk_ushort)));
+            }
+        }
+        for (unsigned index = rk_nbuckets; index < count; ++index) {
+            rk_bucket & bucket = rk_buckets[index];
+            bucket.size = size;
+            bucket.indices = reinterpret_cast(malloc(size * sizeof(rk_ushort)));
+        }
+        rk_nbuckets = count;
+        reallocated = true;
+    }
+    if (reallocated) {
+        unsigned total_size = rk_nbuckets * sizeof(rk_bucket);
+        for (unsigned index = 0; index < rk_nbuckets; ++index) {
+            rk_bucket const & bucket = rk_buckets[index];
+            total_size += bucket.size * sizeof(rk_ushort);
+        }
+        printf("[RK] rk_buckets_alloc() -> %u KiB\n", total_size / 1024);
+    }
+}
+
 static void rk_pack_vec3_float(
     unsigned const count,
     rk_ushort const * const __restrict indices,
@@ -666,7 +618,9 @@ rk_batch_t rk_create_batch(
     batch->flags = new rk_instance_flags[max_size];
     batch->meshes = new rk_ushort[max_size];
     batch->indices = new rk_ushort[max_size];
+    memset(batch->indices, 0xFF, max_size * sizeof(rk_ushort));
     batch->commands = new rk_command[vertices->nmeshes];
+    memset(batch->commands, 0, vertices->nmeshes * sizeof(rk_command));
     if (nparams) {
         batch->params = new rk_parameter[nparams];
     } else {
@@ -800,58 +754,50 @@ rk_batch_t rk_create_batch(
         }
     }
     glBindVertexArray(0);
-#ifdef RK_BUCKETS_SORT
     rk_buckets_alloc(*batch);
-#endif
     return reinterpret_cast(batch);
 }
 
 static void rk_sort_batch(
     rk_batch const & batch) {
-#ifdef RK_BUCKETS_SORT
-    bool const modified = rk_buckets_sort(batch);
+    rk_bucket const * const last_bucket = rk_buckets + batch.vertices->nmeshes;
+    for (rk_bucket * __restrict bucket = rk_buckets; bucket < last_bucket; ++bucket) {
+        bucket->count = 0;
+    }
+    rk_instance_flags const * __restrict flags = batch.flags;
+    rk_ushort const * __restrict mesh_index = batch.meshes;
+    for (unsigned index = 0; index < batch.count; ++index, ++flags, ++mesh_index) {
+        if ((*flags & RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) == RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) {
+            rk_bucket & __restrict bucket = rk_buckets[*mesh_index];
+            bucket.indices[bucket.count++] = index;
+        }
+    }
+    bool modified = false;
+    rk_ushort * __restrict indices = batch.indices;
+    rk_command * __restrict command = batch.commands;
+    rk_mesh const * __restrict mesh = batch.vertices->meshes;
+    for (rk_bucket const * __restrict bucket = rk_buckets; bucket < last_bucket; ++bucket, ++mesh) {
+        if (bucket->count) {
+            // The sorted indices can stay identical while the command changes (an
+            // instance moved to another mesh), so compare the command slot as well.
+            rk_command const previous = *command;
+            command->nvertices = static_cast(mesh->ntriangles) * 3;
+            command->ninstances = bucket->count;
+            command->base_index = mesh->base_index;
+            command->base_instance = indices - batch.indices;
+            modified |= (memcmp(&previous, command, sizeof(rk_command)) != 0);
+            modified |= rk_cmp_memcpy(indices, bucket->indices, bucket->count * sizeof(rk_ushort));
+            indices += bucket->count;
+            ++command;
+        }
+    }
+    unsigned const ninstances = indices - batch.indices;
+    modified |= (ninstances != batch.ninstances);
+    batch.ninstances = ninstances;
+    batch.ncommands = command - batch.commands;
     if (modified && rk_MultiDrawElementsIndirect) {
         glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, batch.ncommands * sizeof(rk_command), batch.commands);
     }
-#else
-    rk_instance_flags const * flags = batch.flags;
-    rk_ushort * indices = batch.indices;
-    for (unsigned index = 0; index < batch.count; ++index, ++flags) {
-        if ((*flags & RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) == RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) {
-            *indices++ = index;
-        }
-    }
-    batch.ninstances = indices - batch.indices;
-    batch.ncommands = 0;
-    if (batch.ninstances) {
-        rk_command * const last_command = batch.commands + batch.vertices->nmeshes;
-        rk_command * command = batch.commands;
-        rk_ushort * base = batch.indices;
-        rk_ushort * const last = batch.indices + batch.ninstances;
-        for (rk_ushort * first = batch.indices; first < last && command < last_command; base = first, ++command) {
-            unsigned const mesh_index = batch.meshes[*first++];
-            for ( ; first < last && mesh_index == batch.meshes[*first]; ++first) {
-            }
-            for (rk_ushort * second = first; second < last; ++second) {
-                unsigned const index = *second;
-                if (mesh_index == batch.meshes[index]) {
-                    *second = *first;
-                    *first++ = index;
-                }
-            }
-            rk_mesh const & mesh = batch.vertices->meshes[mesh_index];
-            command->nvertices = static_cast(mesh.ntriangles) * 3;
-            command->ninstances = first - base;
-            command->base_index = mesh.base_index;
-            command->base_vertex = 0;
-            command->base_instance = base - batch.indices;
-        }
-        batch.ncommands = command - batch.commands;
-        if (rk_MultiDrawElementsIndirect && batch.ncommands) {
-            glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, batch.ncommands * sizeof(rk_command), batch.commands);
-        }
-    }
-#endif // RK_BUCKETS_SORT
     batch.state = RK_BATCH_STATE_SORTED;
 }
 