diff --git a/cpp/render/render_opengles.cpp b/cpp/render/render_opengles.cpp
index 0d0246e..f56a815 100644
--- a/cpp/render/render_opengles.cpp
+++ b/cpp/render/render_opengles.cpp
@@ -13,9 +13,12 @@
 // You should have received a copy of the GNU Affero General Public License
 // along with this program. If not, see .
 
+#define RK_BUCKETS_SORT
+
 #include "../render.hpp"
 #include "render_opengles.hpp"
 #include "../display/display_glx.hpp"
+#include <cstdlib>
 #include
 #include
 
@@ -25,6 +28,145 @@ typedef void (*rk_MultiDrawElementsIndirectFunc)(rk_uint, rk_uint, const void *,
 static rk_DrawElementsInstancedBaseInstanceFunc rk_DrawElementsInstancedBaseInstance = nullptr;
 static rk_MultiDrawElementsIndirectFunc rk_MultiDrawElementsIndirect = nullptr;
 
+// Copies `size` bytes from `_src` to `_dst`, one rk_ulong at a time and then
+// byte by byte for the tail, while OR-ing together the XOR of every old/new pair.
+// Returns true when the destination content actually changed, false when both
+// buffers were already identical. The buffers must not overlap (__restrict).
+static bool rk_compare_replace(
+    void * __restrict _dst,
+    void const * __restrict _src,
+    unsigned const size) {
+    rk_ulong hash = 0; // stays zero as long as no differing bit has been seen
+    rk_ulong * dst = reinterpret_cast<rk_ulong *>(_dst);
+    rk_ulong const * src = reinterpret_cast<rk_ulong const *>(_src);
+    unsigned count = size / sizeof(rk_ulong);
+    unsigned remain = (size - count * sizeof(rk_ulong));
+    if (count) {
+        do {
+            hash |= *dst ^ *src;
+            *dst++ = *src++;
+        } while(--count > 0);
+    }
+    if (remain) {
+        rk_ubyte * rdst = reinterpret_cast<rk_ubyte *>(dst);
+        rk_ubyte const * rsrc = reinterpret_cast<rk_ubyte const *>(src);
+        do {
+            hash |= *rdst ^ *rsrc;
+            *rdst++ = *rsrc++;
+        } while(--remain > 0);
+    }
+    return (hash != 0);
+}
+
+#ifdef RK_BUCKETS_SORT
+
+// Scratch list of the instance indices collected for one mesh.
+struct rk_bucket {
+    unsigned size;       // capacity of `indices`, in entries
+    unsigned count;      // entries filled by the current rk_buckets_sort() pass
+    rk_ushort * indices; // instance indices, allocated with realloc()
+};
+
+// Bucket table shared by all batches: one bucket per mesh index, grown on demand.
+static unsigned rk_nbuckets = 0;
+static rk_bucket * rk_buckets = nullptr;
+
+// Returns the memory held by the bucket table and all index buffers, in bytes.
+static unsigned rk_buckets_total_size() {
+    unsigned total_size = rk_nbuckets * sizeof(rk_bucket);
+    for (unsigned index = 0; index < rk_nbuckets; ++index) {
+        total_size += rk_buckets[index].size * sizeof(rk_ushort);
+    }
+    return total_size;
+}
+
+// Makes sure the shared table has one bucket per mesh of `batch`, each able to hold
+// at least max_size / nmeshes indices (never fewer than one), then logs the footprint.
+static void rk_buckets_alloc(
+    rk_batch const & batch) {
+    unsigned const count = batch.vertices->nmeshes;
+    if (!count) {
+        // Nothing to bucket, and the per-mesh quota below would divide by zero.
+        return;
+    }
+    // Keep at least one slot per bucket so that a quota rounded down to zero
+    // never yields a zero-byte allocation.
+    unsigned const quota = batch.max_size / count;
+    unsigned const size = quota ? quota : 1;
+    if (count > rk_nbuckets) {
+        rk_buckets = reinterpret_cast<rk_bucket *>(realloc(rk_buckets, count * sizeof(rk_bucket)));
+        // New entries start empty; realloc(nullptr, n) below then acts as malloc(n).
+        for (unsigned index = rk_nbuckets; index < count; ++index) {
+            rk_bucket & bucket = rk_buckets[index];
+            bucket.size = 0;
+            bucket.count = 0;
+            bucket.indices = nullptr;
+        }
+        rk_nbuckets = count;
+    }
+    for (unsigned index = 0; index < count; ++index) {
+        rk_bucket & bucket = rk_buckets[index];
+        if (bucket.size < size) {
+            bucket.size = size;
+            bucket.indices = reinterpret_cast<rk_ushort *>(realloc(bucket.indices, size * sizeof(rk_ushort)));
+        }
+    }
+    printf("[RK] rk_buckets_alloc() -> %u KiB\n", rk_buckets_total_size() / 1024);
+}
+
+// Groups the instances of `batch` flagged RK_INSTANCE_FLAGS_SPAWNED_VISIBLE by mesh, then
+// rewrites batch.indices and batch.commands with one command per non-empty bucket, in mesh order.
+// Returns true when the content written to batch.indices differs from what was there before.
+static bool rk_buckets_sort(
+    rk_batch const & batch) {
+    bool reallocated = false;
+    rk_bucket const * const last_bucket = rk_buckets + batch.vertices->nmeshes;
+    for (rk_bucket * bucket = rk_buckets; bucket < last_bucket; ++bucket) {
+        bucket->count = 0;
+    }
+    rk_instance_flags const * flags = batch.flags;
+    rk_ushort const * mesh_index = batch.meshes;
+    for (unsigned index = 0; index < batch.count; ++index, ++flags, ++mesh_index) {
+        if ((*flags & RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) == RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) {
+            rk_bucket & bucket = rk_buckets[*mesh_index];
+            if (bucket.count == bucket.size) {
+                // Doubling an empty bucket would leave it empty, so start from one slot.
+                bucket.size = bucket.size ? bucket.size * 2 : 1;
+                bucket.indices = reinterpret_cast<rk_ushort *>(
+                    realloc(bucket.indices, bucket.size * sizeof(rk_ushort)));
+                reallocated = true;
+            }
+            bucket.indices[bucket.count++] = static_cast<rk_ushort>(index);
+        }
+    }
+    bool modified = false;
+    rk_ushort * indices = batch.indices;
+    rk_command * command = batch.commands;
+    rk_mesh const * mesh = batch.vertices->meshes;
+    for (rk_bucket const * bucket = rk_buckets; bucket < last_bucket; ++bucket, ++mesh) {
+        if (bucket->count) {
+            // NOTE(review): the cast's target type is missing from this copy of the patch;
+            // rk_uint is assumed, confirm against the declaration of rk_command::nvertices.
+            command->nvertices = static_cast<rk_uint>(mesh->ntriangles) * 3;
+            command->ninstances = bucket->count;
+            command->base_index = mesh->base_index;
+            command->base_vertex = 0;
+            command->base_instance = indices - batch.indices;
+            modified |= rk_compare_replace(indices, bucket->indices, bucket->count * sizeof(rk_ushort));
+            indices += bucket->count;
+            ++command;
+        }
+    }
+    batch.ninstances = indices - batch.indices;
+    batch.ncommands = command - batch.commands;
+    if (reallocated) {
+        printf("[RK] rk_buckets_sort() -> %u KiB\n", rk_buckets_total_size() / 1024);
+    }
+    return modified;
+}
+
+#endif // RK_BUCKETS_SORT
+
 static void rk_gl_printf(char const * message) {
     printf("[GL] %s\n", message);
 }
@@ -687,11 +829,20 @@ rk_batch_t rk_create_batch(
         }
     }
     glBindVertexArray(0);
+#ifdef RK_BUCKETS_SORT
+    rk_buckets_alloc(*batch);
+#endif
     return reinterpret_cast<rk_batch_t>(batch);
 }
 
 static void rk_sort_batch(
     rk_batch const & batch) {
+#ifdef RK_BUCKETS_SORT
+    bool const modified = rk_buckets_sort(batch);
+    if (modified && rk_MultiDrawElementsIndirect) {
+        glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, batch.ncommands * sizeof(rk_command), batch.commands);
+    }
+#else
     rk_instance_flags const * flags = batch.flags;
     rk_ushort * indices = batch.indices;
     for (unsigned index = 0; index < batch.count; ++index, ++flags) {
@@ -729,6 +880,7 @@ static void rk_sort_batch(
             glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, batch.ncommands * sizeof(rk_command), batch.commands);
         }
     }
+#endif // RK_BUCKETS_SORT
     batch.state = RK_BATCH_STATE_SORTED;
 }
 
@@ -755,32 +907,6 @@ static void rk_pack_batch(
     batch.state = RK_BATCH_STATE_PACKED;
 }
 
-static bool rk_compare_replace(
-    void * __restrict _dst,
-    void const * __restrict _src,
-    unsigned const size) {
-    rk_ulong hash = 0;
-    rk_ulong * dst = reinterpret_cast<rk_ulong *>(_dst);
-    rk_ulong const * src = reinterpret_cast<rk_ulong const *>(_src);
-    unsigned count = size / sizeof(rk_ulong);
-    unsigned remain = (size - count * sizeof(rk_ulong));
-    if (count) {
-        do {
-            hash |= *dst ^ *src;
-            *dst++ = *src++;
-        } while(--count > 0);
-    }
-    rk_ubyte * rdst = reinterpret_cast<rk_ubyte *>(dst);
-    rk_ubyte const * rsrc = reinterpret_cast<rk_ubyte const *>(src);
-    if (remain) {
-        do {
-            hash |= *rdst ^ *rsrc;
-            *rdst++ = *rsrc++;
-        } while(--remain > 0);
-    }
-    return (hash != 0);
-}
-
 void rk_fill_batch(
     rk_batch_t _batch,
     rk_uint count,