From ed87f292ffa3cf89903cddcdec396115893f7d66 Mon Sep 17 00:00:00 2001 From: Roz K Date: Mon, 2 Jan 2023 17:01:36 +0100 Subject: [PATCH] Split batch fill and batch draw. --- __init__.py | 11 +- cpp/render.hpp | 7 +- cpp/render/render_opengles.cpp | 275 ++++++++++++++++++++------------- cpp/render/render_opengles.hpp | 18 ++- 4 files changed, 196 insertions(+), 115 deletions(-) diff --git a/__init__.py b/__init__.py index b7d8ea3..dbbbbb0 100644 --- a/__init__.py +++ b/__init__.py @@ -444,15 +444,20 @@ draw_triangles.restype = None draw_triangles.argtypes = ( ctypes.c_void_p,) # triangles -draw_batch = _engine.rk_draw_batch -draw_batch.restype = None -draw_batch.argtypes = ( +fill_batch = _engine.rk_fill_batch +fill_batch.restype = None +fill_batch.argtypes = ( ctypes.c_void_p, # batch ctypes.c_uint, # count ctypes.POINTER(ctypes.c_ubyte), # flags ctypes.POINTER(ctypes.c_uint), # meshes ctypes.POINTER(ctypes.c_void_p)) # params +draw_batch = _engine.rk_draw_batch +draw_batch.restype = None +draw_batch.argtypes = ( + ctypes.c_void_p,) # batch + unselect_texture = _engine.rk_unselect_texture unselect_texture.restype = None unselect_texture.argtypes = ( diff --git a/cpp/render.hpp b/cpp/render.hpp index b39ddd9..de74b6a 100644 --- a/cpp/render.hpp +++ b/cpp/render.hpp @@ -163,13 +163,16 @@ RK_EXPORT void rk_select_texture( RK_EXPORT void rk_draw_triangles( rk_triangles_t triangles); -RK_EXPORT void rk_draw_batch( +RK_EXPORT void rk_fill_batch( rk_batch_t batch, - rk_uint size, + rk_uint count, rk_instance_flags const * flags, rk_mesh const * meshes, rk_ubyte const * const * params); +RK_EXPORT void rk_draw_batch( + rk_batch_t batch); + RK_EXPORT void rk_unselect_texture( rk_uint slot, rk_texture_t texture); diff --git a/cpp/render/render_opengles.cpp b/cpp/render/render_opengles.cpp index c1f2798..62e7fc9 100644 --- a/cpp/render/render_opengles.cpp +++ b/cpp/render/render_opengles.cpp @@ -17,6 +17,7 @@ #include "render_opengles.hpp" #include "../display/display_glx.hpp" #include +#include typedef void (*rk_DrawElementsInstancedBaseInstanceFunc)(rk_uint, rk_uint, rk_uint, const void *, rk_uint, rk_uint); typedef void (*rk_MultiDrawElementsIndirectFunc)(rk_uint, rk_uint, const void *, rk_uint, rk_uint); @@ -536,12 +537,22 @@ rk_batch_t rk_create_batch( } } rk_batch * batch = new rk_batch; + batch->state = RK_BATCH_STATE_EMPTY; + batch->count = 0; + batch->ncommands = 0; + batch->ninstances = 0; batch->max_size = max_size; batch->max_meshes = max_meshes; batch->nparams = nparams; + batch->flags = new rk_instance_flags[max_size]; + batch->meshes = new rk_mesh[max_size]; batch->indices = new rk_ushort[max_size]; batch->commands = new rk_command[max_meshes]; - batch->params = new rk_parameter[nparams]; + if (nparams) { + batch->params = new rk_parameter[nparams]; + } else { + batch->params = nullptr; + } glGenVertexArrays(1, &batch->vertex_array); glBindVertexArray(batch->vertex_array); glGenBuffers(1, &batch->vertices_buffer); @@ -596,49 +607,42 @@ rk_batch_t rk_create_batch( rk_parameter * param = batch->params; for (rk_param_format const * f = params_format; *f; ++f, ++param, ++binding) { GLboolean const norm = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0; + param->dirty = false; + param->binding = binding; + param->offset = offset; switch (*f & RK_PARAM_FORMAT_MASK) { case RK_PARAM_FORMAT_VEC3_FLOAT: - param->binding = binding; - param->offset = offset; - param->size = sizeof(rk_vec3_float); + param->src_size = sizeof(rk_vec3); + param->dst_size = sizeof(rk_vec3_float); param->packer = rk_pack_vec3_float; - glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->size); + glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, 0); glVertexAttribBinding(attrib++, binding); - glVertexBindingDivisor(binding, 1); - offset += max_size * param->size; break; case RK_PARAM_FORMAT_VEC3_SHORT: - param->binding = binding; - param->offset = offset; - param->size = sizeof(rk_vec3_short); + param->src_size = sizeof(rk_vec3); + param->dst_size = sizeof(rk_vec3_short); param->packer = norm ? rk_pack_vec3_short_norm : rk_pack_vec3_short; - glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->size); + glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 3, GL_SHORT, norm, 0); glVertexAttribBinding(attrib++, binding); - glVertexBindingDivisor(binding, 1); - offset += max_size * param->size; break; case RK_PARAM_FORMAT_VEC3_INT10: - param->binding = binding; - param->offset = offset; - param->size = sizeof(rk_vec3_int10); + param->src_size = sizeof(rk_vec3); + param->dst_size = sizeof(rk_vec3_int10); param->packer = norm ? rk_pack_vec3_int10_norm : rk_pack_vec3_int10; - glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->size); + glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, 0); glVertexAttribBinding(attrib++, binding); - glVertexBindingDivisor(binding, 1); - offset += max_size * param->size; break; case RK_PARAM_FORMAT_MAT3_FLOAT: - param->binding = binding; - param->offset = offset; - param->size = sizeof(rk_mat3_float); + param->src_size = sizeof(rk_mat3); + param->dst_size = sizeof(rk_mat3_float); param->packer = rk_pack_mat3_float; - glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->size); + glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, x)); glVertexAttribBinding(attrib++, binding); @@ -648,15 +652,12 @@ rk_batch_t rk_create_batch( glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, z)); glVertexAttribBinding(attrib++, binding); - glVertexBindingDivisor(binding, 1); - offset += max_size * param->size; break; case RK_PARAM_FORMAT_MAT3_INT10: - param->binding = binding; - param->offset = offset; - param->size = sizeof(rk_mat3_int10); + param->src_size = sizeof(rk_mat3); + param->dst_size = sizeof(rk_mat3_int10); param->packer = norm ? rk_pack_mat3_int10_norm : rk_pack_mat3_int10; - glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->size); + glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, x)); glVertexAttribBinding(attrib++, binding); @@ -666,10 +667,11 @@ rk_batch_t rk_create_batch( glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, z)); glVertexAttribBinding(attrib++, binding); - glVertexBindingDivisor(binding, 1); - offset += max_size * param->size; break; } + glVertexBindingDivisor(binding, 1); + param->source = new rk_ubyte[max_size * param->src_size]; + offset += max_size * param->dst_size; } } glBindVertexArray(0); @@ -771,95 +773,152 @@ RK_EXPORT void rk_draw_triangles( } } -static unsigned rk_batch_filter( - rk_batch const & batch, - unsigned const size, - rk_instance_flags const * flags) { - rk_ushort * indices = batch.indices; - for (unsigned index = 0; index < size; ++index, ++flags) { - if ((*flags & RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) == RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) { - *indices++ = static_cast(index); - } - } - return indices - batch.indices; -} - -static unsigned rk_batch_build_commands( - rk_batch const & batch, - unsigned const ninstances, - rk_mesh const * const meshes) { - rk_command * const last_command = batch.commands + batch.max_meshes; - rk_command * command = batch.commands; - rk_ushort * base = batch.indices; - rk_ushort * const last = batch.indices + ninstances; - for (rk_ushort * first = batch.indices; first < last && command < last_command; base = first, ++command) { - rk_mesh const & mesh = meshes[*first++]; - for ( ; first < last && meshes[*first].packed == mesh.packed; ++first) { - } - for (rk_ushort * second = first; second < last; ++second) { - unsigned const index = *second; - if (meshes[index].packed == mesh.packed) { - *second = *first; - *first++ = static_cast(index); - } - } - command->nvertices = static_cast(mesh.ntriangles) * 3; - command->ninstances = first - base; - command->base_index = mesh.base_index; - command->base_vertex = 0; - command->base_instance = base - batch.indices; - } - return command - batch.commands; -} - -static void rk_batch_pack( - rk_batch const & batch, - unsigned const ninstances, - rk_ubyte const * const * srcs) { - rk_parameter const * const last_param = batch.params + batch.nparams; - for (rk_parameter const * param = batch.params; param < last_param; ++param) { - rk_ubyte const * const src = *srcs++; - if (src) { - rk_ubyte * const dst = reinterpret_cast( - glMapBufferRange(GL_ARRAY_BUFFER, param->offset, ninstances * param->size, - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT)); - if (dst) { - param->packer(ninstances, batch.indices, dst, src); - glUnmapBuffer(GL_ARRAY_BUFFER); - } - } - } -} - -void rk_draw_batch( +void rk_fill_batch( rk_batch_t _batch, rk_uint count, rk_instance_flags const * flags, rk_mesh const * meshes, rk_ubyte const * const * params) { rk_batch const * const batch = reinterpret_cast(_batch); - if (!batch || !count || count > batch->max_size || !flags || !meshes) { + if (!batch || !count || count > batch->max_size) { + rk_printf("rk_fill_batch(): invalid params."); return; } - unsigned const ninstances = rk_batch_filter(*batch, count, flags); - if (!ninstances) { + bool const need_params = (batch->nparams > 0); + bool got_params = false; + bool all_params = false; + if (params != nullptr) { + all_params = true; + for (rk_ubyte const * const * param = params; param < params + batch->nparams; ++param) { + bool const got_param = (*param != nullptr); + got_params |= got_param; + all_params &= got_param; + } + } + bool const got_all = (flags && meshes && (!need_params || all_params)); + if (count > batch->count && !got_all) { + rk_printf("rk_fill_batch(): cannot grow without all flags, meshes and params."); + return; + } + bool const need_sorting = (flags || meshes || count != batch->count); + batch->count = count; + if (flags) { + memcpy(batch->flags, flags, count * sizeof(rk_instance_flags)); + } + if (meshes) { + memcpy(batch->meshes, meshes, count * sizeof(rk_mesh)); + } + if (need_params && got_params) { + rk_ubyte const * const * src = params; + for (rk_parameter const * dst = batch->params; dst < batch->params + batch->nparams; ++dst, ++src) { + dst->dirty = (*src || need_sorting); + if (*src) { + memcpy(dst->source, *src, count * dst->src_size); + } + } + } + if (need_sorting) { + batch->state = RK_BATCH_STATE_FILLED; + } else { + batch->state = RK_BATCH_STATE_SORTED; + } +} + +static void rk_sort_batch( + rk_batch const & batch) { + rk_instance_flags const * flags = batch.flags; + rk_ushort * indices = batch.indices; + for (unsigned index = 0; index < batch.count; ++index, ++flags) { + if ((*flags & RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) == RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) { + *indices++ = index; + } + } + batch.ninstances = indices - batch.indices; + batch.ncommands = 0; + if (batch.ninstances) { + rk_command * const last_command = batch.commands + batch.max_meshes; + rk_command * command = batch.commands; + rk_ushort * base = batch.indices; + rk_ushort * const last = batch.indices + batch.ninstances; + for (rk_ushort * first = batch.indices; first < last && command < last_command; base = first, ++command) { + rk_mesh const & mesh = batch.meshes[*first++]; + for ( ; first < last && batch.meshes[*first].packed == mesh.packed; ++first) { + } + for (rk_ushort * second = first; second < last; ++second) { + unsigned const index = *second; + if (batch.meshes[index].packed == mesh.packed) { + *second = *first; + *first++ = index; + } + } + command->nvertices = static_cast(mesh.ntriangles) * 3; + command->ninstances = first - base; + command->base_index = mesh.base_index; + command->base_vertex = 0; + command->base_instance = base - batch.indices; + } + batch.ncommands = command - batch.commands; + } + if (batch.nparams) { + batch.state = RK_BATCH_STATE_SORTED; + } else { + batch.state = RK_BATCH_STATE_PACKED; + } +} + +static void rk_pack_batch( + rk_batch const & batch) { + for (rk_parameter const * param = batch.params; param < batch.params + batch.nparams; ++param) { + if (param->dirty) { + param->dirty = false; + if (batch.ninstances) { + rk_ubyte * const dst = reinterpret_cast( + glMapBufferRange(GL_ARRAY_BUFFER, param->offset, batch.ninstances * param->dst_size, + GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT)); + if (dst) { + param->packer(batch.ninstances, batch.indices, dst, param->source); + glUnmapBuffer(GL_ARRAY_BUFFER); + } + } + } + } + batch.state = RK_BATCH_STATE_PACKED; +} + +void rk_draw_batch( + rk_batch_t _batch) { + rk_batch * const batch = reinterpret_cast(_batch); + if (!batch) { + rk_printf("rk_draw_batch(): invalid params."); + return; + } + if (batch->state < RK_BATCH_STATE_FILLED) { + rk_printf("rk_draw_batch(): invalid state."); + return; + } + if (batch->state < RK_BATCH_STATE_SORTED) { + rk_sort_batch(*batch); + } + if (!batch->ncommands) { return; } glBindVertexArray(batch->vertex_array); - unsigned const ncommands = rk_batch_build_commands(*batch, ninstances, meshes); if (rk_MultiDrawElementsIndirect) { glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch->commands_buffer); - glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, ncommands * sizeof(rk_command), batch->commands); + glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, batch->ncommands * sizeof(rk_command), batch->commands); } - if (batch->nparams && params) { + if (batch->state < RK_BATCH_STATE_PACKED) { glBindBuffer(GL_ARRAY_BUFFER, batch->params_buffer); - rk_batch_pack(*batch, ninstances, params); + rk_pack_batch(*batch); + glBindBuffer(GL_ARRAY_BUFFER, 0); } + rk_command const * const last_command = batch->commands + batch->ncommands; if (rk_DrawElementsInstancedBaseInstance) { if (rk_MultiDrawElementsIndirect) { - rk_MultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_SHORT, nullptr, ncommands, sizeof(rk_command)); + rk_MultiDrawElementsIndirect( + GL_TRIANGLES, GL_UNSIGNED_SHORT, nullptr, batch->ncommands, sizeof(rk_command)); + glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); } else { - rk_command const * const last_command = batch->commands + ncommands; for (rk_command const * command = batch->commands; command < last_command; ++command) { rk_DrawElementsInstancedBaseInstance( GL_TRIANGLES, command->nvertices, GL_UNSIGNED_SHORT, @@ -869,12 +928,11 @@ void rk_draw_batch( } } else { unsigned param_index = 0; - rk_command const * const last_command = batch->commands + ncommands; rk_parameter const * const last_param = batch->params + batch->nparams; for (rk_command const * command = batch->commands; command < last_command; ++command) { for (rk_parameter const * param = batch->params; param < last_param; ++param) { glBindVertexBuffer(param->binding, batch->params_buffer, - param->offset + param_index * param->size, param->size); + param->offset + param_index * param->dst_size, param->dst_size); } param_index += command->ninstances; glDrawElementsInstanced( @@ -883,12 +941,6 @@ void rk_draw_batch( command->ninstances); } } - if (rk_MultiDrawElementsIndirect) { - glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); - } - if (batch->nparams && params) { - glBindBuffer(GL_ARRAY_BUFFER, 0); - } glBindVertexArray(0); } @@ -917,10 +969,15 @@ void rk_destroy_batch( if (batch) { delete[] batch->indices; delete[] batch->commands; + delete[] batch->flags; + delete[] batch->meshes; if (rk_MultiDrawElementsIndirect) { glDeleteBuffers(1, &batch->commands_buffer); } if (batch->nparams) { + for (rk_parameter * param = batch->params; param < batch->params + batch->nparams; ++param) { + delete[] param->source; + } delete[] batch->params; glDeleteBuffers(1, &batch->params_buffer); } diff --git a/cpp/render/render_opengles.hpp b/cpp/render/render_opengles.hpp index 3d8af34..7906238 100644 --- a/cpp/render/render_opengles.hpp +++ b/cpp/render/render_opengles.hpp @@ -89,16 +89,32 @@ typedef void (*rk_packer)( rk_ubyte const * const); // src struct rk_parameter { + mutable bool dirty; unsigned binding; unsigned offset; - unsigned size; + unsigned src_size; + unsigned dst_size; + rk_ubyte * source; rk_packer packer; }; +enum rk_batch_state { + RK_BATCH_STATE_EMPTY = 0, + RK_BATCH_STATE_FILLED = 1, + RK_BATCH_STATE_SORTED = 2, + RK_BATCH_STATE_PACKED = 3 +}; + struct rk_batch { + mutable rk_batch_state state; + mutable unsigned count; + mutable unsigned ninstances; + mutable unsigned ncommands; unsigned max_size; unsigned max_meshes; unsigned nparams; + rk_instance_flags * flags; + rk_mesh * meshes; rk_ushort * indices; rk_command * commands; rk_parameter * params;