From 7db5304d40013c7e4631b66ab5199f9bbd109285 Mon Sep 17 00:00:00 2001 From: Roz K Date: Fri, 30 Dec 2022 10:50:56 +0100 Subject: [PATCH] Switch back to SoA parameters. --- __init__.py | 16 +- cpp/render.hpp | 2 +- cpp/render/render_opengles.cpp | 411 ++++++++++++++++++--------------- cpp/render/render_opengles.hpp | 65 ++---- 4 files changed, 262 insertions(+), 232 deletions(-) diff --git a/__init__.py b/__init__.py index 3b2dc86..d30461a 100644 --- a/__init__.py +++ b/__init__.py @@ -257,6 +257,18 @@ PARAM_FORMAT_VEC3_INT10 = 3 PARAM_FORMAT_MAT3_FLOAT = 4 PARAM_FORMAT_MAT3_INT10 = 5 PARAM_FORMAT_NORMALIZE = _flag(7) +_PARAM_FORMAT_MASK = PARAM_FORMAT_NORMALIZE - 1 + +_PARAMS_TYPES = ( + None, + vec3, # PARAM_FORMAT_VEC3_FLOAT + vec3, # PARAM_FORMAT_VEC3_SHORT + vec3, # PARAM_FORMAT_VEC3_INT10 + mat3, # PARAM_FORMAT_MAT3_FLOAT + mat3) # PARAM_FORMAT_MAT3_INT10 + +def param_type(format): + return _PARAMS_TYPES[format & _PARAM_FORMAT_MASK] def params_format(*format): return array('B', format).tobytes() @@ -395,10 +407,10 @@ select_vertices.argtypes = ( draw_batch = _engine.rk_draw_batch draw_batch.argtypes = ( ctypes.c_void_p, # batch - ctypes.c_uint, # size + ctypes.c_uint, # count ctypes.POINTER(ctypes.c_ubyte), # flags ctypes.POINTER(ctypes.c_uint), # meshes - ctypes.c_void_p) # params + ctypes.POINTER(ctypes.c_void_p)) # params unselect_vertices = _engine.rk_unselect_vertices unselect_vertices.argtypes = ( diff --git a/cpp/render.hpp b/cpp/render.hpp index 603210c..a629b76 100644 --- a/cpp/render.hpp +++ b/cpp/render.hpp @@ -167,7 +167,7 @@ RK_EXPORT void rk_draw_batch( rk_uint size, rk_instance_flags const * flags, rk_mesh const * meshes, - rk_ubyte const * params); + rk_ubyte const ** params); RK_EXPORT void rk_unselect_vertices( rk_vertices_t vertices); diff --git a/cpp/render/render_opengles.cpp b/cpp/render/render_opengles.cpp index 73c51a1..11404a7 100644 --- a/cpp/render/render_opengles.cpp +++ b/cpp/render/render_opengles.cpp @@ -348,220 +348,262 @@ 
rk_vertices_t rk_create_vertices( } static void rk_pack_vec3_float( - rk_pack_dst const dst, - rk_pack_src const src) { - *dst.vec3_float = *src.vec3_float; + unsigned const count, + rk_ushort const * const __restrict indices, + rk_ubyte * __restrict _dst, + rk_ubyte const * const __restrict _src) { + rk_ushort const * const last_index = indices + count; + rk_vec3_float * __restrict dst = reinterpret_cast(_dst); + rk_vec3_float const * const __restrict src = reinterpret_cast(_src); + for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + *dst = src[*index]; + } } static void rk_pack_vec3_short( - rk_pack_dst const dst, - rk_pack_src const src) { - dst.vec3_short->x = static_cast(src.vec3_float->x); - dst.vec3_short->y = static_cast(src.vec3_float->y); - dst.vec3_short->z = static_cast(src.vec3_float->z); + unsigned const count, + rk_ushort const * const __restrict indices, + rk_ubyte * __restrict _dst, + rk_ubyte const * const __restrict _src) { + rk_ushort const * const last_index = indices + count; + rk_vec3_short * __restrict dst = reinterpret_cast(_dst); + rk_vec3_float const * const __restrict src = reinterpret_cast(_src); + for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + rk_vec3_float const & input = src[*index]; + dst->x = static_cast(input.x); + dst->y = static_cast(input.y); + dst->z = static_cast(input.z); + } } static void rk_pack_vec3_short_norm( - rk_pack_dst const dst, - rk_pack_src const src) { + unsigned const count, + rk_ushort const * const __restrict indices, + rk_ubyte * __restrict _dst, + rk_ubyte const * const __restrict _src) { + rk_ushort const * const last_index = indices + count; + rk_vec3_short * __restrict dst = reinterpret_cast(_dst); + rk_vec3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s) * ((s) < 0.f ? 
32768.f : 32767.f))) - dst.vec3_short->x = _convert(src.vec3_float->x); - dst.vec3_short->y = _convert(src.vec3_float->y); - dst.vec3_short->z = _convert(src.vec3_float->z); + for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + rk_vec3_float const & input = src[*index]; + dst->x = _convert(input.x); + dst->y = _convert(input.y); + dst->z = _convert(input.z); + } #undef _convert } static void rk_pack_vec3_int10( - rk_pack_dst const dst, - rk_pack_src const src) { + unsigned const count, + rk_ushort const * const __restrict indices, + rk_ubyte * __restrict _dst, + rk_ubyte const * const __restrict _src) { + rk_ushort const * const last_index = indices + count; + rk_vec3_int10 * __restrict dst = reinterpret_cast(_dst); + rk_vec3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s)) & 1023) - *dst.vec3_int10 = - _convert(src.vec3_float->x) | (_convert(src.vec3_float->y) << 10) | (_convert(src.vec3_float->z) << 20); + for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + rk_vec3_float const & input = src[*index]; + *dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20); + } #undef _convert } static void rk_pack_vec3_int10_norm( - rk_pack_dst const dst, - rk_pack_src const src) { + unsigned const count, + rk_ushort const * const __restrict indices, + rk_ubyte * __restrict _dst, + rk_ubyte const * const __restrict _src) { + rk_ushort const * const last_index = indices + count; + rk_vec3_int10 * __restrict dst = reinterpret_cast(_dst); + rk_vec3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s) * ((s) < 0.f ? 
512.f : 511.f)) & 1023) - *dst.vec3_int10 = - _convert(src.vec3_float->x) | (_convert(src.vec3_float->y) << 10) | (_convert(src.vec3_float->z) << 20); + for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + rk_vec3_float const & input = src[*index]; + *dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20); + } #undef _convert } static void rk_pack_mat3_float( - rk_pack_dst const dst, - rk_pack_src const src) { - *dst.mat3_float = *src.mat3_float; + unsigned const count, + rk_ushort const * const __restrict indices, + rk_ubyte * __restrict _dst, + rk_ubyte const * const __restrict _src) { + rk_ushort const * const last_index = indices + count; + rk_mat3_float * __restrict dst = reinterpret_cast(_dst); + rk_mat3_float const * const __restrict src = reinterpret_cast(_src); + for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + *dst = src[*index]; + } + #undef _convert } static void rk_pack_mat3_int10( - rk_pack_dst const dst, - rk_pack_src const src) { + unsigned const count, + rk_ushort const * const __restrict indices, + rk_ubyte * __restrict _dst, + rk_ubyte const * const __restrict _src) { + rk_ushort const * const last_index = indices + count; + rk_mat3_int10 * __restrict dst = reinterpret_cast(_dst); + rk_mat3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s)) & 1023) - dst.mat3_int10->x = - _convert(src.mat3_float->x.x) | (_convert(src.mat3_float->x.y) << 10) | (_convert(src.mat3_float->x.z) << 20); - dst.mat3_int10->y = - _convert(src.mat3_float->y.x) | (_convert(src.mat3_float->y.y) << 10) | (_convert(src.mat3_float->y.z) << 20); - dst.mat3_int10->z = - _convert(src.mat3_float->z.x) | (_convert(src.mat3_float->z.y) << 10) | (_convert(src.mat3_float->z.z) << 20); + for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + rk_mat3_float const & input = src[*index]; + dst->x = 
_convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20); + dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20); + dst->z = _convert(input.z.x) | (_convert(input.z.y) << 10) | (_convert(input.z.z) << 20); + } #undef _convert } static void rk_pack_mat3_int10_norm( - rk_pack_dst const dst, - rk_pack_src const src) { + unsigned const count, + rk_ushort const * const __restrict indices, + rk_ubyte * __restrict _dst, + rk_ubyte const * const __restrict _src) { + rk_ushort const * const last_index = indices + count; + rk_mat3_int10 * __restrict dst = reinterpret_cast(_dst); + rk_mat3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023) - dst.mat3_int10->x = - _convert(src.mat3_float->x.x) | (_convert(src.mat3_float->x.y) << 10) | (_convert(src.mat3_float->x.z) << 20); - dst.mat3_int10->y = - _convert(src.mat3_float->y.x) | (_convert(src.mat3_float->y.y) << 10) | (_convert(src.mat3_float->y.z) << 20); - dst.mat3_int10->z = - _convert(src.mat3_float->z.x) | (_convert(src.mat3_float->z.y) << 10) | (_convert(src.mat3_float->z.z) << 20); + for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + rk_mat3_float const & input = src[*index]; + dst->x = _convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20); + dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20); + dst->z = _convert(input.z.x) | (_convert(input.z.y) << 10) | (_convert(input.z.z) << 20); + } #undef _convert } -//TODO: multiple batches per vertices +//TODO: multiple batches per vertices with their own buffers rk_batch_t rk_create_batch( rk_vertices_t _vertices, rk_uint max_size, rk_param_format const * params_format) { rk_vertices const * const vertices = reinterpret_cast(_vertices); - if (!vertices || !max_size || !params_format || max_size > RK_BATCH_MAX_SIZE) { + if (!vertices 
|| !max_size || max_size > RK_BATCH_MAX_SIZE) { rk_printf("rk_create_batch(): invalid parameters."); return nullptr; } unsigned nparams = 0; - unsigned params_size = 0; - unsigned packed_size = 0; - unsigned nattribs = 0; - for (rk_param_format const * f = params_format; *f; ++f, ++nparams) { - switch (*f & RK_PARAM_FORMAT_MASK) { - case RK_PARAM_FORMAT_VEC3_FLOAT: - params_size += sizeof(rk_vec3_float); - packed_size += sizeof(rk_vec3_float); - nattribs += 1; - break; - case RK_PARAM_FORMAT_VEC3_SHORT: - params_size += sizeof(rk_vec3_float); - packed_size += sizeof(rk_vec3_short); - nattribs += 1; - break; - case RK_PARAM_FORMAT_VEC3_INT10: - params_size += sizeof(rk_vec3_float); - packed_size += sizeof(rk_vec3_int10); - nattribs += 1; - break; - case RK_PARAM_FORMAT_MAT3_FLOAT: - params_size += sizeof(rk_mat3_float); - packed_size += sizeof(rk_mat3_float); - nattribs += 3; - break; - case RK_PARAM_FORMAT_MAT3_INT10: - params_size += sizeof(rk_mat3_float); - packed_size += sizeof(rk_mat3_int10); - nattribs += 3; - break; - default: - rk_printf("rk_create_batch(): invalid param format."); - return nullptr; - break; - } + if (params_format) { + for ( ; params_format[nparams]; ++nparams); } - glBindVertexArray(vertices->array); rk_batch * batch = new rk_batch; - batch->size = max_size; + batch->max_size = max_size; batch->nparams = nparams; - batch->params_size = params_size; - batch->packed_size = packed_size; - batch->indices = new rk_ushort[max_size]; - batch->commands = new rk_command[max_size * sizeof(rk_command)]; - memset(batch->commands, 0, max_size * sizeof(rk_command)); - if (rk_MultiDrawElementsIndirect) { - glGenBuffers(1, &batch->commands_buffer); - glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch->commands_buffer); - glBufferData(GL_DRAW_INDIRECT_BUFFER, max_size * sizeof(rk_command), batch->commands, GL_DYNAMIC_DRAW); - glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); - } else { - batch->commands_buffer = 0; - } - batch->packers = nullptr; + batch->indices = 
nullptr; + batch->commands = nullptr; batch->params = nullptr; + batch->packed_params = nullptr; + batch->indirect_buffer = 0; + batch->params_array = 0; + unsigned params_size = 0; if (nparams) { - batch->packers = new rk_packer[nparams]; - batch->params = new rk_ubyte[max_size * packed_size]; - memset(batch->params, 0, max_size * packed_size); - glGenBuffers(1, &batch->params_buffer); - glBindBuffer(GL_ARRAY_BUFFER, batch->params_buffer); - glBufferData(GL_ARRAY_BUFFER, max_size * batch->packed_size, batch->params, GL_DYNAMIC_DRAW); - glBindBuffer(GL_ARRAY_BUFFER, 0); - glBindVertexBuffer(RK_PARAMS_BINDING, batch->params_buffer, 0, batch->packed_size); - for (unsigned attrib = vertices->layout; attrib < vertices->layout + nattribs; ++attrib) { - glEnableVertexAttribArray(attrib); - } - rk_packer * packer = batch->packers; - unsigned layout = vertices->layout; - unsigned offset = 0; - for (rk_param_format const * f = params_format; *f; ++f, ++packer) { - GLboolean const norm = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0; + batch->params = new rk_parameter[nparams]; + rk_parameter * param = batch->params; + for (rk_param_format const * f = params_format; *f; ++f, ++param) { + param->offset = params_size; + bool const norm = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0; switch (*f & RK_PARAM_FORMAT_MASK) { case RK_PARAM_FORMAT_VEC3_FLOAT: - glVertexAttribFormat(layout++, 3, GL_FLOAT, GL_FALSE, offset); - packer->pack = rk_pack_vec3_float; - packer->src_incr = sizeof(rk_vec3_float); - packer->dst_incr = sizeof(rk_vec3_float); + param->size = sizeof(rk_vec3_float); + param->packer = rk_pack_vec3_float; break; case RK_PARAM_FORMAT_VEC3_SHORT: - glVertexAttribFormat(layout++, 3, GL_SHORT, norm, offset); - if (norm) { - packer->pack = rk_pack_vec3_short_norm; - } else { - packer->pack = rk_pack_vec3_short; - } - packer->src_incr = sizeof(rk_vec3_float); - packer->dst_incr = sizeof(rk_vec3_short); + param->size = sizeof(rk_vec3_short); + param->packer = norm ? 
rk_pack_vec3_short_norm : rk_pack_vec3_short; break; case RK_PARAM_FORMAT_VEC3_INT10: - glVertexAttribFormat(layout++, 4, GL_INT_2_10_10_10_REV, norm, offset); - if (norm) { - packer->pack = rk_pack_vec3_int10_norm; - } else { - packer->pack = rk_pack_vec3_int10; - } - packer->src_incr = sizeof(rk_vec3_float); - packer->dst_incr = sizeof(rk_vec3_int10); + param->size = sizeof(rk_vec3_int10); + param->packer = norm ? rk_pack_vec3_int10_norm : rk_pack_vec3_int10; break; case RK_PARAM_FORMAT_MAT3_FLOAT: - glVertexAttribFormat(layout++, 3, GL_FLOAT, GL_FALSE, offset + offsetof(rk_mat3_float, x)); - glVertexAttribFormat(layout++, 3, GL_FLOAT, GL_FALSE, offset + offsetof(rk_mat3_float, y)); - glVertexAttribFormat(layout++, 3, GL_FLOAT, GL_FALSE, offset + offsetof(rk_mat3_float, z)); - packer->pack = rk_pack_mat3_float; - packer->src_incr = sizeof(rk_mat3_float); - packer->dst_incr = sizeof(rk_mat3_float); + param->size = sizeof(rk_mat3_float); + param->packer = rk_pack_mat3_float; break; case RK_PARAM_FORMAT_MAT3_INT10: - glVertexAttribFormat(layout++, 4, GL_INT_2_10_10_10_REV, norm, offset + offsetof(rk_mat3_int10, x)); - glVertexAttribFormat(layout++, 4, GL_INT_2_10_10_10_REV, norm, offset + offsetof(rk_mat3_int10, y)); - glVertexAttribFormat(layout++, 4, GL_INT_2_10_10_10_REV, norm, offset + offsetof(rk_mat3_int10, z)); - if (norm) { - packer->pack = rk_pack_mat3_int10_norm; - } else { - packer->pack = rk_pack_mat3_int10; - } - packer->src_incr = sizeof(rk_mat3_float); - packer->dst_incr = sizeof(rk_mat3_int10); + param->size = sizeof(rk_mat3_int10); + param->packer = norm ? 
rk_pack_mat3_int10_norm : rk_pack_mat3_int10; + break; + default: + rk_printf("rk_create_batch(): invalid param format."); + delete[] batch->params; + delete batch; + return nullptr; break; } - offset += packer->dst_incr; + params_size += max_size * param->size; } - for (unsigned attrib = vertices->layout; attrib < vertices->layout + nattribs; ++attrib) { - glVertexAttribBinding(attrib, RK_PARAMS_BINDING); - } - glVertexBindingDivisor(RK_PARAMS_BINDING, 1); - } else { - batch->params_buffer = 0; } - glBindVertexArray(0); + batch->indices = new rk_ushort[max_size]; + batch->commands = new rk_command[max_size]; + memset(batch->commands, 0, max_size * sizeof(rk_command)); + batch->indirect_buffer = 0; + if (rk_MultiDrawElementsIndirect) { + glGenBuffers(1, &batch->indirect_buffer); + glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch->indirect_buffer); + glBufferData(GL_DRAW_INDIRECT_BUFFER, max_size * sizeof(rk_command), nullptr, GL_DYNAMIC_DRAW); + glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); + } + if (nparams) { + batch->packed_params = new rk_ubyte[params_size]; + memset(batch->packed_params, 0, params_size); + glBindVertexArray(vertices->array); + glGenBuffers(1, &batch->params_array); + glBindBuffer(GL_ARRAY_BUFFER, batch->params_array); + glBufferData(GL_ARRAY_BUFFER, params_size, nullptr, GL_DYNAMIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, 0); + rk_parameter const * param = batch->params; + unsigned binding = RK_PARAMS_BINDING_BASE; + unsigned attrib = vertices->layout; + for (rk_param_format const * f = params_format; *f; ++f, ++param, ++binding) { + bool const norm = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0; + glBindVertexBuffer(binding, batch->params_array, param->offset, param->size); + switch (*f & RK_PARAM_FORMAT_MASK) { + case RK_PARAM_FORMAT_VEC3_FLOAT: + glEnableVertexAttribArray(attrib); + glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, 0); + glVertexAttribBinding(attrib++, binding); + break; + case RK_PARAM_FORMAT_VEC3_SHORT: + 
glEnableVertexAttribArray(attrib); + glVertexAttribFormat(attrib, 3, GL_SHORT, norm, 0); + glVertexAttribBinding(attrib++, binding); + break; + case RK_PARAM_FORMAT_VEC3_INT10: + glEnableVertexAttribArray(attrib); + glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, 0); + glVertexAttribBinding(attrib++, binding); + break; + case RK_PARAM_FORMAT_MAT3_FLOAT: + glEnableVertexAttribArray(attrib); + glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, x)); + glVertexAttribBinding(attrib++, binding); + glEnableVertexAttribArray(attrib); + glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, y)); + glVertexAttribBinding(attrib++, binding); + glEnableVertexAttribArray(attrib); + glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, z)); + glVertexAttribBinding(attrib++, binding); + break; + case RK_PARAM_FORMAT_MAT3_INT10: + glEnableVertexAttribArray(attrib); + glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, x)); + glVertexAttribBinding(attrib++, binding); + glEnableVertexAttribArray(attrib); + glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, y)); + glVertexAttribBinding(attrib++, binding); + glEnableVertexAttribArray(attrib); + glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, z)); + glVertexAttribBinding(attrib++, binding); + break; + } + glVertexBindingDivisor(binding, 1); + } + glBindVertexArray(0); + } return batch; } @@ -704,7 +746,6 @@ static unsigned rk_batch_build_commands( commands->nvertices = static_cast(mesh.ntriangles) * 3; commands->ninstances = first - base; commands->base_index = mesh.base_index; - commands->base_vertex = 0; commands->base_instance = base - batch.indices; } return commands - batch.commands; @@ -713,47 +754,40 @@ static unsigned rk_batch_build_commands( static void rk_batch_pack( rk_batch & batch, unsigned const ninstances, - rk_ubyte const * const params) { 
- rk_pack_dst dst(batch.params); - rk_ushort const * const last_index = batch.indices + ninstances; - rk_packer const * const last_packer = batch.packers + batch.nparams; - for (rk_ushort const * index = batch.indices; index < last_index; ++index) { - rk_pack_src src(&params[batch.params_size * (*index)]); - for (rk_packer const * packer = batch.packers; packer < last_packer; ++packer) { - packer->pack(dst, src); - src.ptr += packer->src_incr; - dst.ptr += packer->dst_incr; + rk_ubyte const ** srcs) { + rk_parameter const * const last_param = batch.params + batch.nparams; + for (rk_parameter const * param = batch.params; param < last_param; ++param) { + rk_ubyte const * const src = *srcs++; + if (src) { + rk_ubyte * const dst = batch.packed_params + param->offset; + param->packer(ninstances, batch.indices, dst, src); + glBufferSubData(GL_ARRAY_BUFFER, param->offset, ninstances * param->size, dst); } } } void rk_draw_batch( rk_batch_t _batch, - rk_uint size, + rk_uint count, rk_instance_flags const * flags, rk_mesh const * meshes, - rk_ubyte const * params) { + rk_ubyte const ** params) { rk_batch & batch = *reinterpret_cast(_batch); - if (!size || size > batch.size || !flags || !meshes || !rk_current_shader || !rk_current_vertices) { + if (!count || count > batch.max_size || !flags || !meshes || !rk_current_shader || !rk_current_vertices) { return; } - unsigned const ninstances = rk_batch_filter(batch, size, flags); + unsigned const ninstances = rk_batch_filter(batch, count, flags); if (!ninstances) { return; } - if (rk_MultiDrawElementsIndirect) { - glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch.commands_buffer); - } - if (batch.nparams) { - glBindBuffer(GL_ARRAY_BUFFER, batch.params_buffer); - } unsigned const ncommands = rk_batch_build_commands(batch, ninstances, meshes); if (rk_MultiDrawElementsIndirect) { + glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch.indirect_buffer); glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, ncommands * sizeof(rk_command), batch.commands); } - if
(batch.nparams) { + if (batch.nparams && params) { + glBindBuffer(GL_ARRAY_BUFFER, batch.params_array); rk_batch_pack(batch, ninstances, params); - glBufferSubData(GL_ARRAY_BUFFER, 0, ninstances * batch.packed_size, batch.params); } if (rk_DrawElementsInstancedBaseInstance) { if (rk_MultiDrawElementsIndirect) { @@ -768,13 +802,16 @@ void rk_draw_batch( } } } else { - unsigned params_offset = 0; + unsigned param_index = 0; rk_command const * const last_command = batch.commands + ncommands; + rk_parameter const * const last_param = batch.params + batch.nparams; for (rk_command const * command = batch.commands; command < last_command; ++command) { - if (batch.nparams) { - glBindVertexBuffer(RK_PARAMS_BINDING, batch.params_buffer, params_offset, batch.packed_size); - params_offset += command->ninstances * batch.packed_size; + unsigned binding = RK_PARAMS_BINDING_BASE; + for (rk_parameter const * param = batch.params; param < last_param; ++param, ++binding) { + glBindVertexBuffer(binding, batch.params_array, + param->offset + param_index * param->size, param->size); } + param_index += command->ninstances; glDrawElementsInstanced( GL_TRIANGLES, command->nvertices, GL_UNSIGNED_SHORT, reinterpret_cast(command->base_index << 1), @@ -784,7 +821,7 @@ void rk_draw_batch( if (rk_MultiDrawElementsIndirect) { glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); } - if (batch.nparams) { + if (batch.nparams && params) { glBindBuffer(GL_ARRAY_BUFFER, 0); } } @@ -824,13 +861,13 @@ void rk_destroy_batch( if (batch) { delete[] batch->indices; delete[] batch->commands; - if (batch->nparams) { - delete[] batch->packers; - delete[] batch->params; - glDeleteBuffers(1, &batch->params_buffer); - } if (rk_MultiDrawElementsIndirect) { - glDeleteBuffers(1, &batch->commands_buffer); + glDeleteBuffers(1, &batch->indirect_buffer); + } + if (batch->nparams) { + delete[] batch->params; + delete[] batch->packed_params; + glDeleteBuffers(1, &batch->params_array); } delete batch; } diff --git 
a/cpp/render/render_opengles.hpp b/cpp/render/render_opengles.hpp index 61ef1de..d980dfd 100644 --- a/cpp/render/render_opengles.hpp +++ b/cpp/render/render_opengles.hpp @@ -23,7 +23,7 @@ enum : GLuint { RK_VERTICES_BINDING = 0, - RK_PARAMS_BINDING = 1 + RK_PARAMS_BINDING_BASE = 1 }; struct rk_shader { @@ -51,6 +51,14 @@ struct rk_vertices { GLuint indices; }; +struct rk_command { + GLuint nvertices; + GLuint ninstances; + GLuint base_index; + GLint base_vertex; + GLuint base_instance; +}; + struct rk_vec3_float { float x; float y; @@ -79,54 +87,27 @@ struct rk_mat3_int10 { rk_vec3_int10 z; }; -union rk_pack_src { - rk_ubyte const * __restrict ptr; - rk_vec3_float const * __restrict vec3_float; - rk_mat3_float const * __restrict mat3_float; +typedef void (*rk_packer)( + unsigned const, // count + rk_ushort const * const, // indices + rk_ubyte *, // dst + rk_ubyte const * const); // src - inline rk_pack_src() {} - inline rk_pack_src(rk_ubyte const * const __restrict src) : ptr(src) {} -}; - -union rk_pack_dst { - rk_ubyte * __restrict ptr; - rk_vec3_float * __restrict vec3_float; - rk_vec3_short * __restrict vec3_short; - rk_vec3_int10 * __restrict vec3_int10; - rk_mat3_float * __restrict mat3_float; - rk_mat3_int10 * __restrict mat3_int10; - - inline rk_pack_dst() {} - inline rk_pack_dst(rk_ubyte * const __restrict dst) : ptr(dst) {} -}; - -typedef void (*rk_packer_fn)(rk_pack_dst const, rk_pack_src const); - -struct rk_packer { - rk_packer_fn pack; - unsigned src_incr; - unsigned dst_incr; -}; - -struct rk_command { - GLuint nvertices; - GLuint ninstances; - GLuint base_index; - GLint base_vertex; - GLuint base_instance; +struct rk_parameter { + unsigned offset; + unsigned size; + rk_packer packer; }; struct rk_batch { - unsigned size; + unsigned max_size; unsigned nparams; - unsigned params_size; - unsigned packed_size; rk_ushort * indices; rk_command * commands; - rk_packer * packers; - rk_ubyte * params; - GLuint commands_buffer; - GLuint params_buffer; + 
rk_parameter * params; + rk_ubyte * packed_params; + GLuint indirect_buffer; + GLuint params_array; }; #endif // _RK_ENGINE_RENDER_OPENGLES_H