From 5c9e10eea8059353c7bd8d48e669a9009b60c218 Mon Sep 17 00:00:00 2001 From: Roz K Date: Sat, 17 Dec 2022 16:50:41 +0100 Subject: [PATCH] Change instance parameters from SoA to AoS. --- __init__.py | 31 +++--- cpp/opengl/render_opengles.cpp | 196 ++++++++++++++++----------------- cpp/opengl/render_opengles.hpp | 13 ++- cpp/render.hpp | 20 ++-- 4 files changed, 131 insertions(+), 129 deletions(-) diff --git a/__init__.py b/__init__.py index 3a38387..c0cbf26 100644 --- a/__init__.py +++ b/__init__.py @@ -280,6 +280,20 @@ def set_input_mat4(input, value): assert len(value) == 16 _set_input_mat4(input, _mat4(value)) +resolve_param = _lib.rk_resolve_param +resolve_param.restype = ctypes.c_void_p +resolve_param.argtypes = ( + ctypes.c_char_p,) # name + +_set_param_vec3 = _lib.rk_set_param_vec3 +_set_param_vec3.argtypes = ( + ctypes.c_uint, # layout + _vec3_t) # value + +def set_param_vec3(param, value): + assert len(value) == 3 + _set_param_vec3(param, _vec3(value)) + _create_texture = _lib.rk_create_texture _create_texture.restype = ctypes.c_void_p _create_texture.argtypes = ( @@ -339,20 +353,6 @@ select_vertices = _lib.rk_select_vertices select_vertices.argtypes = ( ctypes.c_void_p,) # vertices -resolve_param = _lib.rk_resolve_param -resolve_param.restype = ctypes.c_void_p -resolve_param.argtypes = ( - ctypes.c_char_p,) # name - -_set_param_vec3 = _lib.rk_set_param_vec3 -_set_param_vec3.argtypes = ( - ctypes.c_uint, # layout - _vec3_t) # value - -def set_param_vec3(param, value): - assert len(value) == 3 - _set_param_vec3(param, _vec3(value)) - _draw_batch = _lib.rk_draw_batch _draw_batch.argtypes = ( ctypes.c_void_p, # batch @@ -364,8 +364,7 @@ _draw_batch.argtypes = ( def draw_batch(batch, flags, meshes, params): size = len(flags) assert len(meshes) == size - _params = array('L', map(_voidp, params)) - _draw_batch(batch, size, _ubytep(flags), _uintp(meshes), _voidp(_params)) + _draw_batch(batch, size, _ubytep(flags), _uintp(meshes), _voidp(params)) unselect_vertices 
= _lib.rk_unselect_vertices unselect_vertices.argtypes = ( diff --git a/cpp/opengl/render_opengles.cpp b/cpp/opengl/render_opengles.cpp index a8456a4..9ad7881 100644 --- a/cpp/opengl/render_opengles.cpp +++ b/cpp/opengl/render_opengles.cpp @@ -176,6 +176,24 @@ void rk_set_input_mat4( } } +rk_param_t rk_resolve_param( + char const * name) { + if (!rk_current_shader || !name) { + return nullptr; + } + GLint const location = glGetAttribLocation(rk_current_shader->program, name); + return reinterpret_cast(location + 1); +} + +void rk_set_param_vec3( + rk_param_t _param, + rk_vec3 const & value) { + GLint const param = reinterpret_cast(_param) - 1; + if (rk_current_shader && param > -1) { + glVertexAttrib3fv(param, glm::value_ptr(value)); + } +} + rk_texture_t rk_create_texture( rk_uint slot, char const * input, @@ -357,49 +375,36 @@ rk_vertices_t rk_create_vertices( return vertices; } -static rk_uint rk_convert_vec3_float( - rk_ubyte * const dst, - rk_ubyte const * const src, - rk_ushort const idx) { - *reinterpret_cast(dst) = reinterpret_cast(src)[idx]; - return sizeof(rk_vec3); +static void rk_pack_vec3_float( + rk_vec3 * const __restrict dst, + rk_vec3 const * const __restrict src) { + *dst = *src; } -static rk_uint rk_convert_vec3_short( - rk_ubyte * const _dst, - rk_ubyte const * const _src, - rk_ushort const idx) { - rk_vec3_short * const dst = reinterpret_cast(_dst); - rk_vec3 const & src = reinterpret_cast(_src)[idx]; - dst->x = static_cast(src.x); - dst->y = static_cast(src.y); - dst->z = static_cast(src.z); - return sizeof(rk_vec3_short); +static void rk_pack_vec3_short( + rk_vec3_short * const __restrict dst, + rk_vec3 const * const __restrict src) { + dst->x = static_cast(src->x); + dst->y = static_cast(src->y); + dst->z = static_cast(src->z); } -static rk_uint rk_convert_vec3_short_normalize( - rk_ubyte * const _dst, - rk_ubyte const * const _src, - rk_ushort const idx) { - rk_vec3_short * const dst = reinterpret_cast(_dst); - rk_vec3 const & src = 
reinterpret_cast(_src)[idx]; +static void rk_pack_vec3_short_norm( + rk_vec3_short * const __restrict dst, + rk_vec3 const * const __restrict src) { #define _convert(s) (static_cast((s) * ((s) < 0.f ? 32768.f : 32767.f))) - dst->x = _convert(src.x); - dst->y = _convert(src.y); - dst->z = _convert(src.z); + dst->x = _convert(src->x); + dst->y = _convert(src->y); + dst->z = _convert(src->z); #undef _convert - return sizeof(rk_vec3_short); } -static rk_uint rk_convert_vec3_int10( - rk_ubyte * const dst, - rk_ubyte const * const _src, - rk_ushort const idx) { - rk_vec3 const & src = reinterpret_cast(_src)[idx]; +static void rk_pack_vec3_int10( + rk_int * const __restrict dst, + rk_vec3 const * const __restrict src) { #define _convert(s) (static_cast((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023) - *reinterpret_cast(dst) = _convert(src.x) | (_convert(src.y) << 10) | (_convert(src.z) << 20); + *dst = _convert(src->x) | (_convert(src->y) << 10) | (_convert(src->z) << 20); #undef _convert - return sizeof(rk_int); } rk_batch_t rk_create_batch( @@ -411,16 +416,20 @@ rk_batch_t rk_create_batch( } rk_uint nparams = 0; rk_uint params_size = 0; + rk_uint packed_size = 0; for (rk_param_format const * f = params_format; *f; ++f, ++nparams) { switch (*f & RK_PARAM_FORMAT_MASK) { case RK_PARAM_FORMAT_VEC3_FLOAT: params_size += sizeof(rk_vec3); + packed_size += sizeof(rk_vec3); break; case RK_PARAM_FORMAT_VEC3_SHORT: - params_size += sizeof(rk_vec3_short); + params_size += sizeof(rk_vec3); + packed_size += sizeof(rk_vec3_short); break; case RK_PARAM_FORMAT_VEC3_INT10: - params_size += sizeof(rk_int); + params_size += sizeof(rk_vec3); + packed_size += sizeof(rk_int); break; default: rk_printf("rk_create_batch(): invalid param format."); @@ -432,44 +441,48 @@ rk_batch_t rk_create_batch( batch->size = max_size; batch->nparams = nparams; batch->params_size = params_size; + batch->packed_size = packed_size; batch->indices = new rk_ushort[max_size]; batch->commands = new rk_command[max_size * 
sizeof(rk_command)]; if (nparams) { - batch->converters = new rk_param_converter[nparams]; - batch->params = new rk_ubyte[max_size * params_size]; + batch->packers = new rk_packer[nparams]; + batch->params = new rk_ubyte[max_size * packed_size]; glGenBuffers(1, &batch->params_buffer); rk_uint layout = rk_current_vertices->layout; - rk_param_converter * converter = batch->converters; + rk_packer * packer = batch->packers; rk_uint offset = 0; - for (rk_param_format const * f = params_format; *f; ++f, ++layout, ++converter) { + for (rk_param_format const * f = params_format; *f; ++f, ++layout, ++packer) { GLboolean const normalize = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0; glEnableVertexAttribArray(layout); switch (*f & RK_PARAM_FORMAT_MASK) { case RK_PARAM_FORMAT_VEC3_FLOAT: glVertexAttribFormat(layout, 3, GL_FLOAT, normalize, offset); - *converter = rk_convert_vec3_float; - offset += sizeof(rk_vec3); + packer->pack = reinterpret_cast(rk_pack_vec3_float); + packer->dst_incr = sizeof(rk_vec3); + packer->src_incr = sizeof(rk_vec3); break; case RK_PARAM_FORMAT_VEC3_SHORT: glVertexAttribFormat(layout, 3, GL_SHORT, normalize, offset); if (normalize) { - *converter = rk_convert_vec3_short_normalize; + packer->pack = reinterpret_cast(rk_pack_vec3_short_norm); + } else { + packer->pack = reinterpret_cast(rk_pack_vec3_short); } - else { - *converter = rk_convert_vec3_short; - } - offset += sizeof(rk_vec3_short); + packer->dst_incr = sizeof(rk_vec3_short); + packer->src_incr = sizeof(rk_vec3); break; case RK_PARAM_FORMAT_VEC3_INT10: glVertexAttribFormat(layout, 4, GL_INT_2_10_10_10_REV, normalize, offset); - *converter = rk_convert_vec3_int10; - offset += sizeof(rk_int); + packer->pack = reinterpret_cast(rk_pack_vec3_int10); + packer->dst_incr = sizeof(rk_int); + packer->src_incr = sizeof(rk_vec3); break; } + offset += packer->dst_incr; glVertexAttribBinding(layout, RK_PARAMS_BINDING); } glVertexBindingDivisor(RK_PARAMS_BINDING, 1); - glBindVertexBuffer(RK_PARAMS_BINDING, 
batch->params_buffer, 0, batch->params_size); + glBindVertexBuffer(RK_PARAMS_BINDING, batch->params_buffer, 0, batch->packed_size); } if (rk_MultiDrawElementsIndirect) { glGenBuffers(1, &batch->commands_buffer); @@ -516,46 +529,27 @@ void rk_select_vertices( } } -rk_param_t rk_resolve_param( - char const * name) { - if (!rk_current_shader || !name) { - return nullptr; - } - GLint const location = glGetAttribLocation(rk_current_shader->program, name); - return reinterpret_cast(location + 1); -} - -void rk_set_param_vec3( - rk_param_t param, - rk_vec3 const & value) { - GLint const location = reinterpret_cast(param) - 1; - if (rk_current_shader && location > -1) { - glVertexAttrib3fv(location, glm::value_ptr(value)); - } -} - static rk_uint rk_batch_filter( + rk_batch & batch, rk_uint const size, - rk_ushort * const _indices, rk_instance_flags const * flags) { - rk_ushort * indices = _indices; + rk_ushort * indices = batch.indices; for (rk_ushort index = 0; index < size; ++index, ++flags) { if ((*flags & RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) == RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) { *indices++ = index; } } - return indices - _indices; + return indices - batch.indices; } static rk_uint rk_batch_build_commands( - rk_uint const count, - rk_ushort * const indices, - rk_command * const _commands, + rk_batch & batch, + rk_uint const ninstances, rk_mesh const * const meshes) { - rk_command * commands = _commands; - rk_ushort * base = indices; - rk_ushort * const last = indices + count; - for (rk_ushort * first = indices; first < last; base = first, ++commands) { + rk_command * commands = batch.commands; + rk_ushort * base = batch.indices; + rk_ushort * const last = batch.indices + ninstances; + for (rk_ushort * first = batch.indices; first < last; base = first, ++commands) { rk_mesh const & mesh = meshes[*first++]; for ( ; first < last && meshes[*first].packed == mesh.packed; ++first) { } @@ -566,26 +560,28 @@ static rk_uint rk_batch_build_commands( *first++ = index; } } - 
commands->count = static_cast(mesh.count) * 3; + commands->nvertices = static_cast(mesh.ntriangles) * 3; commands->ninstances = first - base; - commands->base_index = mesh.offset; + commands->base_index = mesh.base_index; commands->base_vertex = 0; - commands->base_instance = base - indices; + commands->base_instance = base - batch.indices; } - return commands - _commands; + return commands - batch.commands; } static void rk_batch_convert_params( rk_batch & batch, - rk_uint const count, - rk_ubyte const ** const params) { - rk_ubyte * dst = batch.params; - rk_ushort const * const last_index = batch.indices + count; - rk_ubyte const ** const last_param = params + batch.nparams; + rk_uint const ninstances, + rk_ubyte const * const params) { + rk_ubyte * __restrict dst = batch.params; + rk_ushort const * const last_index = batch.indices + ninstances; + rk_packer const * const last_packer = batch.packers + batch.nparams; for (rk_ushort const * index = batch.indices; index < last_index; ++index) { - rk_param_converter const * converter = batch.converters; - for (rk_ubyte const ** src = params; src < last_param; ++src, ++converter) { - dst += (*converter)(dst, *src, *index); + rk_ubyte const * __restrict src = &params[batch.params_size * (*index)]; + for (rk_packer const * packer = batch.packers; packer < last_packer; ++packer) { + packer->pack(dst, src); + dst += packer->dst_incr; + src += packer->src_incr; } } } @@ -595,24 +591,24 @@ void rk_draw_batch( rk_uint size, rk_instance_flags const * flags, rk_mesh const * meshes, - rk_ubyte const ** params) { + rk_ubyte const * params) { rk_batch & batch = *reinterpret_cast(_batch); if (!size || size > batch.size || !flags || !meshes || !rk_current_shader || !rk_current_vertices) { return; } - rk_uint const count = rk_batch_filter(size, batch.indices, flags); - if (!count) { + rk_uint const ninstances = rk_batch_filter(batch, size, flags); + if (!ninstances) { return; } - rk_uint const ncommands = rk_batch_build_commands(count, 
batch.indices, batch.commands, meshes); + rk_uint const ncommands = rk_batch_build_commands(batch, ninstances, meshes); if (rk_MultiDrawElementsIndirect) { glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch.commands_buffer); glBufferData(GL_DRAW_INDIRECT_BUFFER, ncommands * sizeof(rk_command), batch.commands, GL_STREAM_DRAW); } if (batch.nparams) { - rk_batch_convert_params(batch, count, params); + rk_batch_convert_params(batch, ninstances, params); glBindBuffer(GL_ARRAY_BUFFER, batch.params_buffer); - glBufferData(GL_ARRAY_BUFFER, count * batch.params_size, batch.params, GL_STREAM_DRAW); + glBufferData(GL_ARRAY_BUFFER, ninstances * batch.packed_size, batch.params, GL_STREAM_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); } if (rk_DrawElementsInstancedBaseInstance) { @@ -623,7 +619,7 @@ void rk_draw_batch( rk_command const * const last_command = batch.commands + ncommands; for (rk_command const * command = batch.commands; command < last_command; ++command) { rk_DrawElementsInstancedBaseInstance( - GL_TRIANGLES, command->count, GL_UNSIGNED_SHORT, + GL_TRIANGLES, command->nvertices, GL_UNSIGNED_SHORT, reinterpret_cast(command->base_index << 1), command->ninstances, command->base_instance); } @@ -633,11 +629,11 @@ void rk_draw_batch( rk_command const * const last_command = batch.commands + ncommands; for (rk_command const * command = batch.commands; command < last_command; ++command) { if (batch.nparams) { - glBindVertexBuffer(RK_PARAMS_BINDING, batch.params_buffer, params_offset, batch.params_size); - params_offset += command->ninstances * batch.params_size; + glBindVertexBuffer(RK_PARAMS_BINDING, batch.params_buffer, params_offset, batch.packed_size); + params_offset += command->ninstances * batch.packed_size; } glDrawElementsInstanced( - GL_TRIANGLES, command->count, GL_UNSIGNED_SHORT, + GL_TRIANGLES, command->nvertices, GL_UNSIGNED_SHORT, reinterpret_cast(command->base_index << 1), command->ninstances); } @@ -680,7 +676,7 @@ void rk_destroy_batch( delete[] batch->indices; 
delete[] batch->commands; if (batch->nparams) { - delete[] batch->converters; + delete[] batch->packers; delete[] batch->params; glDeleteBuffers(1, &batch->params_buffer); } diff --git a/cpp/opengl/render_opengles.hpp b/cpp/opengl/render_opengles.hpp index 3490f1c..763b143 100644 --- a/cpp/opengl/render_opengles.hpp +++ b/cpp/opengl/render_opengles.hpp @@ -54,10 +54,16 @@ struct rk_vertices { GLuint indices; }; -typedef rk_uint (*rk_param_converter)(rk_ubyte * const, rk_ubyte const * const, rk_ushort const); +typedef void (*rk_packer_fn)(rk_ubyte * const __restrict, rk_ubyte const * const __restrict); + +struct rk_packer { + rk_packer_fn pack; + rk_uint dst_incr; + rk_uint src_incr; +}; struct rk_command { - GLuint count; + GLuint nvertices; GLuint ninstances; GLuint base_index; GLint base_vertex; @@ -68,9 +74,10 @@ struct rk_batch { rk_uint size; rk_uint nparams; rk_uint params_size; + rk_uint packed_size; rk_ushort * indices; rk_command * commands; - rk_param_converter * converters; + rk_packer * packers; rk_ubyte * params; GLuint params_buffer; GLuint commands_buffer; diff --git a/cpp/render.hpp b/cpp/render.hpp index a4cff1a..2e81ffa 100644 --- a/cpp/render.hpp +++ b/cpp/render.hpp @@ -76,8 +76,8 @@ enum : rk_uint { RK_BATCH_MAX_SIZE = 65536 }; union rk_mesh { rk_uint packed; struct { - rk_ushort offset; - rk_ushort count; + rk_ushort base_index; + rk_ushort ntriangles; }; }; @@ -114,6 +114,13 @@ RK_EXPORT void rk_set_input_mat4( rk_input_t input, rk_mat4 const & value); +RK_EXPORT rk_param_t rk_resolve_param( + char const * name); + +RK_EXPORT void rk_set_param_vec3( + rk_param_t param, + rk_vec3 const & value); + RK_EXPORT rk_texture_t rk_create_texture( rk_uint slot, char const * input, @@ -150,19 +157,12 @@ RK_EXPORT void rk_draw_triangles( RK_EXPORT void rk_select_vertices( rk_vertices_t vertices); -RK_EXPORT rk_param_t rk_resolve_param( - char const * name); - -RK_EXPORT void rk_set_param_vec3( - rk_param_t param, - rk_vec3 const & value); - RK_EXPORT 
void rk_draw_batch( rk_batch_t batch, rk_uint size, rk_instance_flags const * flags, rk_mesh const * meshes, - rk_ubyte const ** params); + rk_ubyte const * params); RK_EXPORT void rk_unselect_vertices( rk_vertices_t vertices);