Change instance parameters from SoA (structure of arrays) to AoS (array of structures).

This commit is contained in:
Roz K 2022-12-17 16:50:41 +01:00
parent 17b246469f
commit 5c9e10eea8
Signed by: roz
GPG Key ID: 51FBF4E483E1C822
4 changed files with 131 additions and 129 deletions

View File

@ -280,6 +280,20 @@ def set_input_mat4(input, value):
assert len(value) == 16 assert len(value) == 16
_set_input_mat4(input, _mat4(value)) _set_input_mat4(input, _mat4(value))
# ctypes binding for rk_resolve_param(name). The result is declared as
# c_void_p, so ctypes hands it back as an integer, or as None when the C side
# returns a null pointer.
resolve_param = _lib.rk_resolve_param
resolve_param.restype = ctypes.c_void_p
resolve_param.argtypes = (
ctypes.c_char_p,) # name
# ctypes binding for rk_set_param_vec3(param, value).
# The first argument is the opaque handle produced by resolve_param, which is
# declared as c_void_p. It must be declared pointer-sized here as well:
# c_uint would reject the None that ctypes returns for a null handle and
# would pass only 32 bits of a pointer-sized argument.
_set_param_vec3 = _lib.rk_set_param_vec3
_set_param_vec3.argtypes = (
    ctypes.c_void_p, # param
    _vec3_t) # value
def set_param_vec3(param, value):
    """Forward a 3-component value to rk_set_param_vec3 for the handle `param`.

    `value` must have exactly three elements; it is converted with `_vec3`
    before being handed to the native call.
    """
    assert len(value) == 3
    converted = _vec3(value)
    _set_param_vec3(param, converted)
_create_texture = _lib.rk_create_texture _create_texture = _lib.rk_create_texture
_create_texture.restype = ctypes.c_void_p _create_texture.restype = ctypes.c_void_p
_create_texture.argtypes = ( _create_texture.argtypes = (
@ -339,20 +353,6 @@ select_vertices = _lib.rk_select_vertices
select_vertices.argtypes = ( select_vertices.argtypes = (
ctypes.c_void_p,) # vertices ctypes.c_void_p,) # vertices
# ctypes binding for rk_resolve_param(name). The result is declared as
# c_void_p, so ctypes hands it back as an integer, or as None when the C side
# returns a null pointer.
resolve_param = _lib.rk_resolve_param
resolve_param.restype = ctypes.c_void_p
resolve_param.argtypes = (
ctypes.c_char_p,) # name
# ctypes binding for rk_set_param_vec3(param, value).
# The first argument is the opaque handle produced by resolve_param, which is
# declared as c_void_p. It must be declared pointer-sized here as well:
# c_uint would reject the None that ctypes returns for a null handle and
# would pass only 32 bits of a pointer-sized argument.
_set_param_vec3 = _lib.rk_set_param_vec3
_set_param_vec3.argtypes = (
    ctypes.c_void_p, # param
    _vec3_t) # value
def set_param_vec3(param, value):
    """Forward a 3-component value to rk_set_param_vec3 for the handle `param`.

    `value` must have exactly three elements; it is converted with `_vec3`
    before being handed to the native call.
    """
    assert len(value) == 3
    converted = _vec3(value)
    _set_param_vec3(param, converted)
_draw_batch = _lib.rk_draw_batch _draw_batch = _lib.rk_draw_batch
_draw_batch.argtypes = ( _draw_batch.argtypes = (
ctypes.c_void_p, # batch ctypes.c_void_p, # batch
@ -364,8 +364,7 @@ _draw_batch.argtypes = (
# Python wrapper around the native rk_draw_batch call.
# NOTE(review): this span is a two-column diff rendering; each line holds the
# pre-commit text followed by the post-commit text.
# Pre-commit: `params` is a sequence of per-parameter buffers; their addresses
# are gathered into an array('L') and that array is passed to the native call.
# Post-commit: `params` is passed directly as one buffer via _voidp(params).
# Both versions require len(meshes) == len(flags), and pass len(flags) as the
# batch size.
def draw_batch(batch, flags, meshes, params): def draw_batch(batch, flags, meshes, params):
size = len(flags) size = len(flags)
assert len(meshes) == size assert len(meshes) == size
_params = array('L', map(_voidp, params)) _draw_batch(batch, size, _ubytep(flags), _uintp(meshes), _voidp(params))
_draw_batch(batch, size, _ubytep(flags), _uintp(meshes), _voidp(_params))
unselect_vertices = _lib.rk_unselect_vertices unselect_vertices = _lib.rk_unselect_vertices
unselect_vertices.argtypes = ( unselect_vertices.argtypes = (

View File

@ -176,6 +176,24 @@ void rk_set_input_mat4(
} }
} }
// Resolves the vertex attribute `name` in the current shader program and
// returns it as an opaque handle.
//
// The handle encodes (attribute location + 1), so a location of -1 maps to a
// null handle. Returns nullptr when there is no current shader or `name` is
// null.
rk_param_t rk_resolve_param(
    char const * name) {
    if (rk_current_shader && name) {
        GLint const attrib = glGetAttribLocation(rk_current_shader->program, name);
        return reinterpret_cast<rk_param_t>(attrib + 1);
    }
    return nullptr;
}
// Sets the current value of a generic vertex attribute from a vec3.
//
// `_param` is a handle as produced by rk_resolve_param, i.e. the attribute
// location plus one. The call is a no-op when there is no current shader or
// when the decoded location is negative (null handle).
void rk_set_param_vec3(
    rk_param_t _param,
    rk_vec3 const & value) {
    // Undo the +1 bias applied when the handle was created.
    GLint const attrib = reinterpret_cast<intptr_t>(_param) - 1;
    if (!rk_current_shader || attrib < 0) {
        return;
    }
    glVertexAttrib3fv(attrib, glm::value_ptr(value));
}
rk_texture_t rk_create_texture( rk_texture_t rk_create_texture(
rk_uint slot, rk_uint slot,
char const * input, char const * input,
@ -357,49 +375,36 @@ rk_vertices_t rk_create_vertices(
return vertices; return vertices;
} }
// NOTE(review): two-column diff rendering; each line below holds the
// pre-commit text followed by the post-commit text.
// Pre-commit (rk_convert_vec3_float): copies element `idx` of the rk_vec3
// array at `src` into `dst` and returns sizeof(rk_vec3).
// Post-commit (rk_pack_vec3_float): copies the single rk_vec3 at `src` into
// `dst`; indexing and size bookkeeping are left to the caller.
static rk_uint rk_convert_vec3_float( static void rk_pack_vec3_float(
rk_ubyte * const dst, rk_vec3 * const __restrict dst,
rk_ubyte const * const src, rk_vec3 const * const __restrict src) {
rk_ushort const idx) { *dst = *src;
*reinterpret_cast<rk_vec3 *>(dst) = reinterpret_cast<rk_vec3 const *>(src)[idx];
return sizeof(rk_vec3);
} }
// NOTE(review): two-column diff rendering; each line below holds the
// pre-commit text followed by the post-commit text.
// Converts a float vec3 to a rk_vec3_short by casting each component to
// rk_short with static_cast (no scaling is applied).
// Pre-commit (rk_convert_vec3_short): reads element `idx` of the rk_vec3 array
// at `_src` and returns sizeof(rk_vec3_short).
// Post-commit (rk_pack_vec3_short): reads the single rk_vec3 at `src` and
// returns nothing.
static rk_uint rk_convert_vec3_short( static void rk_pack_vec3_short(
rk_ubyte * const _dst, rk_vec3_short * const __restrict dst,
rk_ubyte const * const _src, rk_vec3 const * const __restrict src) {
rk_ushort const idx) { dst->x = static_cast<rk_short>(src->x);
rk_vec3_short * const dst = reinterpret_cast<rk_vec3_short *>(_dst); dst->y = static_cast<rk_short>(src->y);
rk_vec3 const & src = reinterpret_cast<rk_vec3 const *>(_src)[idx]; dst->z = static_cast<rk_short>(src->z);
dst->x = static_cast<rk_short>(src.x);
dst->y = static_cast<rk_short>(src.y);
dst->z = static_cast<rk_short>(src.z);
return sizeof(rk_vec3_short);
} }
// NOTE(review): two-column diff rendering; each line below holds the
// pre-commit text followed by the post-commit text.
// Converts a float vec3 to a rk_vec3_short with scaling: negative components
// are multiplied by 32768, all others by 32767, then cast to rk_short.
// Presumably the inputs are expected to lie in [-1, 1] — TODO confirm.
// Pre-commit (rk_convert_vec3_short_normalize): reads element `idx` of the
// rk_vec3 array at `_src` and returns sizeof(rk_vec3_short).
// Post-commit (rk_pack_vec3_short_norm): reads the single rk_vec3 at `src` and
// returns nothing.
static rk_uint rk_convert_vec3_short_normalize( static void rk_pack_vec3_short_norm(
rk_ubyte * const _dst, rk_vec3_short * const __restrict dst,
rk_ubyte const * const _src, rk_vec3 const * const __restrict src) {
rk_ushort const idx) {
rk_vec3_short * const dst = reinterpret_cast<rk_vec3_short *>(_dst);
rk_vec3 const & src = reinterpret_cast<rk_vec3 const *>(_src)[idx];
#define _convert(s) (static_cast<rk_short>((s) * ((s) < 0.f ? 32768.f : 32767.f))) #define _convert(s) (static_cast<rk_short>((s) * ((s) < 0.f ? 32768.f : 32767.f)))
dst->x = _convert(src.x); dst->x = _convert(src->x);
dst->y = _convert(src.y); dst->y = _convert(src->y);
dst->z = _convert(src.z); dst->z = _convert(src->z);
#undef _convert #undef _convert
return sizeof(rk_vec3_short);
} }
// NOTE(review): two-column diff rendering; each line below holds the
// pre-commit text followed by the post-commit text.
// Packs a float vec3 into one rk_int as three 10-bit fields: each component is
// multiplied by 512 (negative) or 511 (otherwise), cast to rk_int and masked
// to 10 bits; x occupies bits 0-9, y bits 10-19 and z bits 20-29.
// Presumably the inputs are expected to lie in [-1, 1] — TODO confirm.
// Pre-commit (rk_convert_vec3_int10): reads element `idx` of the rk_vec3 array
// at `_src` and returns sizeof(rk_int).
// Post-commit (rk_pack_vec3_int10): reads the single rk_vec3 at `src` and
// returns nothing.
static rk_uint rk_convert_vec3_int10( static void rk_pack_vec3_int10(
rk_ubyte * const dst, rk_int * const __restrict dst,
rk_ubyte const * const _src, rk_vec3 const * const __restrict src) {
rk_ushort const idx) {
rk_vec3 const & src = reinterpret_cast<rk_vec3 const *>(_src)[idx];
#define _convert(s) (static_cast<rk_int>((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023) #define _convert(s) (static_cast<rk_int>((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023)
*reinterpret_cast<rk_int *>(dst) = _convert(src.x) | (_convert(src.y) << 10) | (_convert(src.z) << 20); *dst = _convert(src->x) | (_convert(src->y) << 10) | (_convert(src->z) << 20);
#undef _convert #undef _convert
return sizeof(rk_int);
} }
rk_batch_t rk_create_batch( rk_batch_t rk_create_batch(
@ -411,16 +416,20 @@ rk_batch_t rk_create_batch(
} }
rk_uint nparams = 0; rk_uint nparams = 0;
rk_uint params_size = 0; rk_uint params_size = 0;
rk_uint packed_size = 0;
for (rk_param_format const * f = params_format; *f; ++f, ++nparams) { for (rk_param_format const * f = params_format; *f; ++f, ++nparams) {
switch (*f & RK_PARAM_FORMAT_MASK) { switch (*f & RK_PARAM_FORMAT_MASK) {
case RK_PARAM_FORMAT_VEC3_FLOAT: case RK_PARAM_FORMAT_VEC3_FLOAT:
params_size += sizeof(rk_vec3); params_size += sizeof(rk_vec3);
packed_size += sizeof(rk_vec3);
break; break;
case RK_PARAM_FORMAT_VEC3_SHORT: case RK_PARAM_FORMAT_VEC3_SHORT:
params_size += sizeof(rk_vec3_short); params_size += sizeof(rk_vec3);
packed_size += sizeof(rk_vec3_short);
break; break;
case RK_PARAM_FORMAT_VEC3_INT10: case RK_PARAM_FORMAT_VEC3_INT10:
params_size += sizeof(rk_int); params_size += sizeof(rk_vec3);
packed_size += sizeof(rk_int);
break; break;
default: default:
rk_printf("rk_create_batch(): invalid param format."); rk_printf("rk_create_batch(): invalid param format.");
@ -432,44 +441,48 @@ rk_batch_t rk_create_batch(
batch->size = max_size; batch->size = max_size;
batch->nparams = nparams; batch->nparams = nparams;
batch->params_size = params_size; batch->params_size = params_size;
batch->packed_size = packed_size;
batch->indices = new rk_ushort[max_size]; batch->indices = new rk_ushort[max_size];
batch->commands = new rk_command[max_size * sizeof(rk_command)]; batch->commands = new rk_command[max_size * sizeof(rk_command)];
if (nparams) { if (nparams) {
batch->converters = new rk_param_converter[nparams]; batch->packers = new rk_packer[nparams];
batch->params = new rk_ubyte[max_size * params_size]; batch->params = new rk_ubyte[max_size * packed_size];
glGenBuffers(1, &batch->params_buffer); glGenBuffers(1, &batch->params_buffer);
rk_uint layout = rk_current_vertices->layout; rk_uint layout = rk_current_vertices->layout;
rk_param_converter * converter = batch->converters; rk_packer * packer = batch->packers;
rk_uint offset = 0; rk_uint offset = 0;
for (rk_param_format const * f = params_format; *f; ++f, ++layout, ++converter) { for (rk_param_format const * f = params_format; *f; ++f, ++layout, ++packer) {
GLboolean const normalize = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0; GLboolean const normalize = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0;
glEnableVertexAttribArray(layout); glEnableVertexAttribArray(layout);
switch (*f & RK_PARAM_FORMAT_MASK) { switch (*f & RK_PARAM_FORMAT_MASK) {
case RK_PARAM_FORMAT_VEC3_FLOAT: case RK_PARAM_FORMAT_VEC3_FLOAT:
glVertexAttribFormat(layout, 3, GL_FLOAT, normalize, offset); glVertexAttribFormat(layout, 3, GL_FLOAT, normalize, offset);
*converter = rk_convert_vec3_float; packer->pack = reinterpret_cast<rk_packer_fn>(rk_pack_vec3_float);
offset += sizeof(rk_vec3); packer->dst_incr = sizeof(rk_vec3);
packer->src_incr = sizeof(rk_vec3);
break; break;
case RK_PARAM_FORMAT_VEC3_SHORT: case RK_PARAM_FORMAT_VEC3_SHORT:
glVertexAttribFormat(layout, 3, GL_SHORT, normalize, offset); glVertexAttribFormat(layout, 3, GL_SHORT, normalize, offset);
if (normalize) { if (normalize) {
*converter = rk_convert_vec3_short_normalize; packer->pack = reinterpret_cast<rk_packer_fn>(rk_pack_vec3_short_norm);
} else {
packer->pack = reinterpret_cast<rk_packer_fn>(rk_pack_vec3_short);
} }
else { packer->dst_incr = sizeof(rk_vec3_short);
*converter = rk_convert_vec3_short; packer->src_incr = sizeof(rk_vec3);
}
offset += sizeof(rk_vec3_short);
break; break;
case RK_PARAM_FORMAT_VEC3_INT10: case RK_PARAM_FORMAT_VEC3_INT10:
glVertexAttribFormat(layout, 4, GL_INT_2_10_10_10_REV, normalize, offset); glVertexAttribFormat(layout, 4, GL_INT_2_10_10_10_REV, normalize, offset);
*converter = rk_convert_vec3_int10; packer->pack = reinterpret_cast<rk_packer_fn>(rk_pack_vec3_int10);
offset += sizeof(rk_int); packer->dst_incr = sizeof(rk_int);
packer->src_incr = sizeof(rk_vec3);
break; break;
} }
offset += packer->dst_incr;
glVertexAttribBinding(layout, RK_PARAMS_BINDING); glVertexAttribBinding(layout, RK_PARAMS_BINDING);
} }
glVertexBindingDivisor(RK_PARAMS_BINDING, 1); glVertexBindingDivisor(RK_PARAMS_BINDING, 1);
glBindVertexBuffer(RK_PARAMS_BINDING, batch->params_buffer, 0, batch->params_size); glBindVertexBuffer(RK_PARAMS_BINDING, batch->params_buffer, 0, batch->packed_size);
} }
if (rk_MultiDrawElementsIndirect) { if (rk_MultiDrawElementsIndirect) {
glGenBuffers(1, &batch->commands_buffer); glGenBuffers(1, &batch->commands_buffer);
@ -516,46 +529,27 @@ void rk_select_vertices(
} }
} }
// Resolves the vertex attribute `name` in the current shader program and
// returns it as an opaque handle.
//
// The handle encodes (attribute location + 1), so a location of -1 maps to a
// null handle. Returns nullptr when there is no current shader or `name` is
// null.
rk_param_t rk_resolve_param(
    char const * name) {
    if (rk_current_shader && name) {
        GLint const attrib = glGetAttribLocation(rk_current_shader->program, name);
        return reinterpret_cast<rk_param_t>(attrib + 1);
    }
    return nullptr;
}
// Sets the current value of a generic vertex attribute from a vec3.
//
// `param` is a handle as produced by rk_resolve_param, i.e. the attribute
// location plus one. The call is a no-op when there is no current shader or
// when the decoded location is negative (null handle).
void rk_set_param_vec3(
    rk_param_t param,
    rk_vec3 const & value) {
    // Undo the +1 bias applied when the handle was created.
    GLint const attrib = reinterpret_cast<intptr_t>(param) - 1;
    if (!rk_current_shader || attrib < 0) {
        return;
    }
    glVertexAttrib3fv(attrib, glm::value_ptr(value));
}
// Collects the indices of the instances to draw into batch.indices.
//
// An instance is kept when every bit of RK_INSTANCE_FLAGS_SPAWNED_VISIBLE is
// set in its flags entry; kept indices are written in ascending order.
//
//   batch  batch whose `indices` array receives the result
//   size   number of entries in `flags`
//   flags  per-instance flags, `size` entries
//
// Returns the number of indices written.
static rk_uint rk_batch_filter(
    rk_batch & batch,
    rk_uint const size,
    rk_instance_flags const * flags) {
    rk_ushort * indices = batch.indices;
    // The counter is rk_uint rather than rk_ushort: a 16-bit counter can never
    // reach a size of 65536 (RK_BATCH_MAX_SIZE), so it would wrap to 0 and the
    // loop would never terminate. Stored indices still fit in rk_ushort for
    // every size up to that maximum.
    for (rk_uint index = 0; index < size; ++index, ++flags) {
        if ((*flags & RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) == RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) {
            *indices++ = static_cast<rk_ushort>(index);
        }
    }
    return static_cast<rk_uint>(indices - batch.indices);
}
static rk_uint rk_batch_build_commands( static rk_uint rk_batch_build_commands(
rk_uint const count, rk_batch & batch,
rk_ushort * const indices, rk_uint const ninstances,
rk_command * const _commands,
rk_mesh const * const meshes) { rk_mesh const * const meshes) {
rk_command * commands = _commands; rk_command * commands = batch.commands;
rk_ushort * base = indices; rk_ushort * base = batch.indices;
rk_ushort * const last = indices + count; rk_ushort * const last = batch.indices + ninstances;
for (rk_ushort * first = indices; first < last; base = first, ++commands) { for (rk_ushort * first = batch.indices; first < last; base = first, ++commands) {
rk_mesh const & mesh = meshes[*first++]; rk_mesh const & mesh = meshes[*first++];
for ( ; first < last && meshes[*first].packed == mesh.packed; ++first) { for ( ; first < last && meshes[*first].packed == mesh.packed; ++first) {
} }
@ -566,26 +560,28 @@ static rk_uint rk_batch_build_commands(
*first++ = index; *first++ = index;
} }
} }
commands->count = static_cast<GLuint>(mesh.count) * 3; commands->nvertices = static_cast<GLuint>(mesh.ntriangles) * 3;
commands->ninstances = first - base; commands->ninstances = first - base;
commands->base_index = mesh.offset; commands->base_index = mesh.base_index;
commands->base_vertex = 0; commands->base_vertex = 0;
commands->base_instance = base - indices; commands->base_instance = base - batch.indices;
} }
return commands - _commands; return commands - batch.commands;
} }
// NOTE(review): two-column diff rendering; each line below holds the
// pre-commit text followed by the post-commit text.
// Fills batch.params with the packed parameters of the selected instances, in
// the order given by the first `count` / `ninstances` entries of batch.indices.
// Pre-commit: `params` is an array of batch.nparams source pointers (one per
// parameter); each converter is called with (dst, source, instance index) and
// `dst` advances by the size the converter returns.
// Post-commit: `params` is one buffer of per-instance records, each
// batch.params_size bytes; for every selected instance the packers run in
// order, advancing `dst` by dst_incr and `src` by src_incr after each call.
static void rk_batch_convert_params( static void rk_batch_convert_params(
rk_batch & batch, rk_batch & batch,
rk_uint const count, rk_uint const ninstances,
rk_ubyte const ** const params) { rk_ubyte const * const params) {
rk_ubyte * dst = batch.params; rk_ubyte * __restrict dst = batch.params;
rk_ushort const * const last_index = batch.indices + count; rk_ushort const * const last_index = batch.indices + ninstances;
rk_ubyte const ** const last_param = params + batch.nparams; rk_packer const * const last_packer = batch.packers + batch.nparams;
for (rk_ushort const * index = batch.indices; index < last_index; ++index) { for (rk_ushort const * index = batch.indices; index < last_index; ++index) {
rk_param_converter const * converter = batch.converters; rk_ubyte const * __restrict src = &params[batch.params_size * (*index)];
for (rk_ubyte const ** src = params; src < last_param; ++src, ++converter) { for (rk_packer const * packer = batch.packers; packer < last_packer; ++packer) {
dst += (*converter)(dst, *src, *index); packer->pack(dst, src);
dst += packer->dst_incr;
src += packer->src_incr;
} }
} }
} }
@ -595,24 +591,24 @@ void rk_draw_batch(
rk_uint size, rk_uint size,
rk_instance_flags const * flags, rk_instance_flags const * flags,
rk_mesh const * meshes, rk_mesh const * meshes,
rk_ubyte const ** params) { rk_ubyte const * params) {
rk_batch & batch = *reinterpret_cast<rk_batch *>(_batch); rk_batch & batch = *reinterpret_cast<rk_batch *>(_batch);
if (!size || size > batch.size || !flags || !meshes || !rk_current_shader || !rk_current_vertices) { if (!size || size > batch.size || !flags || !meshes || !rk_current_shader || !rk_current_vertices) {
return; return;
} }
rk_uint const count = rk_batch_filter(size, batch.indices, flags); rk_uint const ninstances = rk_batch_filter(batch, size, flags);
if (!count) { if (!ninstances) {
return; return;
} }
rk_uint const ncommands = rk_batch_build_commands(count, batch.indices, batch.commands, meshes); rk_uint const ncommands = rk_batch_build_commands(batch, ninstances, meshes);
if (rk_MultiDrawElementsIndirect) { if (rk_MultiDrawElementsIndirect) {
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch.commands_buffer); glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch.commands_buffer);
glBufferData(GL_DRAW_INDIRECT_BUFFER, ncommands * sizeof(rk_command), batch.commands, GL_STREAM_DRAW); glBufferData(GL_DRAW_INDIRECT_BUFFER, ncommands * sizeof(rk_command), batch.commands, GL_STREAM_DRAW);
} }
if (batch.nparams) { if (batch.nparams) {
rk_batch_convert_params(batch, count, params); rk_batch_convert_params(batch, ninstances, params);
glBindBuffer(GL_ARRAY_BUFFER, batch.params_buffer); glBindBuffer(GL_ARRAY_BUFFER, batch.params_buffer);
glBufferData(GL_ARRAY_BUFFER, count * batch.params_size, batch.params, GL_STREAM_DRAW); glBufferData(GL_ARRAY_BUFFER, ninstances * batch.packed_size, batch.params, GL_STREAM_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0);
} }
if (rk_DrawElementsInstancedBaseInstance) { if (rk_DrawElementsInstancedBaseInstance) {
@ -623,7 +619,7 @@ void rk_draw_batch(
rk_command const * const last_command = batch.commands + ncommands; rk_command const * const last_command = batch.commands + ncommands;
for (rk_command const * command = batch.commands; command < last_command; ++command) { for (rk_command const * command = batch.commands; command < last_command; ++command) {
rk_DrawElementsInstancedBaseInstance( rk_DrawElementsInstancedBaseInstance(
GL_TRIANGLES, command->count, GL_UNSIGNED_SHORT, GL_TRIANGLES, command->nvertices, GL_UNSIGNED_SHORT,
reinterpret_cast<void const *>(command->base_index << 1), reinterpret_cast<void const *>(command->base_index << 1),
command->ninstances, command->base_instance); command->ninstances, command->base_instance);
} }
@ -633,11 +629,11 @@ void rk_draw_batch(
rk_command const * const last_command = batch.commands + ncommands; rk_command const * const last_command = batch.commands + ncommands;
for (rk_command const * command = batch.commands; command < last_command; ++command) { for (rk_command const * command = batch.commands; command < last_command; ++command) {
if (batch.nparams) { if (batch.nparams) {
glBindVertexBuffer(RK_PARAMS_BINDING, batch.params_buffer, params_offset, batch.params_size); glBindVertexBuffer(RK_PARAMS_BINDING, batch.params_buffer, params_offset, batch.packed_size);
params_offset += command->ninstances * batch.params_size; params_offset += command->ninstances * batch.packed_size;
} }
glDrawElementsInstanced( glDrawElementsInstanced(
GL_TRIANGLES, command->count, GL_UNSIGNED_SHORT, GL_TRIANGLES, command->nvertices, GL_UNSIGNED_SHORT,
reinterpret_cast<void const *>(command->base_index << 1), reinterpret_cast<void const *>(command->base_index << 1),
command->ninstances); command->ninstances);
} }
@ -680,7 +676,7 @@ void rk_destroy_batch(
delete[] batch->indices; delete[] batch->indices;
delete[] batch->commands; delete[] batch->commands;
if (batch->nparams) { if (batch->nparams) {
delete[] batch->converters; delete[] batch->packers;
delete[] batch->params; delete[] batch->params;
glDeleteBuffers(1, &batch->params_buffer); glDeleteBuffers(1, &batch->params_buffer);
} }

View File

@ -54,10 +54,16 @@ struct rk_vertices {
GLuint indices; GLuint indices;
}; };
// NOTE(review): two-column diff rendering; the line below holds the pre-commit
// typedef followed by the post-commit typedef.
// Pre-commit: rk_param_converter takes (dst, src, index) and returns a rk_uint.
// Post-commit: rk_packer_fn takes (dst, src) as untyped byte pointers and
// returns nothing.
typedef rk_uint (*rk_param_converter)(rk_ubyte * const, rk_ubyte const * const, rk_ushort const); typedef void (*rk_packer_fn)(rk_ubyte * const __restrict, rk_ubyte const * const __restrict);
// Describes how one instance parameter is packed into the batch's params
// buffer.
struct rk_packer {
rk_packer_fn pack; // function converting one source value into its packed form
rk_uint dst_incr; // bytes to advance the destination pointer after `pack`
rk_uint src_incr; // bytes to advance the source pointer after `pack`
};
struct rk_command { struct rk_command {
GLuint count; GLuint nvertices;
GLuint ninstances; GLuint ninstances;
GLuint base_index; GLuint base_index;
GLint base_vertex; GLint base_vertex;
@ -68,9 +74,10 @@ struct rk_batch {
rk_uint size; rk_uint size;
rk_uint nparams; rk_uint nparams;
rk_uint params_size; rk_uint params_size;
rk_uint packed_size;
rk_ushort * indices; rk_ushort * indices;
rk_command * commands; rk_command * commands;
rk_param_converter * converters; rk_packer * packers;
rk_ubyte * params; rk_ubyte * params;
GLuint params_buffer; GLuint params_buffer;
GLuint commands_buffer; GLuint commands_buffer;

View File

@ -76,8 +76,8 @@ enum : rk_uint { RK_BATCH_MAX_SIZE = 65536 };
// NOTE(review): two-column diff rendering; each line below holds the
// pre-commit text followed by the post-commit text.
// Mesh descriptor: two 16-bit fields that can also be read as a single
// rk_uint through `packed`, which lets two meshes be compared in one
// comparison.
// The commit renames the fields offset -> base_index and count -> ntriangles.
// base_index is copied into the draw command's base_index, and ntriangles is
// multiplied by 3 to give the command's vertex count.
union rk_mesh { union rk_mesh {
rk_uint packed; rk_uint packed;
struct { struct {
rk_ushort offset; rk_ushort base_index;
rk_ushort count; rk_ushort ntriangles;
}; };
}; };
@ -114,6 +114,13 @@ RK_EXPORT void rk_set_input_mat4(
rk_input_t input, rk_input_t input,
rk_mat4 const & value); rk_mat4 const & value);
// Resolves the vertex attribute `name` of the current shader to an opaque
// handle. Returns a null handle when there is no current shader or `name` is
// null.
RK_EXPORT rk_param_t rk_resolve_param(
char const * name);
// Sets the vec3 value of the attribute identified by `param`, a handle
// obtained from rk_resolve_param. Does nothing when there is no current shader
// or the handle is null.
RK_EXPORT void rk_set_param_vec3(
rk_param_t param,
rk_vec3 const & value);
RK_EXPORT rk_texture_t rk_create_texture( RK_EXPORT rk_texture_t rk_create_texture(
rk_uint slot, rk_uint slot,
char const * input, char const * input,
@ -150,19 +157,12 @@ RK_EXPORT void rk_draw_triangles(
RK_EXPORT void rk_select_vertices( RK_EXPORT void rk_select_vertices(
rk_vertices_t vertices); rk_vertices_t vertices);
// Resolves the vertex attribute `name` of the current shader to an opaque
// handle. Returns a null handle when there is no current shader or `name` is
// null.
RK_EXPORT rk_param_t rk_resolve_param(
char const * name);
// Sets the vec3 value of the attribute identified by `param`, a handle
// obtained from rk_resolve_param. Does nothing when there is no current shader
// or the handle is null.
RK_EXPORT void rk_set_param_vec3(
rk_param_t param,
rk_vec3 const & value);
// NOTE(review): two-column diff rendering; each line below holds the
// pre-commit text followed by the post-commit text.
// Draws up to `size` instances of `batch`; `flags` and `meshes` each hold one
// entry per instance.
// The commit changes `params` from an array of per-parameter source pointers
// (rk_ubyte const **) to a single buffer of per-instance parameter records
// (rk_ubyte const *).
RK_EXPORT void rk_draw_batch( RK_EXPORT void rk_draw_batch(
rk_batch_t batch, rk_batch_t batch,
rk_uint size, rk_uint size,
rk_instance_flags const * flags, rk_instance_flags const * flags,
rk_mesh const * meshes, rk_mesh const * meshes,
rk_ubyte const ** params); rk_ubyte const * params);
RK_EXPORT void rk_unselect_vertices( RK_EXPORT void rk_unselect_vertices(
rk_vertices_t vertices); rk_vertices_t vertices);