Switch back to SoA parameters.

This commit is contained in:
Roz K 2022-12-30 10:50:56 +01:00
parent 414630ecfd
commit 7db5304d40
Signed by: roz
GPG Key ID: 51FBF4E483E1C822
4 changed files with 262 additions and 232 deletions

View File

@ -257,6 +257,18 @@ PARAM_FORMAT_VEC3_INT10 = 3
PARAM_FORMAT_MAT3_FLOAT = 4 PARAM_FORMAT_MAT3_FLOAT = 4
PARAM_FORMAT_MAT3_INT10 = 5 PARAM_FORMAT_MAT3_INT10 = 5
PARAM_FORMAT_NORMALIZE = _flag(7) PARAM_FORMAT_NORMALIZE = _flag(7)
_PARAM_FORMAT_MASK = PARAM_FORMAT_NORMALIZE - 1
_PARAMS_TYPES = (
None,
vec3, # PARAM_FORMAT_VEC3_FLOAT
vec3, # PARAM_FORMAT_VEC3_SHORT
vec3, # PARAM_FORMAT_VEC3_INT10
mat3, # PARAM_FORMAT_MAT3_FLOAT
mat3) # PARAM_FORMAT_MAT3_INT10
def param_type(format):
return _PARAMS_TYPES[format & _PARAM_FORMAT_MASK]
def params_format(*format): def params_format(*format):
return array('B', format).tobytes() return array('B', format).tobytes()
@ -395,10 +407,10 @@ select_vertices.argtypes = (
draw_batch = _engine.rk_draw_batch draw_batch = _engine.rk_draw_batch
draw_batch.argtypes = ( draw_batch.argtypes = (
ctypes.c_void_p, # batch ctypes.c_void_p, # batch
ctypes.c_uint, # size ctypes.c_uint, # count
ctypes.POINTER(ctypes.c_ubyte), # flags ctypes.POINTER(ctypes.c_ubyte), # flags
ctypes.POINTER(ctypes.c_uint), # meshes ctypes.POINTER(ctypes.c_uint), # meshes
ctypes.c_void_p) # params ctypes.POINTER(ctypes.c_void_p)) # params
unselect_vertices = _engine.rk_unselect_vertices unselect_vertices = _engine.rk_unselect_vertices
unselect_vertices.argtypes = ( unselect_vertices.argtypes = (

View File

@ -167,7 +167,7 @@ RK_EXPORT void rk_draw_batch(
rk_uint size, rk_uint size,
rk_instance_flags const * flags, rk_instance_flags const * flags,
rk_mesh const * meshes, rk_mesh const * meshes,
rk_ubyte const * params); rk_ubyte const ** params);
RK_EXPORT void rk_unselect_vertices( RK_EXPORT void rk_unselect_vertices(
rk_vertices_t vertices); rk_vertices_t vertices);

View File

@ -348,220 +348,262 @@ rk_vertices_t rk_create_vertices(
} }
static void rk_pack_vec3_float( static void rk_pack_vec3_float(
rk_pack_dst const dst, unsigned const count,
rk_pack_src const src) { rk_ushort const * const __restrict indices,
*dst.vec3_float = *src.vec3_float; rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_vec3_float * __restrict dst = reinterpret_cast<rk_vec3_float *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
*dst = src[*index];
}
} }
static void rk_pack_vec3_short( static void rk_pack_vec3_short(
rk_pack_dst const dst, unsigned const count,
rk_pack_src const src) { rk_ushort const * const __restrict indices,
dst.vec3_short->x = static_cast<rk_short>(src.vec3_float->x); rk_ubyte * __restrict _dst,
dst.vec3_short->y = static_cast<rk_short>(src.vec3_float->y); rk_ubyte const * const __restrict _src) {
dst.vec3_short->z = static_cast<rk_short>(src.vec3_float->z); rk_ushort const * const last_index = indices + count;
rk_vec3_short * __restrict dst = reinterpret_cast<rk_vec3_short *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
rk_vec3_float const & input = src[*index];
dst->x = static_cast<rk_short>(input.x);
dst->y = static_cast<rk_short>(input.y);
dst->z = static_cast<rk_short>(input.z);
}
} }
static void rk_pack_vec3_short_norm( static void rk_pack_vec3_short_norm(
rk_pack_dst const dst, unsigned const count,
rk_pack_src const src) { rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_vec3_short * __restrict dst = reinterpret_cast<rk_vec3_short *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
#define _convert(s) (static_cast<rk_short>((s) * ((s) < 0.f ? 32768.f : 32767.f))) #define _convert(s) (static_cast<rk_short>((s) * ((s) < 0.f ? 32768.f : 32767.f)))
dst.vec3_short->x = _convert(src.vec3_float->x); for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
dst.vec3_short->y = _convert(src.vec3_float->y); rk_vec3_float const & input = src[*index];
dst.vec3_short->z = _convert(src.vec3_float->z); dst->x = _convert(input.x);
dst->y = _convert(input.y);
dst->z = _convert(input.z);
}
#undef _convert #undef _convert
} }
static void rk_pack_vec3_int10( static void rk_pack_vec3_int10(
rk_pack_dst const dst, unsigned const count,
rk_pack_src const src) { rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_vec3_int10 * __restrict dst = reinterpret_cast<rk_vec3_int10 *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
#define _convert(s) (static_cast<rk_int>((s)) & 1023) #define _convert(s) (static_cast<rk_int>((s)) & 1023)
*dst.vec3_int10 = for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
_convert(src.vec3_float->x) | (_convert(src.vec3_float->y) << 10) | (_convert(src.vec3_float->z) << 20); rk_vec3_float const & input = src[*index];
*dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20);
}
#undef _convert #undef _convert
} }
static void rk_pack_vec3_int10_norm( static void rk_pack_vec3_int10_norm(
rk_pack_dst const dst, unsigned const count,
rk_pack_src const src) { rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_vec3_int10 * __restrict dst = reinterpret_cast<rk_vec3_int10 *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
#define _convert(s) (static_cast<rk_int>((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023) #define _convert(s) (static_cast<rk_int>((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023)
*dst.vec3_int10 = for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
_convert(src.vec3_float->x) | (_convert(src.vec3_float->y) << 10) | (_convert(src.vec3_float->z) << 20); rk_vec3_float const & input = src[*index];
*dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20);
}
#undef _convert #undef _convert
} }
static void rk_pack_mat3_float( static void rk_pack_mat3_float(
rk_pack_dst const dst, unsigned const count,
rk_pack_src const src) { rk_ushort const * const __restrict indices,
*dst.mat3_float = *src.mat3_float; rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_mat3_float * __restrict dst = reinterpret_cast<rk_mat3_float *>(_dst);
rk_mat3_float const * const __restrict src = reinterpret_cast<rk_mat3_float const *>(_src);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
*dst = src[*index];
}
#undef _convert
} }
static void rk_pack_mat3_int10( static void rk_pack_mat3_int10(
rk_pack_dst const dst, unsigned const count,
rk_pack_src const src) { rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_mat3_int10 * __restrict dst = reinterpret_cast<rk_mat3_int10 *>(_dst);
rk_mat3_float const * const __restrict src = reinterpret_cast<rk_mat3_float const *>(_src);
#define _convert(s) (static_cast<rk_int>((s)) & 1023) #define _convert(s) (static_cast<rk_int>((s)) & 1023)
dst.mat3_int10->x = for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
_convert(src.mat3_float->x.x) | (_convert(src.mat3_float->x.y) << 10) | (_convert(src.mat3_float->x.z) << 20); rk_mat3_float const & input = src[*index];
dst.mat3_int10->y = dst->x = _convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20);
_convert(src.mat3_float->y.x) | (_convert(src.mat3_float->y.y) << 10) | (_convert(src.mat3_float->y.z) << 20); dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20);
dst.mat3_int10->z = dst->z = _convert(input.z.x) | (_convert(input.z.y) << 10) | (_convert(input.z.z) << 20);
_convert(src.mat3_float->z.x) | (_convert(src.mat3_float->z.y) << 10) | (_convert(src.mat3_float->z.z) << 20); }
#undef _convert #undef _convert
} }
static void rk_pack_mat3_int10_norm( static void rk_pack_mat3_int10_norm(
rk_pack_dst const dst, unsigned const count,
rk_pack_src const src) { rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_mat3_int10 * __restrict dst = reinterpret_cast<rk_mat3_int10 *>(_dst);
rk_mat3_float const * const __restrict src = reinterpret_cast<rk_mat3_float const *>(_src);
#define _convert(s) (static_cast<rk_int>((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023) #define _convert(s) (static_cast<rk_int>((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023)
dst.mat3_int10->x = for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
_convert(src.mat3_float->x.x) | (_convert(src.mat3_float->x.y) << 10) | (_convert(src.mat3_float->x.z) << 20); rk_mat3_float const & input = src[*index];
dst.mat3_int10->y = dst->x = _convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20);
_convert(src.mat3_float->y.x) | (_convert(src.mat3_float->y.y) << 10) | (_convert(src.mat3_float->y.z) << 20); dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20);
dst.mat3_int10->z = dst->z = _convert(input.z.x) | (_convert(input.z.y) << 10) | (_convert(input.z.z) << 20);
_convert(src.mat3_float->z.x) | (_convert(src.mat3_float->z.y) << 10) | (_convert(src.mat3_float->z.z) << 20); }
#undef _convert #undef _convert
} }
//TODO: multiple batches per vertices //TODO: multiple batches per vertices with their own buffers
rk_batch_t rk_create_batch( rk_batch_t rk_create_batch(
rk_vertices_t _vertices, rk_vertices_t _vertices,
rk_uint max_size, rk_uint max_size,
rk_param_format const * params_format) { rk_param_format const * params_format) {
rk_vertices const * const vertices = reinterpret_cast<rk_vertices const *>(_vertices); rk_vertices const * const vertices = reinterpret_cast<rk_vertices const *>(_vertices);
if (!vertices || !max_size || !params_format || max_size > RK_BATCH_MAX_SIZE) { if (!vertices || !max_size || max_size > RK_BATCH_MAX_SIZE) {
rk_printf("rk_create_batch(): invalid parameters."); rk_printf("rk_create_batch(): invalid parameters.");
return nullptr; return nullptr;
} }
unsigned nparams = 0; unsigned nparams = 0;
if (params_format) {
for ( ; params_format[nparams]; ++nparams);
}
rk_batch * batch = new rk_batch;
batch->max_size = max_size;
batch->nparams = nparams;
batch->indices = nullptr;
batch->commands = nullptr;
batch->params = nullptr;
batch->packed_params = nullptr;
batch->indirect_buffer = 0;
batch->params_array = 0;
unsigned params_size = 0; unsigned params_size = 0;
unsigned packed_size = 0; if (nparams) {
unsigned nattribs = 0; batch->params = new rk_parameter[nparams];
for (rk_param_format const * f = params_format; *f; ++f, ++nparams) { rk_parameter * param = batch->params;
for (rk_param_format const * f = params_format; *f; ++f, ++param) {
param->offset = params_size;
bool const norm = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0;
switch (*f & RK_PARAM_FORMAT_MASK) { switch (*f & RK_PARAM_FORMAT_MASK) {
case RK_PARAM_FORMAT_VEC3_FLOAT: case RK_PARAM_FORMAT_VEC3_FLOAT:
params_size += sizeof(rk_vec3_float); param->size = sizeof(rk_vec3_float);
packed_size += sizeof(rk_vec3_float); param->packer = rk_pack_vec3_float;
nattribs += 1;
break; break;
case RK_PARAM_FORMAT_VEC3_SHORT: case RK_PARAM_FORMAT_VEC3_SHORT:
params_size += sizeof(rk_vec3_float); param->size = sizeof(rk_vec3_short);
packed_size += sizeof(rk_vec3_short); param->packer = norm ? rk_pack_vec3_short_norm : rk_pack_vec3_short;
nattribs += 1;
break; break;
case RK_PARAM_FORMAT_VEC3_INT10: case RK_PARAM_FORMAT_VEC3_INT10:
params_size += sizeof(rk_vec3_float); param->size = sizeof(rk_vec3_int10);
packed_size += sizeof(rk_vec3_int10); param->packer = norm ? rk_pack_vec3_int10_norm : rk_pack_vec3_int10;
nattribs += 1;
break; break;
case RK_PARAM_FORMAT_MAT3_FLOAT: case RK_PARAM_FORMAT_MAT3_FLOAT:
params_size += sizeof(rk_mat3_float); param->size = sizeof(rk_mat3_float);
packed_size += sizeof(rk_mat3_float); param->packer = rk_pack_mat3_float;
nattribs += 3;
break; break;
case RK_PARAM_FORMAT_MAT3_INT10: case RK_PARAM_FORMAT_MAT3_INT10:
params_size += sizeof(rk_mat3_float); param->size = sizeof(rk_mat3_int10);
packed_size += sizeof(rk_mat3_int10); param->packer = norm ? rk_pack_mat3_int10_norm : rk_pack_mat3_int10;
nattribs += 3;
break; break;
default: default:
rk_printf("rk_create_batch(): invalid param format."); rk_printf("rk_create_batch(): invalid param format.");
delete[] batch->params;
delete batch;
return nullptr; return nullptr;
break; break;
} }
params_size += max_size * param->size;
}
} }
glBindVertexArray(vertices->array);
rk_batch * batch = new rk_batch;
batch->size = max_size;
batch->nparams = nparams;
batch->params_size = params_size;
batch->packed_size = packed_size;
batch->indices = new rk_ushort[max_size]; batch->indices = new rk_ushort[max_size];
batch->commands = new rk_command[max_size * sizeof(rk_command)]; batch->commands = new rk_command[max_size];
memset(batch->commands, 0, max_size * sizeof(rk_command)); memset(batch->commands, 0, max_size * sizeof(rk_command));
batch->indirect_buffer = 0;
if (rk_MultiDrawElementsIndirect) { if (rk_MultiDrawElementsIndirect) {
glGenBuffers(1, &batch->commands_buffer); glGenBuffers(1, &batch->indirect_buffer);
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch->commands_buffer); glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch->indirect_buffer);
glBufferData(GL_DRAW_INDIRECT_BUFFER, max_size * sizeof(rk_command), batch->commands, GL_DYNAMIC_DRAW); glBufferData(GL_DRAW_INDIRECT_BUFFER, max_size * sizeof(rk_command), nullptr, GL_DYNAMIC_DRAW);
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);
} else {
batch->commands_buffer = 0;
} }
batch->packers = nullptr;
batch->params = nullptr;
if (nparams) { if (nparams) {
batch->packers = new rk_packer[nparams]; batch->packed_params = new rk_ubyte[params_size];
batch->params = new rk_ubyte[max_size * packed_size]; memset(batch->packed_params, 0, params_size);
memset(batch->params, 0, max_size * packed_size); glBindVertexArray(vertices->array);
glGenBuffers(1, &batch->params_buffer); glGenBuffers(1, &batch->params_array);
glBindBuffer(GL_ARRAY_BUFFER, batch->params_buffer); glBindBuffer(GL_ARRAY_BUFFER, batch->params_array);
glBufferData(GL_ARRAY_BUFFER, max_size * batch->packed_size, batch->params, GL_DYNAMIC_DRAW); glBufferData(GL_ARRAY_BUFFER, params_size, nullptr, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexBuffer(RK_PARAMS_BINDING, batch->params_buffer, 0, batch->packed_size); rk_parameter const * param = batch->params;
for (unsigned attrib = vertices->layout; attrib < vertices->layout + nattribs; ++attrib) { unsigned binding = RK_PARAMS_BINDING_BASE;
glEnableVertexAttribArray(attrib); unsigned attrib = vertices->layout;
} for (rk_param_format const * f = params_format; *f; ++f, ++param, ++binding) {
rk_packer * packer = batch->packers; bool const norm = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0;
unsigned layout = vertices->layout; glBindVertexBuffer(binding, batch->params_array, param->offset, param->size);
unsigned offset = 0;
for (rk_param_format const * f = params_format; *f; ++f, ++packer) {
GLboolean const norm = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0;
switch (*f & RK_PARAM_FORMAT_MASK) { switch (*f & RK_PARAM_FORMAT_MASK) {
case RK_PARAM_FORMAT_VEC3_FLOAT: case RK_PARAM_FORMAT_VEC3_FLOAT:
glVertexAttribFormat(layout++, 3, GL_FLOAT, GL_FALSE, offset); glEnableVertexAttribArray(attrib);
packer->pack = rk_pack_vec3_float; glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, 0);
packer->src_incr = sizeof(rk_vec3_float); glVertexAttribBinding(attrib++, binding);
packer->dst_incr = sizeof(rk_vec3_float);
break; break;
case RK_PARAM_FORMAT_VEC3_SHORT: case RK_PARAM_FORMAT_VEC3_SHORT:
glVertexAttribFormat(layout++, 3, GL_SHORT, norm, offset); glEnableVertexAttribArray(attrib);
if (norm) { glVertexAttribFormat(attrib, 3, GL_SHORT, norm, 0);
packer->pack = rk_pack_vec3_short_norm; glVertexAttribBinding(attrib++, binding);
} else {
packer->pack = rk_pack_vec3_short;
}
packer->src_incr = sizeof(rk_vec3_float);
packer->dst_incr = sizeof(rk_vec3_short);
break; break;
case RK_PARAM_FORMAT_VEC3_INT10: case RK_PARAM_FORMAT_VEC3_INT10:
glVertexAttribFormat(layout++, 4, GL_INT_2_10_10_10_REV, norm, offset); glEnableVertexAttribArray(attrib);
if (norm) { glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, 0);
packer->pack = rk_pack_vec3_int10_norm; glVertexAttribBinding(attrib++, binding);
} else {
packer->pack = rk_pack_vec3_int10;
}
packer->src_incr = sizeof(rk_vec3_float);
packer->dst_incr = sizeof(rk_vec3_int10);
break; break;
case RK_PARAM_FORMAT_MAT3_FLOAT: case RK_PARAM_FORMAT_MAT3_FLOAT:
glVertexAttribFormat(layout++, 3, GL_FLOAT, GL_FALSE, offset + offsetof(rk_mat3_float, x)); glEnableVertexAttribArray(attrib);
glVertexAttribFormat(layout++, 3, GL_FLOAT, GL_FALSE, offset + offsetof(rk_mat3_float, y)); glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, x));
glVertexAttribFormat(layout++, 3, GL_FLOAT, GL_FALSE, offset + offsetof(rk_mat3_float, z)); glVertexAttribBinding(attrib++, binding);
packer->pack = rk_pack_mat3_float; glEnableVertexAttribArray(attrib);
packer->src_incr = sizeof(rk_mat3_float); glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, y));
packer->dst_incr = sizeof(rk_mat3_float); glVertexAttribBinding(attrib++, binding);
glEnableVertexAttribArray(attrib);
glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, z));
glVertexAttribBinding(attrib++, binding);
break; break;
case RK_PARAM_FORMAT_MAT3_INT10: case RK_PARAM_FORMAT_MAT3_INT10:
glVertexAttribFormat(layout++, 4, GL_INT_2_10_10_10_REV, norm, offset + offsetof(rk_mat3_int10, x)); glEnableVertexAttribArray(attrib);
glVertexAttribFormat(layout++, 4, GL_INT_2_10_10_10_REV, norm, offset + offsetof(rk_mat3_int10, y)); glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, x));
glVertexAttribFormat(layout++, 4, GL_INT_2_10_10_10_REV, norm, offset + offsetof(rk_mat3_int10, z)); glVertexAttribBinding(attrib++, binding);
if (norm) { glEnableVertexAttribArray(attrib);
packer->pack = rk_pack_mat3_int10_norm; glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, y));
} else { glVertexAttribBinding(attrib++, binding);
packer->pack = rk_pack_mat3_int10; glEnableVertexAttribArray(attrib);
} glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, z));
packer->src_incr = sizeof(rk_mat3_float); glVertexAttribBinding(attrib++, binding);
packer->dst_incr = sizeof(rk_mat3_int10);
break; break;
} }
offset += packer->dst_incr; glVertexBindingDivisor(binding, 1);
}
for (unsigned attrib = vertices->layout; attrib < vertices->layout + nattribs; ++attrib) {
glVertexAttribBinding(attrib, RK_PARAMS_BINDING);
}
glVertexBindingDivisor(RK_PARAMS_BINDING, 1);
} else {
batch->params_buffer = 0;
} }
glBindVertexArray(0); glBindVertexArray(0);
}
return batch; return batch;
} }
@ -704,7 +746,6 @@ static unsigned rk_batch_build_commands(
commands->nvertices = static_cast<GLuint>(mesh.ntriangles) * 3; commands->nvertices = static_cast<GLuint>(mesh.ntriangles) * 3;
commands->ninstances = first - base; commands->ninstances = first - base;
commands->base_index = mesh.base_index; commands->base_index = mesh.base_index;
commands->base_vertex = 0;
commands->base_instance = base - batch.indices; commands->base_instance = base - batch.indices;
} }
return commands - batch.commands; return commands - batch.commands;
@ -713,47 +754,40 @@ static unsigned rk_batch_build_commands(
static void rk_batch_pack( static void rk_batch_pack(
rk_batch & batch, rk_batch & batch,
unsigned const ninstances, unsigned const ninstances,
rk_ubyte const * const params) { rk_ubyte const ** srcs) {
rk_pack_dst dst(batch.params); rk_parameter const * const last_param = batch.params + batch.nparams;
rk_ushort const * const last_index = batch.indices + ninstances; for (rk_parameter const * param = batch.params; param < last_param; ++param) {
rk_packer const * const last_packer = batch.packers + batch.nparams; rk_ubyte const * const src = *srcs++;
for (rk_ushort const * index = batch.indices; index < last_index; ++index) { if (src) {
rk_pack_src src(&params[batch.params_size * (*index)]); rk_ubyte * const dst = batch.packed_params + param->offset;
for (rk_packer const * packer = batch.packers; packer < last_packer; ++packer) { param->packer(ninstances, batch.indices, dst, src);
packer->pack(dst, src); glBufferSubData(GL_ARRAY_BUFFER, param->offset, ninstances * param->size, dst);
src.ptr += packer->src_incr;
dst.ptr += packer->dst_incr;
} }
} }
} }
void rk_draw_batch( void rk_draw_batch(
rk_batch_t _batch, rk_batch_t _batch,
rk_uint size, rk_uint count,
rk_instance_flags const * flags, rk_instance_flags const * flags,
rk_mesh const * meshes, rk_mesh const * meshes,
rk_ubyte const * params) { rk_ubyte const ** params) {
rk_batch & batch = *reinterpret_cast<rk_batch *>(_batch); rk_batch & batch = *reinterpret_cast<rk_batch *>(_batch);
if (!size || size > batch.size || !flags || !meshes || !rk_current_shader || !rk_current_vertices) { if (!count || count > batch.max_size || !flags || !meshes || !rk_current_shader || !rk_current_vertices) {
return; return;
} }
unsigned const ninstances = rk_batch_filter(batch, size, flags); unsigned const ninstances = rk_batch_filter(batch, count, flags);
if (!ninstances) { if (!ninstances) {
return; return;
} }
if (rk_MultiDrawElementsIndirect) {
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch.commands_buffer);
}
if (batch.nparams) {
glBindBuffer(GL_ARRAY_BUFFER, batch.params_buffer);
}
unsigned const ncommands = rk_batch_build_commands(batch, ninstances, meshes); unsigned const ncommands = rk_batch_build_commands(batch, ninstances, meshes);
if (rk_MultiDrawElementsIndirect) { if (rk_MultiDrawElementsIndirect) {
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch.indirect_buffer);
glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, ncommands * sizeof(rk_command), batch.commands); glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, ncommands * sizeof(rk_command), batch.commands);
} }
if (batch.nparams) { if (batch.nparams && params) {
glBindBuffer(GL_ARRAY_BUFFER, batch.params_array);
rk_batch_pack(batch, ninstances, params); rk_batch_pack(batch, ninstances, params);
glBufferSubData(GL_ARRAY_BUFFER, 0, ninstances * batch.packed_size, batch.params);
} }
if (rk_DrawElementsInstancedBaseInstance) { if (rk_DrawElementsInstancedBaseInstance) {
if (rk_MultiDrawElementsIndirect) { if (rk_MultiDrawElementsIndirect) {
@ -768,13 +802,16 @@ void rk_draw_batch(
} }
} }
} else { } else {
unsigned params_offset = 0; unsigned param_index = 0;
rk_command const * const last_command = batch.commands + ncommands; rk_command const * const last_command = batch.commands + ncommands;
rk_parameter const * const last_param = batch.params + batch.nparams;
for (rk_command const * command = batch.commands; command < last_command; ++command) { for (rk_command const * command = batch.commands; command < last_command; ++command) {
if (batch.nparams) { unsigned binding = RK_PARAMS_BINDING_BASE;
glBindVertexBuffer(RK_PARAMS_BINDING, batch.params_buffer, params_offset, batch.packed_size); for (rk_parameter const * param = batch.params; param < last_param; ++param, ++binding) {
params_offset += command->ninstances * batch.packed_size; glBindVertexBuffer(binding, batch.params_array,
param->offset + param_index * param->size, param->size);
} }
param_index += command->ninstances;
glDrawElementsInstanced( glDrawElementsInstanced(
GL_TRIANGLES, command->nvertices, GL_UNSIGNED_SHORT, GL_TRIANGLES, command->nvertices, GL_UNSIGNED_SHORT,
reinterpret_cast<void const *>(command->base_index << 1), reinterpret_cast<void const *>(command->base_index << 1),
@ -784,7 +821,7 @@ void rk_draw_batch(
if (rk_MultiDrawElementsIndirect) { if (rk_MultiDrawElementsIndirect) {
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);
} }
if (batch.nparams) { if (batch.nparams && params) {
glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0);
} }
} }
@ -824,13 +861,13 @@ void rk_destroy_batch(
if (batch) { if (batch) {
delete[] batch->indices; delete[] batch->indices;
delete[] batch->commands; delete[] batch->commands;
if (batch->nparams) {
delete[] batch->packers;
delete[] batch->params;
glDeleteBuffers(1, &batch->params_buffer);
}
if (rk_MultiDrawElementsIndirect) { if (rk_MultiDrawElementsIndirect) {
glDeleteBuffers(1, &batch->commands_buffer); glDeleteBuffers(1, &batch->indirect_buffer);
}
if (batch->nparams) {
delete[] batch->params;
delete[] batch->packed_params;
glDeleteBuffers(1, &batch->params_array);
} }
delete batch; delete batch;
} }

View File

@ -23,7 +23,7 @@
enum : GLuint { enum : GLuint {
RK_VERTICES_BINDING = 0, RK_VERTICES_BINDING = 0,
RK_PARAMS_BINDING = 1 RK_PARAMS_BINDING_BASE = 1
}; };
struct rk_shader { struct rk_shader {
@ -51,6 +51,14 @@ struct rk_vertices {
GLuint indices; GLuint indices;
}; };
struct rk_command {
GLuint nvertices;
GLuint ninstances;
GLuint base_index;
GLint base_vertex;
GLuint base_instance;
};
struct rk_vec3_float { struct rk_vec3_float {
float x; float x;
float y; float y;
@ -79,54 +87,27 @@ struct rk_mat3_int10 {
rk_vec3_int10 z; rk_vec3_int10 z;
}; };
union rk_pack_src { typedef void (*rk_packer)(
rk_ubyte const * __restrict ptr; unsigned const, // count
rk_vec3_float const * __restrict vec3_float; rk_ushort const * const, // indices
rk_mat3_float const * __restrict mat3_float; rk_ubyte *, // dst
rk_ubyte const * const); // src
inline rk_pack_src() {} struct rk_parameter {
inline rk_pack_src(rk_ubyte const * const __restrict src) : ptr(src) {} unsigned offset;
}; unsigned size;
rk_packer packer;
union rk_pack_dst {
rk_ubyte * __restrict ptr;
rk_vec3_float * __restrict vec3_float;
rk_vec3_short * __restrict vec3_short;
rk_vec3_int10 * __restrict vec3_int10;
rk_mat3_float * __restrict mat3_float;
rk_mat3_int10 * __restrict mat3_int10;
inline rk_pack_dst() {}
inline rk_pack_dst(rk_ubyte * const __restrict dst) : ptr(dst) {}
};
typedef void (*rk_packer_fn)(rk_pack_dst const, rk_pack_src const);
struct rk_packer {
rk_packer_fn pack;
unsigned src_incr;
unsigned dst_incr;
};
struct rk_command {
GLuint nvertices;
GLuint ninstances;
GLuint base_index;
GLint base_vertex;
GLuint base_instance;
}; };
struct rk_batch { struct rk_batch {
unsigned size; unsigned max_size;
unsigned nparams; unsigned nparams;
unsigned params_size;
unsigned packed_size;
rk_ushort * indices; rk_ushort * indices;
rk_command * commands; rk_command * commands;
rk_packer * packers; rk_parameter * params;
rk_ubyte * params; rk_ubyte * packed_params;
GLuint commands_buffer; GLuint indirect_buffer;
GLuint params_buffer; GLuint params_array;
}; };
#endif // _RK_ENGINE_RENDER_OPENGLES_H #endif // _RK_ENGINE_RENDER_OPENGLES_H