Switch back to SoA (structure-of-arrays) parameters.

This commit is contained in:
Roz K 2022-12-30 10:50:56 +01:00
parent 414630ecfd
commit 7db5304d40
Signed by: roz
GPG Key ID: 51FBF4E483E1C822
4 changed files with 262 additions and 232 deletions

View File

@ -257,6 +257,18 @@ PARAM_FORMAT_VEC3_INT10 = 3
PARAM_FORMAT_MAT3_FLOAT = 4
PARAM_FORMAT_MAT3_INT10 = 5
PARAM_FORMAT_NORMALIZE = _flag(7)
# Mask that keeps only the format code of a parameter format value
# (the bits below PARAM_FORMAT_NORMALIZE, assuming _flag() yields a single bit -- confirm).
_PARAM_FORMAT_MASK = PARAM_FORMAT_NORMALIZE - 1
# Python-side value type for each parameter format, indexed by the format code
# once it has been masked with _PARAM_FORMAT_MASK (see param_type).
# Index 0 has no associated type.
_PARAMS_TYPES = (
None,
vec3, # PARAM_FORMAT_VEC3_FLOAT
vec3, # PARAM_FORMAT_VEC3_SHORT
vec3, # PARAM_FORMAT_VEC3_INT10
mat3, # PARAM_FORMAT_MAT3_FLOAT
mat3) # PARAM_FORMAT_MAT3_INT10
def param_type(format):
    """Return the value type associated with a parameter format.

    The format is masked with _PARAM_FORMAT_MASK before the lookup, so any
    bits outside the mask (such as PARAM_FORMAT_NORMALIZE) do not affect the
    result. The looked-up entry of _PARAMS_TYPES is returned as-is, which is
    None for format code 0.
    """
    return _PARAMS_TYPES[format & _PARAM_FORMAT_MASK]
def params_format(*format):
    """Pack parameter format codes into a bytes object.

    Each positional argument becomes one unsigned byte, in argument order.
    Calling with no arguments returns b''. Values that do not fit in an
    unsigned byte are rejected by array('B') with OverflowError.
    """
    return array('B', format).tobytes()
@ -395,10 +407,10 @@ select_vertices.argtypes = (
draw_batch = _engine.rk_draw_batch
draw_batch.argtypes = (
ctypes.c_void_p, # batch
ctypes.c_uint, # size
ctypes.c_uint, # count
ctypes.POINTER(ctypes.c_ubyte), # flags
ctypes.POINTER(ctypes.c_uint), # meshes
ctypes.c_void_p) # params
ctypes.POINTER(ctypes.c_void_p)) # params
unselect_vertices = _engine.rk_unselect_vertices
unselect_vertices.argtypes = (

View File

@ -167,7 +167,7 @@ RK_EXPORT void rk_draw_batch(
rk_uint size,
rk_instance_flags const * flags,
rk_mesh const * meshes,
rk_ubyte const * params);
rk_ubyte const ** params);
RK_EXPORT void rk_unselect_vertices(
rk_vertices_t vertices);

View File

@ -348,220 +348,262 @@ rk_vertices_t rk_create_vertices(
}
static void rk_pack_vec3_float(
rk_pack_dst const dst,
rk_pack_src const src) {
*dst.vec3_float = *src.vec3_float;
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_vec3_float * __restrict dst = reinterpret_cast<rk_vec3_float *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
*dst = src[*index];
}
}
static void rk_pack_vec3_short(
rk_pack_dst const dst,
rk_pack_src const src) {
dst.vec3_short->x = static_cast<rk_short>(src.vec3_float->x);
dst.vec3_short->y = static_cast<rk_short>(src.vec3_float->y);
dst.vec3_short->z = static_cast<rk_short>(src.vec3_float->z);
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_vec3_short * __restrict dst = reinterpret_cast<rk_vec3_short *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
rk_vec3_float const & input = src[*index];
dst->x = static_cast<rk_short>(input.x);
dst->y = static_cast<rk_short>(input.y);
dst->z = static_cast<rk_short>(input.z);
}
}
static void rk_pack_vec3_short_norm(
rk_pack_dst const dst,
rk_pack_src const src) {
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_vec3_short * __restrict dst = reinterpret_cast<rk_vec3_short *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
#define _convert(s) (static_cast<rk_short>((s) * ((s) < 0.f ? 32768.f : 32767.f)))
dst.vec3_short->x = _convert(src.vec3_float->x);
dst.vec3_short->y = _convert(src.vec3_float->y);
dst.vec3_short->z = _convert(src.vec3_float->z);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
rk_vec3_float const & input = src[*index];
dst->x = _convert(input.x);
dst->y = _convert(input.y);
dst->z = _convert(input.z);
}
#undef _convert
}
static void rk_pack_vec3_int10(
rk_pack_dst const dst,
rk_pack_src const src) {
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_vec3_int10 * __restrict dst = reinterpret_cast<rk_vec3_int10 *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
#define _convert(s) (static_cast<rk_int>((s)) & 1023)
*dst.vec3_int10 =
_convert(src.vec3_float->x) | (_convert(src.vec3_float->y) << 10) | (_convert(src.vec3_float->z) << 20);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
rk_vec3_float const & input = src[*index];
*dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20);
}
#undef _convert
}
static void rk_pack_vec3_int10_norm(
rk_pack_dst const dst,
rk_pack_src const src) {
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_vec3_int10 * __restrict dst = reinterpret_cast<rk_vec3_int10 *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
#define _convert(s) (static_cast<rk_int>((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023)
*dst.vec3_int10 =
_convert(src.vec3_float->x) | (_convert(src.vec3_float->y) << 10) | (_convert(src.vec3_float->z) << 20);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
rk_vec3_float const & input = src[*index];
*dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20);
}
#undef _convert
}
static void rk_pack_mat3_float(
rk_pack_dst const dst,
rk_pack_src const src) {
*dst.mat3_float = *src.mat3_float;
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_mat3_float * __restrict dst = reinterpret_cast<rk_mat3_float *>(_dst);
rk_mat3_float const * const __restrict src = reinterpret_cast<rk_mat3_float const *>(_src);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
*dst = src[*index];
}
#undef _convert
}
static void rk_pack_mat3_int10(
rk_pack_dst const dst,
rk_pack_src const src) {
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_mat3_int10 * __restrict dst = reinterpret_cast<rk_mat3_int10 *>(_dst);
rk_mat3_float const * const __restrict src = reinterpret_cast<rk_mat3_float const *>(_src);
#define _convert(s) (static_cast<rk_int>((s)) & 1023)
dst.mat3_int10->x =
_convert(src.mat3_float->x.x) | (_convert(src.mat3_float->x.y) << 10) | (_convert(src.mat3_float->x.z) << 20);
dst.mat3_int10->y =
_convert(src.mat3_float->y.x) | (_convert(src.mat3_float->y.y) << 10) | (_convert(src.mat3_float->y.z) << 20);
dst.mat3_int10->z =
_convert(src.mat3_float->z.x) | (_convert(src.mat3_float->z.y) << 10) | (_convert(src.mat3_float->z.z) << 20);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
rk_mat3_float const & input = src[*index];
dst->x = _convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20);
dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20);
dst->z = _convert(input.z.x) | (_convert(input.z.y) << 10) | (_convert(input.z.z) << 20);
}
#undef _convert
}
static void rk_pack_mat3_int10_norm(
rk_pack_dst const dst,
rk_pack_src const src) {
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_mat3_int10 * __restrict dst = reinterpret_cast<rk_mat3_int10 *>(_dst);
rk_mat3_float const * const __restrict src = reinterpret_cast<rk_mat3_float const *>(_src);
#define _convert(s) (static_cast<rk_int>((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023)
dst.mat3_int10->x =
_convert(src.mat3_float->x.x) | (_convert(src.mat3_float->x.y) << 10) | (_convert(src.mat3_float->x.z) << 20);
dst.mat3_int10->y =
_convert(src.mat3_float->y.x) | (_convert(src.mat3_float->y.y) << 10) | (_convert(src.mat3_float->y.z) << 20);
dst.mat3_int10->z =
_convert(src.mat3_float->z.x) | (_convert(src.mat3_float->z.y) << 10) | (_convert(src.mat3_float->z.z) << 20);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
rk_mat3_float const & input = src[*index];
dst->x = _convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20);
dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20);
dst->z = _convert(input.z.x) | (_convert(input.z.y) << 10) | (_convert(input.z.z) << 20);
}
#undef _convert
}
//TODO: multiple batches per vertices
//TODO: multiple batches per vertices with their own buffers
rk_batch_t rk_create_batch(
rk_vertices_t _vertices,
rk_uint max_size,
rk_param_format const * params_format) {
rk_vertices const * const vertices = reinterpret_cast<rk_vertices const *>(_vertices);
if (!vertices || !max_size || !params_format || max_size > RK_BATCH_MAX_SIZE) {
if (!vertices || !max_size || max_size > RK_BATCH_MAX_SIZE) {
rk_printf("rk_create_batch(): invalid parameters.");
return nullptr;
}
unsigned nparams = 0;
if (params_format) {
for ( ; params_format[nparams]; ++nparams);
}
rk_batch * batch = new rk_batch;
batch->max_size = max_size;
batch->nparams = nparams;
batch->indices = nullptr;
batch->commands = nullptr;
batch->params = nullptr;
batch->packed_params = nullptr;
batch->indirect_buffer = 0;
batch->params_array = 0;
unsigned params_size = 0;
unsigned packed_size = 0;
unsigned nattribs = 0;
for (rk_param_format const * f = params_format; *f; ++f, ++nparams) {
if (nparams) {
batch->params = new rk_parameter[nparams];
rk_parameter * param = batch->params;
for (rk_param_format const * f = params_format; *f; ++f, ++param) {
param->offset = params_size;
bool const norm = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0;
switch (*f & RK_PARAM_FORMAT_MASK) {
case RK_PARAM_FORMAT_VEC3_FLOAT:
params_size += sizeof(rk_vec3_float);
packed_size += sizeof(rk_vec3_float);
nattribs += 1;
param->size = sizeof(rk_vec3_float);
param->packer = rk_pack_vec3_float;
break;
case RK_PARAM_FORMAT_VEC3_SHORT:
params_size += sizeof(rk_vec3_float);
packed_size += sizeof(rk_vec3_short);
nattribs += 1;
param->size = sizeof(rk_vec3_short);
param->packer = norm ? rk_pack_vec3_short_norm : rk_pack_vec3_short;
break;
case RK_PARAM_FORMAT_VEC3_INT10:
params_size += sizeof(rk_vec3_float);
packed_size += sizeof(rk_vec3_int10);
nattribs += 1;
param->size = sizeof(rk_vec3_int10);
param->packer = norm ? rk_pack_vec3_int10_norm : rk_pack_vec3_int10;
break;
case RK_PARAM_FORMAT_MAT3_FLOAT:
params_size += sizeof(rk_mat3_float);
packed_size += sizeof(rk_mat3_float);
nattribs += 3;
param->size = sizeof(rk_mat3_float);
param->packer = rk_pack_mat3_float;
break;
case RK_PARAM_FORMAT_MAT3_INT10:
params_size += sizeof(rk_mat3_float);
packed_size += sizeof(rk_mat3_int10);
nattribs += 3;
param->size = sizeof(rk_mat3_int10);
param->packer = norm ? rk_pack_mat3_int10_norm : rk_pack_mat3_int10;
break;
default:
rk_printf("rk_create_batch(): invalid param format.");
delete[] batch->params;
delete batch;
return nullptr;
break;
}
params_size += max_size * param->size;
}
}
glBindVertexArray(vertices->array);
rk_batch * batch = new rk_batch;
batch->size = max_size;
batch->nparams = nparams;
batch->params_size = params_size;
batch->packed_size = packed_size;
batch->indices = new rk_ushort[max_size];
batch->commands = new rk_command[max_size * sizeof(rk_command)];
batch->commands = new rk_command[max_size];
memset(batch->commands, 0, max_size * sizeof(rk_command));
batch->indirect_buffer = 0;
if (rk_MultiDrawElementsIndirect) {
glGenBuffers(1, &batch->commands_buffer);
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch->commands_buffer);
glBufferData(GL_DRAW_INDIRECT_BUFFER, max_size * sizeof(rk_command), batch->commands, GL_DYNAMIC_DRAW);
glGenBuffers(1, &batch->indirect_buffer);
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch->indirect_buffer);
glBufferData(GL_DRAW_INDIRECT_BUFFER, max_size * sizeof(rk_command), nullptr, GL_DYNAMIC_DRAW);
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);
} else {
batch->commands_buffer = 0;
}
batch->packers = nullptr;
batch->params = nullptr;
if (nparams) {
batch->packers = new rk_packer[nparams];
batch->params = new rk_ubyte[max_size * packed_size];
memset(batch->params, 0, max_size * packed_size);
glGenBuffers(1, &batch->params_buffer);
glBindBuffer(GL_ARRAY_BUFFER, batch->params_buffer);
glBufferData(GL_ARRAY_BUFFER, max_size * batch->packed_size, batch->params, GL_DYNAMIC_DRAW);
batch->packed_params = new rk_ubyte[params_size];
memset(batch->packed_params, 0, params_size);
glBindVertexArray(vertices->array);
glGenBuffers(1, &batch->params_array);
glBindBuffer(GL_ARRAY_BUFFER, batch->params_array);
glBufferData(GL_ARRAY_BUFFER, params_size, nullptr, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexBuffer(RK_PARAMS_BINDING, batch->params_buffer, 0, batch->packed_size);
for (unsigned attrib = vertices->layout; attrib < vertices->layout + nattribs; ++attrib) {
glEnableVertexAttribArray(attrib);
}
rk_packer * packer = batch->packers;
unsigned layout = vertices->layout;
unsigned offset = 0;
for (rk_param_format const * f = params_format; *f; ++f, ++packer) {
GLboolean const norm = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0;
rk_parameter const * param = batch->params;
unsigned binding = RK_PARAMS_BINDING_BASE;
unsigned attrib = vertices->layout;
for (rk_param_format const * f = params_format; *f; ++f, ++param, ++binding) {
bool const norm = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0;
glBindVertexBuffer(binding, batch->params_array, param->offset, param->size);
switch (*f & RK_PARAM_FORMAT_MASK) {
case RK_PARAM_FORMAT_VEC3_FLOAT:
glVertexAttribFormat(layout++, 3, GL_FLOAT, GL_FALSE, offset);
packer->pack = rk_pack_vec3_float;
packer->src_incr = sizeof(rk_vec3_float);
packer->dst_incr = sizeof(rk_vec3_float);
glEnableVertexAttribArray(attrib);
glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, 0);
glVertexAttribBinding(attrib++, binding);
break;
case RK_PARAM_FORMAT_VEC3_SHORT:
glVertexAttribFormat(layout++, 3, GL_SHORT, norm, offset);
if (norm) {
packer->pack = rk_pack_vec3_short_norm;
} else {
packer->pack = rk_pack_vec3_short;
}
packer->src_incr = sizeof(rk_vec3_float);
packer->dst_incr = sizeof(rk_vec3_short);
glEnableVertexAttribArray(attrib);
glVertexAttribFormat(attrib, 3, GL_SHORT, norm, 0);
glVertexAttribBinding(attrib++, binding);
break;
case RK_PARAM_FORMAT_VEC3_INT10:
glVertexAttribFormat(layout++, 4, GL_INT_2_10_10_10_REV, norm, offset);
if (norm) {
packer->pack = rk_pack_vec3_int10_norm;
} else {
packer->pack = rk_pack_vec3_int10;
}
packer->src_incr = sizeof(rk_vec3_float);
packer->dst_incr = sizeof(rk_vec3_int10);
glEnableVertexAttribArray(attrib);
glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, 0);
glVertexAttribBinding(attrib++, binding);
break;
case RK_PARAM_FORMAT_MAT3_FLOAT:
glVertexAttribFormat(layout++, 3, GL_FLOAT, GL_FALSE, offset + offsetof(rk_mat3_float, x));
glVertexAttribFormat(layout++, 3, GL_FLOAT, GL_FALSE, offset + offsetof(rk_mat3_float, y));
glVertexAttribFormat(layout++, 3, GL_FLOAT, GL_FALSE, offset + offsetof(rk_mat3_float, z));
packer->pack = rk_pack_mat3_float;
packer->src_incr = sizeof(rk_mat3_float);
packer->dst_incr = sizeof(rk_mat3_float);
glEnableVertexAttribArray(attrib);
glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, x));
glVertexAttribBinding(attrib++, binding);
glEnableVertexAttribArray(attrib);
glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, y));
glVertexAttribBinding(attrib++, binding);
glEnableVertexAttribArray(attrib);
glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, z));
glVertexAttribBinding(attrib++, binding);
break;
case RK_PARAM_FORMAT_MAT3_INT10:
glVertexAttribFormat(layout++, 4, GL_INT_2_10_10_10_REV, norm, offset + offsetof(rk_mat3_int10, x));
glVertexAttribFormat(layout++, 4, GL_INT_2_10_10_10_REV, norm, offset + offsetof(rk_mat3_int10, y));
glVertexAttribFormat(layout++, 4, GL_INT_2_10_10_10_REV, norm, offset + offsetof(rk_mat3_int10, z));
if (norm) {
packer->pack = rk_pack_mat3_int10_norm;
} else {
packer->pack = rk_pack_mat3_int10;
}
packer->src_incr = sizeof(rk_mat3_float);
packer->dst_incr = sizeof(rk_mat3_int10);
glEnableVertexAttribArray(attrib);
glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, x));
glVertexAttribBinding(attrib++, binding);
glEnableVertexAttribArray(attrib);
glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, y));
glVertexAttribBinding(attrib++, binding);
glEnableVertexAttribArray(attrib);
glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, z));
glVertexAttribBinding(attrib++, binding);
break;
}
offset += packer->dst_incr;
}
for (unsigned attrib = vertices->layout; attrib < vertices->layout + nattribs; ++attrib) {
glVertexAttribBinding(attrib, RK_PARAMS_BINDING);
}
glVertexBindingDivisor(RK_PARAMS_BINDING, 1);
} else {
batch->params_buffer = 0;
glVertexBindingDivisor(binding, 1);
}
glBindVertexArray(0);
}
return batch;
}
@ -704,7 +746,6 @@ static unsigned rk_batch_build_commands(
commands->nvertices = static_cast<GLuint>(mesh.ntriangles) * 3;
commands->ninstances = first - base;
commands->base_index = mesh.base_index;
commands->base_vertex = 0;
commands->base_instance = base - batch.indices;
}
return commands - batch.commands;
@ -713,47 +754,40 @@ static unsigned rk_batch_build_commands(
static void rk_batch_pack(
rk_batch & batch,
unsigned const ninstances,
rk_ubyte const * const params) {
rk_pack_dst dst(batch.params);
rk_ushort const * const last_index = batch.indices + ninstances;
rk_packer const * const last_packer = batch.packers + batch.nparams;
for (rk_ushort const * index = batch.indices; index < last_index; ++index) {
rk_pack_src src(&params[batch.params_size * (*index)]);
for (rk_packer const * packer = batch.packers; packer < last_packer; ++packer) {
packer->pack(dst, src);
src.ptr += packer->src_incr;
dst.ptr += packer->dst_incr;
rk_ubyte const ** srcs) {
rk_parameter const * const last_param = batch.params + batch.nparams;
for (rk_parameter const * param = batch.params; param < last_param; ++param) {
rk_ubyte const * const src = *srcs++;
if (src) {
rk_ubyte * const dst = batch.packed_params + param->offset;
param->packer(ninstances, batch.indices, dst, src);
glBufferSubData(GL_ARRAY_BUFFER, param->offset, ninstances * param->size, dst);
}
}
}
void rk_draw_batch(
rk_batch_t _batch,
rk_uint size,
rk_uint count,
rk_instance_flags const * flags,
rk_mesh const * meshes,
rk_ubyte const * params) {
rk_ubyte const ** params) {
rk_batch & batch = *reinterpret_cast<rk_batch *>(_batch);
if (!size || size > batch.size || !flags || !meshes || !rk_current_shader || !rk_current_vertices) {
if (!count || count > batch.max_size || !flags || !meshes || !rk_current_shader || !rk_current_vertices) {
return;
}
unsigned const ninstances = rk_batch_filter(batch, size, flags);
unsigned const ninstances = rk_batch_filter(batch, count, flags);
if (!ninstances) {
return;
}
if (rk_MultiDrawElementsIndirect) {
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch.commands_buffer);
}
if (batch.nparams) {
glBindBuffer(GL_ARRAY_BUFFER, batch.params_buffer);
}
unsigned const ncommands = rk_batch_build_commands(batch, ninstances, meshes);
if (rk_MultiDrawElementsIndirect) {
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch.indirect_buffer);
glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, ncommands * sizeof(rk_command), batch.commands);
}
if (batch.nparams) {
if (batch.nparams && params) {
glBindBuffer(GL_ARRAY_BUFFER, batch.params_array);
rk_batch_pack(batch, ninstances, params);
glBufferSubData(GL_ARRAY_BUFFER, 0, ninstances * batch.packed_size, batch.params);
}
if (rk_DrawElementsInstancedBaseInstance) {
if (rk_MultiDrawElementsIndirect) {
@ -768,13 +802,16 @@ void rk_draw_batch(
}
}
} else {
unsigned params_offset = 0;
unsigned param_index = 0;
rk_command const * const last_command = batch.commands + ncommands;
rk_parameter const * const last_param = batch.params + batch.nparams;
for (rk_command const * command = batch.commands; command < last_command; ++command) {
if (batch.nparams) {
glBindVertexBuffer(RK_PARAMS_BINDING, batch.params_buffer, params_offset, batch.packed_size);
params_offset += command->ninstances * batch.packed_size;
unsigned binding = RK_PARAMS_BINDING_BASE;
for (rk_parameter const * param = batch.params; param < last_param; ++param, ++binding) {
glBindVertexBuffer(binding, batch.params_array,
param->offset + param_index * param->size, param->size);
}
param_index += command->ninstances;
glDrawElementsInstanced(
GL_TRIANGLES, command->nvertices, GL_UNSIGNED_SHORT,
reinterpret_cast<void const *>(command->base_index << 1),
@ -784,7 +821,7 @@ void rk_draw_batch(
if (rk_MultiDrawElementsIndirect) {
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);
}
if (batch.nparams) {
if (batch.nparams && params) {
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
}
@ -824,13 +861,13 @@ void rk_destroy_batch(
if (batch) {
delete[] batch->indices;
delete[] batch->commands;
if (batch->nparams) {
delete[] batch->packers;
delete[] batch->params;
glDeleteBuffers(1, &batch->params_buffer);
}
if (rk_MultiDrawElementsIndirect) {
glDeleteBuffers(1, &batch->commands_buffer);
glDeleteBuffers(1, &batch->indirect_buffer);
}
if (batch->nparams) {
delete[] batch->params;
delete[] batch->packed_params;
glDeleteBuffers(1, &batch->params_array);
}
delete batch;
}

View File

@ -23,7 +23,7 @@
enum : GLuint {
RK_VERTICES_BINDING = 0,
RK_PARAMS_BINDING = 1
RK_PARAMS_BINDING_BASE = 1
};
struct rk_shader {
@ -51,6 +51,14 @@ struct rk_vertices {
GLuint indices;
};
// One indexed, instanced draw command built by rk_batch_build_commands().
// The command array is uploaded unchanged to GL_DRAW_INDIRECT_BUFFER by
// rk_draw_batch(), so field order and types must be preserved.
// NOTE(review): the layout looks like OpenGL's DrawElementsIndirectCommand
// (count, instanceCount, firstIndex, baseVertex, baseInstance) -- confirm.
struct rk_command {
GLuint nvertices; // number of indices to draw (3 per mesh triangle)
GLuint ninstances; // number of instances drawn by this command
GLuint base_index; // first index of the mesh, counted in index-buffer elements
GLint base_vertex; // presumably the offset added to every index -- confirm against the GL spec
GLuint base_instance; // position of the command's first instance within the batch indices
};
struct rk_vec3_float {
float x;
float y;
@ -79,54 +87,27 @@ struct rk_mat3_int10 {
rk_vec3_int10 z;
};
union rk_pack_src {
rk_ubyte const * __restrict ptr;
rk_vec3_float const * __restrict vec3_float;
rk_mat3_float const * __restrict mat3_float;
// Signature shared by the rk_pack_* routines: for each of the `count` entries
// of `indices`, element indices[i] of the source array `src` is converted and
// stored as element i of the packed output array `dst`.
typedef void (*rk_packer)(
unsigned const, // count
rk_ushort const * const, // indices
rk_ubyte *, // dst
rk_ubyte const * const); // src
inline rk_pack_src() {}
inline rk_pack_src(rk_ubyte const * const __restrict src) : ptr(src) {}
};
union rk_pack_dst {
rk_ubyte * __restrict ptr;
rk_vec3_float * __restrict vec3_float;
rk_vec3_short * __restrict vec3_short;
rk_vec3_int10 * __restrict vec3_int10;
rk_mat3_float * __restrict mat3_float;
rk_mat3_int10 * __restrict mat3_int10;
inline rk_pack_dst() {}
inline rk_pack_dst(rk_ubyte * const __restrict dst) : ptr(dst) {}
};
typedef void (*rk_packer_fn)(rk_pack_dst const, rk_pack_src const);
struct rk_packer {
rk_packer_fn pack;
unsigned src_incr;
unsigned dst_incr;
};
struct rk_command {
GLuint nvertices;
GLuint ninstances;
GLuint base_index;
GLint base_vertex;
GLuint base_instance;
// Describes one per-instance parameter array of a batch.
struct rk_parameter {
unsigned offset; // byte offset of this parameter's array within the batch's packed buffer
unsigned size; // size in bytes of one packed element; also passed as the vertex buffer stride
rk_packer packer; // routine that converts the caller's source values into the packed format
};
struct rk_batch {
unsigned size;
unsigned max_size;
unsigned nparams;
unsigned params_size;
unsigned packed_size;
rk_ushort * indices;
rk_command * commands;
rk_packer * packers;
rk_ubyte * params;
GLuint commands_buffer;
GLuint params_buffer;
rk_parameter * params;
rk_ubyte * packed_params;
GLuint indirect_buffer;
GLuint params_array;
};
#endif // _RK_ENGINE_RENDER_OPENGLES_H