Compare commits

...

7 Commits

Author SHA1 Message Date
b46b4bddba Improve typing in render. 2023-01-04 09:33:49 +01:00
ebc6ededf3 Improve mesh struct after switching to mesh indices. 2023-01-04 09:06:13 +01:00
74b6f58794 Fix redundant modification test in batch sorting. 2023-01-03 21:41:03 +01:00
39a95e24c3 Switch to buckets sorting. 2023-01-03 21:31:36 +01:00
3e0ea2560a Improve compare-memcopy. 2023-01-03 20:50:18 +01:00
558ec08614 Buckets sorting. 2023-01-03 16:06:11 +01:00
211762c279 Remove obvious flags from makefile. 2023-01-03 16:05:37 +01:00
8 changed files with 340 additions and 151 deletions

View File

@ -21,7 +21,7 @@ cpp/math.cpp
OUTPUTFILE = engine.so
CXXFLAGS = -Wall -Werror -O2 -march=x86-64 -mtune=generic -msse2 -ffast-math -fpic -flto -fno-rtti -fno-exceptions
CXXFLAGS = -Wall -Werror -O2 -msse2 -ffast-math -fpic -flto -fno-rtti -fno-exceptions
.PHONY: all
all: clean $(OUTPUTFILE)

View File

@ -380,8 +380,9 @@ _create_vertices.argtypes = (
ctypes.c_void_p) # meshes
def create_vertices(format, nvertices, vertices, indices, meshes):
assert len(meshes) % 2 == 0
return _create_vertices(format,
nvertices, _ubyte_addr(vertices), len(indices), _ushort_addr(indices), len(meshes), _uint_addr(meshes))
nvertices, _ubyte_addr(vertices), len(indices), _ushort_addr(indices), len(meshes) // 2, _uint_addr(meshes))
create_batch = _engine.rk_create_batch
create_batch.restype = _handle

99
cpp/cmp_memcpy.hpp Normal file
View File

@ -0,0 +1,99 @@
// Copyright (C) 2023 RozK
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef RK_ENGINE_CMP_MEMCPY_H
#define RK_ENGINE_CMP_MEMCPY_H
#include "types.hpp"
// Copies `count` elements from `src` to `dst`, one element at a time, and
// reports whether the copy changed the destination.
//
// dst   : destination buffer, overwritten with the contents of `src`.
// src   : source buffer; declared __restrict, so it must not overlap `dst`.
// count : number of _small elements to copy; 0 is a no-op.
//
// Returns true when at least one destination element differed from the
// matching source element before being overwritten, false otherwise
// (including when `count` is 0).
template<typename _small>
bool _rk_cmp_memcpy_small(
    _small * __restrict dst,
    _small const * __restrict src,
    unsigned count) {
    // OR together the XOR of every (dst, src) pair: any differing bit leaves
    // `cmp` non-zero, so the loop body needs no branch.
    _small cmp = 0;
    // Pre-tested loop: a zero count must not touch memory nor wrap the counter.
    for (; count > 0; --count) {
        cmp |= *dst ^ *src;
        *dst++ = *src++;
    }
    return (cmp != 0);
}
// Copies `_count` elements of type _small from `_src` to `_dst`, moving the
// bulk of the data in words of type _big, and reports whether the copy
// changed the destination.
//
// _dst   : destination buffer, overwritten with the contents of `_src`.
// _src   : source buffer; declared __restrict, so it must not overlap `_dst`.
// _count : number of _small elements to copy.
//
// The first (_count / ratio) * ratio elements are transferred as _big words;
// the remaining _count % ratio elements are handed to _rk_cmp_memcpy_small.
//
// Returns true when any destination word or element differed from its source
// before being overwritten.
//
// NOTE(review): the buffers are accessed through _big lvalues obtained with
// reinterpret_cast, so the caller is expected to have checked that this is
// acceptable (size and alignment) for the target — confirm against callers.
template<typename _big, typename _small>
bool _rk_cmp_memcpy_big(
    _small * const __restrict _dst,
    _small const * const __restrict _src,
    unsigned const _count) {
    unsigned const ratio = sizeof(_big) / sizeof(_small);
    unsigned big_count = _count / ratio;
    unsigned const small_count = _count % ratio;
    _big * dst = reinterpret_cast<_big *>(_dst);
    _big const * src = reinterpret_cast<_big const *>(_src);
    // OR together the XOR of every word pair: non-zero means "something changed".
    _big cmp = 0;
    // Pre-tested loop: when _count < ratio there is no whole word to copy and
    // the counter must not wrap around.
    for (; big_count > 0; --big_count) {
        cmp |= *dst ^ *src;
        *dst++ = *src++;
    }
    bool modified = (cmp != 0);
    if (small_count) {
        // Tail: fewer than `ratio` elements left, copy them individually.
        modified |= _rk_cmp_memcpy_small<_small>(
            reinterpret_cast<_small *>(dst), reinterpret_cast<_small const *>(src), small_count);
    }
    return modified;
}
// _rk_count_and_alignment(_t): true when the copy is long enough to contain at
// least one word of type _t and, unless RK_CMP_MEMCPY_UNALIGNED is defined,
// both pointers are aligned on sizeof(_t). Expands inside rk_cmp_memcpy only:
// it reads the local names `count`, `alignment` and `_small`.
#ifdef RK_CMP_MEMCPY_UNALIGNED
#define _rk_count_and_alignment(_t) (count >= (sizeof(_t) / sizeof(_small)))
#else
#define _rk_count_and_alignment(_t) ((count >= (sizeof(_t) / sizeof(_small))) && !(alignment % sizeof(_t)))
#endif

// Copies `count` elements from `_src` to `_dst` and reports whether the
// destination was modified by the copy.
//
// _dst  : destination buffer, overwritten with the contents of `_src`.
// _src  : source buffer; declared __restrict, so it must not overlap `_dst`.
// count : number of _small elements to copy; 0 is a no-op returning false.
//
// The widest word type among rk_ullong, rk_ulong, rk_uint and rk_ushort that
// is strictly larger than _small and passes _rk_count_and_alignment() is used
// for the bulk of the transfer; otherwise the copy is done element by element.
//
// Returns true when the previous destination contents differed from the source.
template<typename _small>
bool rk_cmp_memcpy(
    _small * const __restrict _dst,
    _small const * const __restrict _src,
    unsigned const count) {
#ifndef RK_CMP_MEMCPY_UNALIGNED
    // OR of both addresses: a low bit set in either pointer breaks the alignment.
    uintptr_t const alignment = reinterpret_cast<uintptr_t>(_dst) | reinterpret_cast<uintptr_t>(_src);
#endif
    // `if constexpr` keeps _rk_cmp_memcpy_big from being instantiated with a
    // word type that is not larger than _small (its size ratio would be 0).
    if constexpr (sizeof(_small) < sizeof(rk_ullong)) {
        if (_rk_count_and_alignment(rk_ullong)) {
            return _rk_cmp_memcpy_big<rk_ullong, _small>(_dst, _src, count);
        }
    }
    if constexpr (sizeof(_small) < sizeof(rk_ulong)) {
        if (_rk_count_and_alignment(rk_ulong)) {
            return _rk_cmp_memcpy_big<rk_ulong, _small>(_dst, _src, count);
        }
    }
    if constexpr (sizeof(_small) < sizeof(rk_uint)) {
        if (_rk_count_and_alignment(rk_uint)) {
            return _rk_cmp_memcpy_big<rk_uint, _small>(_dst, _src, count);
        }
    }
    if constexpr (sizeof(_small) < sizeof(rk_ushort)) {
        if (_rk_count_and_alignment(rk_ushort)) {
            return _rk_cmp_memcpy_big<rk_ushort, _small>(_dst, _src, count);
        }
    }
    if (count) {
        return _rk_cmp_memcpy_small<_small>(_dst, _src, count);
    }
    return false;
}
#undef _rk_count_and_alignment
#endif // RK_ENGINE_CMP_MEMCPY_H

View File

@ -26,6 +26,14 @@ typedef glm::vec4 rk_vec4;
typedef glm::mat3 rk_mat3;
typedef glm::mat4 rk_mat4;
// Compile-time size check: type _t must be exactly as large as _c elements of type _e.
#define RK_CHECK_MATH_TYPE(_t, _e, _c) static_assert(sizeof(_t) == sizeof(_e) * (_c))
// Vectors must have the size of 2, 3 or 4 floats.
RK_CHECK_MATH_TYPE(rk_vec2, float, 2);
RK_CHECK_MATH_TYPE(rk_vec3, float, 3);
RK_CHECK_MATH_TYPE(rk_vec4, float, 4);
// Matrices must have the size of 3 rk_vec3 / 4 rk_vec4.
RK_CHECK_MATH_TYPE(rk_mat3, rk_vec3, 3);
RK_CHECK_MATH_TYPE(rk_mat4, rk_vec4, 4);
#define vec3_right (rk_vec3(1.f, 0.f, 0.f))
#define vec3_forward (rk_vec3(0.f, 1.f, 0.f))
#define vec3_up (rk_vec3(0.f, 0.f, 1.f))

View File

@ -27,14 +27,18 @@ typedef rk_handle_t rk_triangles_t;
typedef rk_handle_t rk_vertices_t;
typedef rk_handle_t rk_batch_t;
enum rk_texture_format : rk_uint {
typedef rk_uint rk_texture_format;
enum : rk_uint {
RK_TEXTURE_FORMAT_SRGB8_A8 = 0,
RK_TEXTURE_FORMAT_RGBA8 = 1,
RK_TEXTURE_FORMAT_RGB10_A2 = 2,
RK_TEXTURE_FORMAT_FLOAT_32 = 3
};
enum rk_texture_flags : rk_uint {
typedef rk_uint rk_texture_flags;
enum : rk_uint {
RK_TEXTURE_FLAG_3D = RK_FLAG(0),
RK_TEXTURE_FLAG_MIPMAPS = RK_FLAG(1),
RK_TEXTURE_FLAG_MIN_NEAREST = 0,
@ -43,41 +47,53 @@ enum rk_texture_flags : rk_uint {
RK_TEXTURE_FLAG_MAG_LINEAR = RK_FLAG(3),
};
enum rk_vertex_format : rk_ubyte {
typedef rk_ubyte rk_vertex_format;
enum : rk_ubyte {
RK_VERTEX_FORMAT_VEC3_FLOAT = 1,
RK_VERTEX_FORMAT_VEC3_INT10 = 2,
RK_VERTEX_FORMAT_VEC3_UINT10 = 3
RK_VERTEX_FORMAT_VEC3_UINT10 = 3,
RK_VERTEX_FORMAT_NORMALIZE = RK_FLAG(7),
RK_VERTEX_FORMAT_MASK = RK_VERTEX_FORMAT_NORMALIZE - 1
};
enum : rk_ubyte { RK_VERTEX_FORMAT_NORMALIZE = RK_FLAG(7) };
enum : rk_ubyte { RK_VERTEX_FORMAT_MASK = RK_VERTEX_FORMAT_NORMALIZE - 1 };
typedef rk_ubyte rk_param_format;
enum rk_param_format : rk_ubyte {
enum : rk_ubyte {
RK_PARAM_FORMAT_VEC3_FLOAT = 1,
RK_PARAM_FORMAT_VEC3_SHORT = 2,
RK_PARAM_FORMAT_VEC3_INT10 = 3,
RK_PARAM_FORMAT_MAT3_FLOAT = 4,
RK_PARAM_FORMAT_MAT3_INT10 = 5
RK_PARAM_FORMAT_MAT3_INT10 = 5,
RK_PARAM_FORMAT_NORMALIZE = RK_FLAG(7),
RK_PARAM_FORMAT_MASK = RK_PARAM_FORMAT_NORMALIZE - 1
};
enum : rk_ubyte { RK_PARAM_FORMAT_NORMALIZE = RK_FLAG(7) };
enum : rk_ubyte { RK_PARAM_FORMAT_MASK = RK_PARAM_FORMAT_NORMALIZE - 1 };
typedef rk_ubyte rk_instance_flags;
enum rk_instance_flags : rk_ubyte {
enum : rk_ubyte {
RK_INSTANCE_FLAG_SPAWNED = RK_FLAG(0),
RK_INSTANCE_FLAG_VISIBLE = RK_FLAG(1)
RK_INSTANCE_FLAG_VISIBLE = RK_FLAG(1),
RK_INSTANCE_FLAGS_SPAWNED_VISIBLE = RK_INSTANCE_FLAG_SPAWNED | RK_INSTANCE_FLAG_VISIBLE
};
enum : rk_ubyte { RK_INSTANCE_FLAGS_SPAWNED_VISIBLE = RK_INSTANCE_FLAG_SPAWNED | RK_INSTANCE_FLAG_VISIBLE };
typedef rk_ushort rk_instance_index;
enum : rk_uint { RK_BATCH_MAX_SIZE = 65536 };
enum : rk_uint {
RK_BATCH_MAX_SIZE = 1 << (sizeof(rk_instance_index) * 8)
};
union rk_mesh {
rk_uint packed;
struct {
rk_ushort base_index;
rk_ushort ntriangles;
};
typedef rk_ushort rk_vertex_index;
typedef rk_ushort rk_mesh_index;
// param input types must be size compatible with an array of rk_param_input
typedef rk_uint rk_param_input;
#define RK_CHECK_PARAM_INPUT_TYPE(_t) static_assert(!(sizeof(_t) % sizeof(rk_param_input)))
struct rk_mesh {
rk_uint base_index;
rk_uint ntriangles;
};
RK_EXPORT void rk_render_initialize(
@ -116,7 +132,7 @@ RK_EXPORT rk_vertices_t rk_create_vertices(
rk_uint nvertices,
rk_ubyte const * vertices,
rk_uint nindices,
rk_ushort const * indices,
rk_vertex_index const * indices,
rk_uint nmeshes,
rk_mesh const * meshes);
@ -129,8 +145,8 @@ RK_EXPORT void rk_fill_batch(
rk_batch_t batch,
rk_uint count,
rk_instance_flags const * flags,
rk_ushort const * meshes,
rk_ubyte const * const * params);
rk_mesh_index const * meshes,
rk_param_input const * const * params);
RK_EXPORT void rk_clear_buffer(
rk_bool pixels,

View File

@ -16,6 +16,8 @@
#include "../render.hpp"
#include "render_opengles.hpp"
#include "../display/display_glx.hpp"
#include "../cmp_memcpy.hpp"
#include <cstdlib>
#include <cstdio>
#include <cstring>
@ -25,6 +27,9 @@ typedef void (*rk_MultiDrawElementsIndirectFunc)(rk_uint, rk_uint, const void *,
static rk_DrawElementsInstancedBaseInstanceFunc rk_DrawElementsInstancedBaseInstance = nullptr;
static rk_MultiDrawElementsIndirectFunc rk_MultiDrawElementsIndirect = nullptr;
static unsigned rk_nbuckets = 0;
static rk_bucket * rk_buckets = nullptr;
static void rk_gl_printf(char const * message) {
printf("[GL] %s\n", message);
}
@ -314,7 +319,7 @@ rk_vertices_t rk_create_vertices(
rk_uint nvertices,
rk_ubyte const * _vertices,
rk_uint nindices,
rk_ushort const * indices,
rk_vertex_index const * indices,
rk_uint nmeshes,
rk_mesh const * meshes) {
if (!format || !nvertices || !_vertices || !nindices || !indices) {
@ -352,8 +357,8 @@ rk_vertices_t rk_create_vertices(
memcpy(vertices->format, format, (format_size + 1) * sizeof(rk_vertex_format));
vertices->vertices = new rk_ubyte[nvertices * vertex_size];
memcpy(vertices->vertices, _vertices, nvertices * vertex_size);
vertices->indices = new rk_ushort[nindices];
memcpy(vertices->indices, indices, nindices * sizeof(rk_ushort));
vertices->indices = new rk_vertex_index[nindices];
memcpy(vertices->indices, indices, nindices * sizeof(rk_vertex_index));
vertices->meshes = new rk_mesh[nmeshes];
memcpy(vertices->meshes, meshes, nmeshes * sizeof(rk_mesh));
vertices->vertices_buffer = 0;
@ -361,28 +366,83 @@ rk_vertices_t rk_create_vertices(
return reinterpret_cast<rk_vertices_t>(vertices);
}
// Makes sure the file-level bucket pool (rk_buckets / rk_nbuckets) is large
// enough to sort `batch`:
//  - the pool holds at least batch.vertices->nmeshes buckets;
//  - each of those buckets can store at least batch.max_size instance indices.
// The pool only ever grows. When anything was (re)allocated, the total size of
// the pool is printed.
//
// Allocation failures are fatal: the message is printed and the process is
// aborted, instead of leaving null or undersized buckets behind for the
// sorting code to write into.
static void rk_buckets_alloc(
    rk_batch const & batch) {
    unsigned const count = batch.vertices->nmeshes;
    unsigned const size = batch.max_size;
    bool reallocated = false;
    if (count > rk_nbuckets) {
        // Grow the bucket array; realloc() on the initial nullptr behaves like malloc().
        // The result goes through a temporary so the old array is not lost on failure.
        rk_bucket * const buckets = reinterpret_cast<rk_bucket *>(
            realloc(rk_buckets, count * sizeof(rk_bucket)));
        if (!buckets) {
            printf("[RK] rk_buckets_alloc(): out of memory.\n");
            abort();
        }
        rk_buckets = buckets;
        // New buckets start empty; their storage is allocated by the loop below.
        for (unsigned index = rk_nbuckets; index < count; ++index) {
            rk_bucket & bucket = rk_buckets[index];
            bucket.size = 0;
            bucket.count = 0;
            bucket.indices = nullptr;
        }
        rk_nbuckets = count;
        reallocated = true;
    }
    // Only the first `count` buckets are used by this batch: grow those that are too small.
    for (unsigned index = 0; index < count; ++index) {
        rk_bucket & bucket = rk_buckets[index];
        if (bucket.size < size) {
            rk_instance_index * const indices = reinterpret_cast<rk_instance_index *>(
                realloc(bucket.indices, size * sizeof(rk_instance_index)));
            if (!indices) {
                printf("[RK] rk_buckets_alloc(): out of memory.\n");
                abort();
            }
            bucket.indices = indices;
            bucket.size = size;
            reallocated = true;
        }
    }
    if (reallocated) {
        // Report the footprint of the whole pool, not only of this batch's buckets.
        unsigned total_size = rk_nbuckets * sizeof(rk_bucket);
        for (unsigned index = 0; index < rk_nbuckets; ++index) {
            rk_bucket const & bucket = rk_buckets[index];
            total_size += bucket.size * sizeof(rk_instance_index);
        }
        printf("[RK] rk_buckets_alloc() -> %u KiB\n", total_size / 1024);
    }
}
static void rk_pack_vec3_float(
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_instance_index const * const __restrict indices,
rk_param_output * __restrict _dst,
rk_param_input const * const __restrict _src) {
rk_instance_index const * const last_index = indices + count;
rk_vec3_float * __restrict dst = reinterpret_cast<rk_vec3_float *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) {
*dst = src[*index];
}
}
static void rk_pack_vec3_short(
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_instance_index const * const __restrict indices,
rk_param_output * __restrict _dst,
rk_param_input const * const __restrict _src) {
rk_instance_index const * const last_index = indices + count;
rk_vec3_short * __restrict dst = reinterpret_cast<rk_vec3_short *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) {
rk_vec3_float const & input = src[*index];
dst->x = static_cast<rk_short>(input.x);
dst->y = static_cast<rk_short>(input.y);
@ -393,14 +453,14 @@ static void rk_pack_vec3_short(
static void rk_pack_vec3_short_norm(
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_instance_index const * const __restrict indices,
rk_param_output * __restrict _dst,
rk_param_input const * const __restrict _src) {
rk_instance_index const * const last_index = indices + count;
rk_vec3_short * __restrict dst = reinterpret_cast<rk_vec3_short *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
#define _convert(s) (static_cast<rk_short>((s) * ((s) < 0.f ? 32768.f : 32767.f)))
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) {
rk_vec3_float const & input = src[*index];
dst->x = _convert(input.x);
dst->y = _convert(input.y);
@ -412,14 +472,14 @@ static void rk_pack_vec3_short_norm(
static void rk_pack_vec3_int10(
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_instance_index const * const __restrict indices,
rk_param_output * __restrict _dst,
rk_param_input const * const __restrict _src) {
rk_instance_index const * const last_index = indices + count;
rk_vec3_int10 * __restrict dst = reinterpret_cast<rk_vec3_int10 *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
#define _convert(s) (static_cast<rk_int>((s)) & 1023)
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) {
rk_vec3_float const & input = src[*index];
*dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20);
}
@ -428,14 +488,14 @@ static void rk_pack_vec3_int10(
static void rk_pack_vec3_int10_norm(
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_instance_index const * const __restrict indices,
rk_param_output * __restrict _dst,
rk_param_input const * const __restrict _src) {
rk_instance_index const * const last_index = indices + count;
rk_vec3_int10 * __restrict dst = reinterpret_cast<rk_vec3_int10 *>(_dst);
rk_vec3_float const * const __restrict src = reinterpret_cast<rk_vec3_float const *>(_src);
#define _convert(s) (static_cast<rk_int>((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023)
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) {
rk_vec3_float const & input = src[*index];
*dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20);
}
@ -444,13 +504,13 @@ static void rk_pack_vec3_int10_norm(
static void rk_pack_mat3_float(
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_instance_index const * const __restrict indices,
rk_param_output * __restrict _dst,
rk_param_input const * const __restrict _src) {
rk_instance_index const * const last_index = indices + count;
rk_mat3_float * __restrict dst = reinterpret_cast<rk_mat3_float *>(_dst);
rk_mat3_float const * const __restrict src = reinterpret_cast<rk_mat3_float const *>(_src);
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) {
*dst = src[*index];
}
#undef _convert
@ -458,14 +518,14 @@ static void rk_pack_mat3_float(
static void rk_pack_mat3_int10(
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_instance_index const * const __restrict indices,
rk_param_output * __restrict _dst,
rk_param_input const * const __restrict _src) {
rk_instance_index const * const last_index = indices + count;
rk_mat3_int10 * __restrict dst = reinterpret_cast<rk_mat3_int10 *>(_dst);
rk_mat3_float const * const __restrict src = reinterpret_cast<rk_mat3_float const *>(_src);
#define _convert(s) (static_cast<rk_int>((s)) & 1023)
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) {
rk_mat3_float const & input = src[*index];
dst->x = _convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20);
dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20);
@ -476,14 +536,14 @@ static void rk_pack_mat3_int10(
static void rk_pack_mat3_int10_norm(
unsigned const count,
rk_ushort const * const __restrict indices,
rk_ubyte * __restrict _dst,
rk_ubyte const * const __restrict _src) {
rk_ushort const * const last_index = indices + count;
rk_instance_index const * const __restrict indices,
rk_param_output * __restrict _dst,
rk_param_input const * const __restrict _src) {
rk_instance_index const * const last_index = indices + count;
rk_mat3_int10 * __restrict dst = reinterpret_cast<rk_mat3_int10 *>(_dst);
rk_mat3_float const * const __restrict src = reinterpret_cast<rk_mat3_float const *>(_src);
#define _convert(s) (static_cast<rk_int>((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023)
for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) {
for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) {
rk_mat3_float const & input = src[*index];
dst->x = _convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20);
dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20);
@ -551,9 +611,13 @@ rk_batch_t rk_create_batch(
batch->nparams = nparams;
batch->vertices = vertices;
batch->flags = new rk_instance_flags[max_size];
batch->meshes = new rk_ushort[max_size];
batch->indices = new rk_ushort[max_size];
memset(batch->flags, 0xFF, max_size * sizeof(rk_instance_flags));
batch->meshes = new rk_mesh_index[max_size];
memset(batch->meshes, 0xFF, max_size * sizeof(rk_mesh_index));
batch->indices = new rk_instance_index[max_size];
memset(batch->indices, 0, max_size * sizeof(rk_instance_index));
batch->commands = new rk_command[vertices->nmeshes];
memset(batch->commands, 0, vertices->nmeshes * sizeof(rk_command));
if (nparams) {
batch->params = new rk_parameter[nparams];
} else {
@ -572,7 +636,8 @@ rk_batch_t rk_create_batch(
} else {
glGenBuffers(1, &vertices->indices_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertices->indices_buffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, vertices->nindices * sizeof(rk_ushort), vertices->indices, GL_STATIC_DRAW);
glBufferData(GL_ELEMENT_ARRAY_BUFFER,
vertices->nindices * sizeof(rk_vertex_index), vertices->indices, GL_STATIC_DRAW);
}
if (rk_MultiDrawElementsIndirect) {
glGenBuffers(1, &batch->commands_buffer);
@ -682,52 +747,50 @@ rk_batch_t rk_create_batch(
break;
}
glVertexBindingDivisor(binding, 1);
param->source = new rk_ubyte[max_size * param->src_size];
param->src_len = param->src_size / sizeof(rk_param_input);
param->dst_len = param->dst_size / sizeof(rk_param_output);
param->source = new rk_param_input[max_size * param->src_len];
memset(param->source, 0xFF, max_size * param->src_size);
offset += max_size * param->dst_size;
}
}
glBindVertexArray(0);
rk_buckets_alloc(*batch);
return reinterpret_cast<rk_batch_t>(batch);
}
static void rk_sort_batch(
rk_batch const & batch) {
rk_instance_flags const * flags = batch.flags;
rk_ushort * indices = batch.indices;
for (unsigned index = 0; index < batch.count; ++index, ++flags) {
rk_bucket const * const last_bucket = rk_buckets + batch.vertices->nmeshes;
for (rk_bucket * __restrict bucket = rk_buckets; bucket < last_bucket; ++bucket) {
bucket->count = 0;
}
rk_instance_flags const * __restrict flags = batch.flags;
rk_mesh_index const * __restrict mesh_index = batch.meshes;
for (unsigned index = 0; index < batch.count; ++index, ++flags, ++mesh_index) {
if ((*flags & RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) == RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) {
*indices++ = index;
rk_bucket & __restrict bucket = rk_buckets[*mesh_index];
bucket.indices[bucket.count++] = index;
}
}
rk_instance_index * __restrict indices = batch.indices;
rk_command * __restrict command = batch.commands;
rk_mesh const * __restrict mesh = batch.vertices->meshes;
for (rk_bucket const * __restrict bucket = rk_buckets; bucket < last_bucket; ++bucket, ++mesh) {
if (bucket->count) {
memcpy(indices, bucket->indices, bucket->count * sizeof(rk_instance_index));
command->nvertices = mesh->ntriangles * 3;
command->ninstances = bucket->count;
command->base_index = mesh->base_index;
command->base_instance = indices - batch.indices;
indices += bucket->count;
++command;
}
}
batch.ninstances = indices - batch.indices;
batch.ncommands = 0;
if (batch.ninstances) {
rk_command * const last_command = batch.commands + batch.vertices->nmeshes;
rk_command * command = batch.commands;
rk_ushort * base = batch.indices;
rk_ushort * const last = batch.indices + batch.ninstances;
for (rk_ushort * first = batch.indices; first < last && command < last_command; base = first, ++command) {
unsigned const mesh_index = batch.meshes[*first++];
for ( ; first < last && mesh_index == batch.meshes[*first]; ++first) {
}
for (rk_ushort * second = first; second < last; ++second) {
unsigned const index = *second;
if (mesh_index == batch.meshes[index]) {
*second = *first;
*first++ = index;
}
}
rk_mesh const & mesh = batch.vertices->meshes[mesh_index];
command->nvertices = static_cast<GLuint>(mesh.ntriangles) * 3;
command->ninstances = first - base;
command->base_index = mesh.base_index;
command->base_vertex = 0;
command->base_instance = base - batch.indices;
}
batch.ncommands = command - batch.commands;
if (rk_MultiDrawElementsIndirect && batch.ncommands) {
glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, batch.ncommands * sizeof(rk_command), batch.commands);
}
batch.ncommands = command - batch.commands;
if (rk_MultiDrawElementsIndirect) {
glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, batch.ncommands * sizeof(rk_command), batch.commands);
}
batch.state = RK_BATCH_STATE_SORTED;
}
@ -740,7 +803,7 @@ static void rk_pack_batch(
if (param->dirty) {
param->dirty = false;
if (batch.ninstances) {
rk_ubyte * const dst = reinterpret_cast<rk_ubyte *>(
rk_param_output * const dst = reinterpret_cast<rk_param_output *>(
glMapBufferRange(GL_ARRAY_BUFFER, param->offset, batch.ninstances * param->dst_size,
GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
if (dst) {
@ -755,38 +818,12 @@ static void rk_pack_batch(
batch.state = RK_BATCH_STATE_PACKED;
}
static bool rk_compare_replace(
void * __restrict _dst,
void const * __restrict _src,
unsigned const size) {
rk_ulong hash = 0;
rk_ulong * dst = reinterpret_cast<rk_ulong *>(_dst);
rk_ulong const * src = reinterpret_cast<rk_ulong const *>(_src);
unsigned count = size / sizeof(rk_ulong);
unsigned remain = (size - count * sizeof(rk_ulong));
if (count) {
do {
hash |= *dst ^ *src;
*dst++ = *src++;
} while(--count > 0);
}
rk_ubyte * rdst = reinterpret_cast<rk_ubyte *>(dst);
rk_ubyte const * rsrc = reinterpret_cast<rk_ubyte const *>(src);
if (remain) {
do {
hash |= *rdst ^ *rsrc;
*rdst++ = *rsrc++;
} while(--remain > 0);
}
return (hash != 0);
}
void rk_fill_batch(
rk_batch_t _batch,
rk_uint count,
rk_instance_flags const * flags,
rk_ushort const * meshes,
rk_ubyte const * const * params) {
rk_mesh_index const * meshes,
rk_param_input const * const * params) {
rk_batch const * const batch = reinterpret_cast<rk_batch const *>(_batch);
if (!batch || !count || count > batch->max_size) {
rk_printf("rk_fill_batch(): invalid params.");
@ -797,7 +834,7 @@ void rk_fill_batch(
if (batch->nparams) {
got_all_params = (params != nullptr);
if (params) {
for (rk_ubyte const * const * param = params; param < params + batch->nparams; ++param) {
for (rk_param_input const * const * param = params; param < params + batch->nparams; ++param) {
bool const got_param = (*param != nullptr);
got_any_params |= got_param;
got_all_params &= got_param;
@ -815,22 +852,20 @@ void rk_fill_batch(
return;
}
batch->count = count;
bool const cmp_flags =
(flags && rk_compare_replace(batch->flags, flags, batch->count * sizeof(rk_instance_flags)));
bool const cmp_meshes =
(meshes && rk_compare_replace(batch->meshes, meshes, batch->count * sizeof(rk_mesh)));
bool const cmp_flags = (flags && rk_cmp_memcpy(batch->flags, flags, batch->count));
bool const cmp_meshes = (meshes && rk_cmp_memcpy(batch->meshes, meshes, batch->count));
bool const need_sorting = (cmp_flags || cmp_meshes || resized);
if (batch->nparams) {
rk_parameter const * const last_param = batch->params + batch->nparams;
if (got_any_params) {
rk_ubyte const * const * src = params;
for (rk_parameter const * dst = batch->params; dst < last_param; ++dst, ++src) {
dst->dirty =
(*src && rk_compare_replace(dst->source, *src, batch->count * dst->src_size)) || need_sorting;
rk_param_input const * const * src = params;
for (rk_parameter const * param = batch->params; param < last_param; ++param, ++src) {
param->dirty =
((*src && rk_cmp_memcpy(param->source, *src, batch->count * param->src_len)) || need_sorting);
}
} else if (need_sorting) {
for (rk_parameter const * dst = batch->params; dst < last_param; ++dst) {
dst->dirty = true;
for (rk_parameter const * param = batch->params; param < last_param; ++param) {
param->dirty = true;
}
}
}

View File

@ -17,6 +17,7 @@
#define _RK_ENGINE_RENDER_OPENGLES_H
#include "../types.hpp"
#include "../math.hpp"
#include <GLES3/gl32.h>
#include <GLES3/gl3ext.h>
#include <GLES3/gl3platform.h>
@ -44,7 +45,7 @@ struct rk_vertices {
unsigned nmeshes;
rk_vertex_format * format;
rk_ubyte * vertices;
rk_ushort * indices;
rk_vertex_index * indices;
rk_mesh * meshes;
GLuint vertices_buffer;
GLuint indices_buffer;
@ -58,12 +59,19 @@ struct rk_command {
GLuint base_instance;
};
// param output types must be size compatible with an array of rk_param_output
typedef rk_uint rk_param_output;
#define RK_CHECK_PARAM_OUTPUT_TYPE(_t) static_assert(!(sizeof(_t) % sizeof(rk_param_output)))
struct rk_vec3_float {
float x;
float y;
float z;
};
static_assert(sizeof(rk_vec3_float) == sizeof(rk_vec3));
struct rk_vec3_short {
rk_short x;
rk_short y;
@ -80,17 +88,29 @@ struct rk_mat3_float {
rk_vec3_float z;
};
static_assert(sizeof(rk_mat3_float) == sizeof(rk_mat3));
struct rk_mat3_int10 {
rk_vec3_int10 x;
rk_vec3_int10 y;
rk_vec3_int10 z;
};
RK_CHECK_PARAM_INPUT_TYPE(rk_vec3_float);
RK_CHECK_PARAM_INPUT_TYPE(rk_mat3_float);
RK_CHECK_PARAM_OUTPUT_TYPE(rk_vec3_float);
RK_CHECK_PARAM_OUTPUT_TYPE(rk_vec3_short);
RK_CHECK_PARAM_OUTPUT_TYPE(rk_vec3_int10);
RK_CHECK_PARAM_OUTPUT_TYPE(rk_vec3_uint10);
RK_CHECK_PARAM_OUTPUT_TYPE(rk_mat3_float);
RK_CHECK_PARAM_OUTPUT_TYPE(rk_mat3_int10);
typedef void (*rk_packer)(
unsigned const, // count
rk_ushort const * const, // indices
rk_ubyte *, // dst
rk_ubyte const * const); // src
rk_instance_index const * const, // indices
rk_param_output *, // dst
rk_param_input const * const); // src
struct rk_parameter {
mutable bool dirty;
@ -98,10 +118,18 @@ struct rk_parameter {
unsigned offset;
unsigned src_size;
unsigned dst_size;
rk_ubyte * source;
unsigned src_len;
unsigned dst_len;
rk_param_input * source;
rk_packer packer;
};
// Growable array of instance indices, used as one bucket when sorting a batch.
struct rk_bucket {
// Number of rk_instance_index elements allocated for `indices`.
unsigned size;
// Number of elements of `indices` currently filled.
unsigned count;
// Storage for the instance indices.
rk_instance_index * indices;
};
enum rk_batch_state {
RK_BATCH_STATE_EMPTY = 0,
RK_BATCH_STATE_FILLED = 1,
@ -118,8 +146,8 @@ struct rk_batch {
unsigned nparams;
rk_vertices const * vertices;
rk_instance_flags * flags;
rk_ushort * meshes;
rk_ushort * indices;
rk_mesh_index * meshes;
rk_instance_index * indices;
rk_command * commands;
rk_parameter * params;
GLuint vertex_array;

View File

@ -33,6 +33,8 @@ typedef int32_t rk_int;
typedef uint32_t rk_uint;
typedef int64_t rk_long;
typedef uint64_t rk_ulong;
typedef __int128 rk_llong;
typedef unsigned __int128 rk_ullong;
typedef float rk_float;
typedef void * rk_handle_t;