diff --git a/cpp/cmp_memcpy.hpp b/cpp/cmp_memcpy.hpp index 955508b..6ec6aa4 100644 --- a/cpp/cmp_memcpy.hpp +++ b/cpp/cmp_memcpy.hpp @@ -18,15 +18,12 @@ #include "types.hpp" -template +template bool _rk_cmp_memcpy_small( - void * const __restrict _dst, - void const * const __restrict _src, - unsigned const size) { - unsigned count = (size / sizeof(_type)); - _type * dst = reinterpret_cast<_type *>(_dst); - _type const * src = reinterpret_cast<_type const *>(_src); - _type cmp = 0; + _small * __restrict dst, + _small const * __restrict src, + unsigned count) { + _small cmp = 0; do { cmp |= *dst ^ *src; *dst++ = *src++; @@ -36,134 +33,67 @@ bool _rk_cmp_memcpy_small( template bool _rk_cmp_memcpy_big( - void * const __restrict _dst, - void const * const __restrict _src, - unsigned const size) { - unsigned count = size / sizeof(_big); - unsigned const remain = size % sizeof(_big); + _small * const __restrict _dst, + _small const * const __restrict _src, + unsigned const _count) { + unsigned const ratio = sizeof(_big) / sizeof(_small); + unsigned big_count = _count / ratio; + unsigned const small_count = _count % ratio; _big * dst = reinterpret_cast<_big *>(_dst); _big const * src = reinterpret_cast<_big const *>(_src); _big cmp = 0; do { cmp |= *dst ^ *src; *dst++ = *src++; - } while(--count > 0); + } while(--big_count > 0); bool modified = (cmp != 0); - if (remain) { - modified |= _rk_cmp_memcpy_small<_small>(dst, src, remain); + if (small_count) { + modified |= _rk_cmp_memcpy_small<_small>( + reinterpret_cast<_small *>(dst), reinterpret_cast<_small const *>(src), small_count); } return modified; } #ifdef RK_CMP_MEMCPY_UNALIGNED -#define _rk_size_and_alignment(_t) (size >= sizeof(_t)) +#define _rk_count_and_alignment(_t) (count >= (sizeof(_t) / sizeof(_small))) #else -#define _rk_size_and_alignment(_t) (size >= sizeof(_t) && !(alignment & (sizeof(_t) - 1))) +#define _rk_count_and_alignment(_t) ((count >= (sizeof(_t) / sizeof(_small))) && !(alignment % sizeof(_t))) #endif -template +template bool rk_cmp_memcpy( - void * const __restrict _dst, - void const * const __restrict _src, - unsigned const size); - -template<> -bool rk_cmp_memcpy( - void * const __restrict _dst, - void const * const __restrict _src, - unsigned const size) { + _small * const __restrict _dst, + _small const * const __restrict _src, + unsigned const count) { #ifndef RK_CMP_MEMCPY_UNALIGNED unsigned const alignment = reinterpret_cast(_dst) | reinterpret_cast(_src); #endif - if (_rk_size_and_alignment(rk_ullong)) { - return _rk_cmp_memcpy_big(_dst, _src, size); + if (sizeof(_small) < sizeof(rk_ullong)) { + if (_rk_count_and_alignment(rk_ullong)) { + return _rk_cmp_memcpy_big(_dst, _src, count); + } } - if (_rk_size_and_alignment(rk_ulong)) { - return _rk_cmp_memcpy_big(_dst, _src, size); + if (sizeof(_small) < sizeof(rk_ulong)) { + if (_rk_count_and_alignment(rk_ulong)) { + return _rk_cmp_memcpy_big(_dst, _src, count); + } } - if (_rk_size_and_alignment(rk_uint)) { - return _rk_cmp_memcpy_big(_dst, _src, size); + if (sizeof(_small) < sizeof(rk_uint)) { + if (_rk_count_and_alignment(rk_uint)) { + return _rk_cmp_memcpy_big(_dst, _src, count); + } } - if (_rk_size_and_alignment(rk_ushort)) { - return _rk_cmp_memcpy_big(_dst, _src, size); + if (sizeof(_small) < sizeof(rk_ushort)) { + if (_rk_count_and_alignment(rk_ushort)) { + return _rk_cmp_memcpy_big(_dst, _src, count); + } } - if (size > 0) { - return _rk_cmp_memcpy_small(_dst, _src, size); + if (count) { + return _rk_cmp_memcpy_small<_small>(_dst, _src, count); } return false; } -template<> -bool rk_cmp_memcpy( - void * const __restrict _dst, - void const * const __restrict _src, - unsigned const size) { -#ifndef RK_CMP_MEMCPY_UNALIGNED - unsigned const alignment = reinterpret_cast(_dst) | reinterpret_cast(_src); -#endif - if (_rk_size_and_alignment(rk_ullong)) { - return _rk_cmp_memcpy_big(_dst, _src, size); - } - if (_rk_size_and_alignment(rk_ulong)) { - return _rk_cmp_memcpy_big(_dst, _src, size); - } - if (_rk_size_and_alignment(rk_uint)) { - return _rk_cmp_memcpy_big(_dst, _src, size); - } - if (size > 0) { - return _rk_cmp_memcpy_small(_dst, _src, size); - } - return false; -} - -template<> -bool rk_cmp_memcpy( - void * const __restrict _dst, - void const * const __restrict _src, - unsigned const size) { -#ifndef RK_CMP_MEMCPY_UNALIGNED - unsigned const alignment = reinterpret_cast(_dst) | reinterpret_cast(_src); -#endif - if (_rk_size_and_alignment(rk_ullong)) { - return _rk_cmp_memcpy_big(_dst, _src, size); - } - if (_rk_size_and_alignment(rk_ulong)) { - return _rk_cmp_memcpy_big(_dst, _src, size); - } - if (size > 0) { - return _rk_cmp_memcpy_small(_dst, _src, size); - } - return false; -} - -template<> -bool rk_cmp_memcpy( - void * const __restrict _dst, - void const * const __restrict _src, - unsigned const size) { -#ifndef RK_CMP_MEMCPY_UNALIGNED - unsigned const alignment = reinterpret_cast(_dst) | reinterpret_cast(_src); -#endif - if (_rk_size_and_alignment(rk_ullong)) { - return _rk_cmp_memcpy_big(_dst, _src, size); - } - if (size > 0) { - return _rk_cmp_memcpy_small(_dst, _src, size); - } - return false; -} - -template<> -bool rk_cmp_memcpy( - void * const __restrict _dst, - void const * const __restrict _src, - unsigned const size) { - if (size > 0) { - return _rk_cmp_memcpy_small(_dst, _src, size); - } - return false; -} - -#undef _rk_size_and_alignment +#undef _rk_count_and_alignment #endif // RK_ENGINE_CMP_MEMCPY_H diff --git a/cpp/math.hpp b/cpp/math.hpp index 0d240aa..9432c26 100644 --- a/cpp/math.hpp +++ b/cpp/math.hpp @@ -26,6 +26,14 @@ typedef glm::vec4 rk_vec4; typedef glm::mat3 rk_mat3; typedef glm::mat4 rk_mat4; +#define RK_CHECK_MATH_TYPE(_t, _e, _c) static_assert(sizeof(_t) == sizeof(_e) * (_c)) + +RK_CHECK_MATH_TYPE(rk_vec2, float, 2); +RK_CHECK_MATH_TYPE(rk_vec3, float, 3); +RK_CHECK_MATH_TYPE(rk_vec4, float, 4); +RK_CHECK_MATH_TYPE(rk_mat3, rk_vec3, 3); +RK_CHECK_MATH_TYPE(rk_mat4, rk_vec4, 4); + #define vec3_right (rk_vec3(1.f, 0.f, 0.f)) #define vec3_forward (rk_vec3(0.f, 1.f, 0.f)) #define vec3_up (rk_vec3(0.f, 0.f, 1.f)) diff --git a/cpp/render.hpp b/cpp/render.hpp index 4629b98..0fb5f17 100644 --- a/cpp/render.hpp +++ b/cpp/render.hpp @@ -27,14 +27,18 @@ typedef rk_handle_t rk_triangles_t; typedef rk_handle_t rk_vertices_t; typedef rk_handle_t rk_batch_t; -enum rk_texture_format : rk_uint { +typedef rk_uint rk_texture_format; + +enum : rk_uint { RK_TEXTURE_FORMAT_SRGB8_A8 = 0, RK_TEXTURE_FORMAT_RGBA8 = 1, RK_TEXTURE_FORMAT_RGB10_A2 = 2, RK_TEXTURE_FORMAT_FLOAT_32 = 3 }; -enum rk_texture_flags : rk_uint { +typedef rk_uint rk_texture_flags; + +enum : rk_uint { RK_TEXTURE_FLAG_3D = RK_FLAG(0), RK_TEXTURE_FLAG_MIPMAPS = RK_FLAG(1), RK_TEXTURE_FLAG_MIN_NEAREST = 0, @@ -43,34 +47,49 @@ enum rk_texture_flags : rk_uint { RK_TEXTURE_FLAG_MAG_LINEAR = RK_FLAG(3), }; -enum rk_vertex_format : rk_ubyte { +typedef rk_ubyte rk_vertex_format; + +enum : rk_ubyte { RK_VERTEX_FORMAT_VEC3_FLOAT = 1, RK_VERTEX_FORMAT_VEC3_INT10 = 2, - RK_VERTEX_FORMAT_VEC3_UINT10 = 3 + RK_VERTEX_FORMAT_VEC3_UINT10 = 3, + RK_VERTEX_FORMAT_NORMALIZE = RK_FLAG(7), + RK_VERTEX_FORMAT_MASK = RK_VERTEX_FORMAT_NORMALIZE - 1 }; -enum : rk_ubyte { RK_VERTEX_FORMAT_NORMALIZE = RK_FLAG(7) }; -enum : rk_ubyte { RK_VERTEX_FORMAT_MASK = RK_VERTEX_FORMAT_NORMALIZE - 1 }; +typedef rk_ubyte rk_param_format; -enum rk_param_format : rk_ubyte { +enum : rk_ubyte { RK_PARAM_FORMAT_VEC3_FLOAT = 1, RK_PARAM_FORMAT_VEC3_SHORT = 2, RK_PARAM_FORMAT_VEC3_INT10 = 3, RK_PARAM_FORMAT_MAT3_FLOAT = 4, - RK_PARAM_FORMAT_MAT3_INT10 = 5 + RK_PARAM_FORMAT_MAT3_INT10 = 5, + RK_PARAM_FORMAT_NORMALIZE = RK_FLAG(7), + RK_PARAM_FORMAT_MASK = RK_PARAM_FORMAT_NORMALIZE - 1 }; -enum : rk_ubyte { RK_PARAM_FORMAT_NORMALIZE = RK_FLAG(7) }; -enum : rk_ubyte { RK_PARAM_FORMAT_MASK = RK_PARAM_FORMAT_NORMALIZE - 1 }; +typedef rk_ubyte rk_instance_flags; -enum rk_instance_flags : rk_ubyte { +enum : rk_ubyte { RK_INSTANCE_FLAG_SPAWNED = RK_FLAG(0), - RK_INSTANCE_FLAG_VISIBLE = RK_FLAG(1) + RK_INSTANCE_FLAG_VISIBLE = RK_FLAG(1), + RK_INSTANCE_FLAGS_SPAWNED_VISIBLE = RK_INSTANCE_FLAG_SPAWNED | RK_INSTANCE_FLAG_VISIBLE }; -enum : rk_ubyte { RK_INSTANCE_FLAGS_SPAWNED_VISIBLE = RK_INSTANCE_FLAG_SPAWNED | RK_INSTANCE_FLAG_VISIBLE }; +typedef rk_ushort rk_instance_index; -enum : rk_uint { RK_BATCH_MAX_SIZE = 65536 }; +enum : rk_uint { + RK_BATCH_MAX_SIZE = 1 << (sizeof(rk_instance_index) * 8) +}; + +typedef rk_ushort rk_vertex_index; +typedef rk_ushort rk_mesh_index; + +// param input types must be size compatible with an array of rk_param_input +typedef rk_uint rk_param_input; + +#define RK_CHECK_PARAM_INPUT_TYPE(_t) static_assert(!(sizeof(_t) % sizeof(rk_param_input))) struct rk_mesh { rk_uint base_index; @@ -113,7 +132,7 @@ RK_EXPORT rk_vertices_t rk_create_vertices( rk_uint nvertices, rk_ubyte const * vertices, rk_uint nindices, - rk_ushort const * indices, + rk_vertex_index const * indices, rk_uint nmeshes, rk_mesh const * meshes); @@ -126,8 +145,8 @@ RK_EXPORT void rk_fill_batch( rk_batch_t batch, rk_uint count, rk_instance_flags const * flags, - rk_ushort const * meshes, - rk_ubyte const * const * params); + rk_mesh_index const * meshes, + rk_param_input const * const * params); RK_EXPORT void rk_clear_buffer( rk_bool pixels, diff --git a/cpp/render/render_opengles.cpp b/cpp/render/render_opengles.cpp index 3399222..0edfeb2 100644 --- a/cpp/render/render_opengles.cpp +++ b/cpp/render/render_opengles.cpp @@ -27,12 +27,6 @@ typedef void (*rk_MultiDrawElementsIndirectFunc)(rk_uint, rk_uint, const void *, static rk_DrawElementsInstancedBaseInstanceFunc rk_DrawElementsInstancedBaseInstance = nullptr; static rk_MultiDrawElementsIndirectFunc rk_MultiDrawElementsIndirect = nullptr; -struct rk_bucket { - unsigned size; - unsigned count; - rk_ushort * indices; -}; - static unsigned rk_nbuckets = 0; static rk_bucket * rk_buckets = nullptr; @@ -325,7 +319,7 @@ rk_vertices_t rk_create_vertices( rk_uint nvertices, rk_ubyte const * _vertices, rk_uint nindices, - rk_ushort const * indices, + rk_vertex_index const * indices, rk_uint nmeshes, rk_mesh const * meshes) { if (!format || !nvertices || !_vertices || !nindices || !indices) { @@ -363,8 +357,8 @@ rk_vertices_t rk_create_vertices( memcpy(vertices->format, format, (format_size + 1) * sizeof(rk_vertex_format)); vertices->vertices = new rk_ubyte[nvertices * vertex_size]; memcpy(vertices->vertices, _vertices, nvertices * vertex_size); - vertices->indices = new rk_ushort[nindices]; - memcpy(vertices->indices, indices, nindices * sizeof(rk_ushort)); + vertices->indices = new rk_vertex_index[nindices]; + memcpy(vertices->indices, indices, nindices * sizeof(rk_vertex_index)); vertices->meshes = new rk_mesh[nmeshes]; memcpy(vertices->meshes, meshes, nmeshes * sizeof(rk_mesh)); vertices->vertices_buffer = 0; @@ -383,7 +377,7 @@ static void rk_buckets_alloc( for (unsigned index = 0; index < count; ++index) { rk_bucket & bucket = rk_buckets[index]; bucket.size = size; - bucket.indices = reinterpret_cast(malloc(size * sizeof(rk_ushort))); + bucket.indices = reinterpret_cast(malloc(size * sizeof(rk_instance_index))); } reallocated = true; } @@ -392,7 +386,8 @@ static void rk_buckets_alloc( rk_bucket & bucket = rk_buckets[index]; if (bucket.size < size) { bucket.size = size; - bucket.indices = reinterpret_cast(realloc(bucket.indices, size * sizeof(rk_ushort))); + bucket.indices = reinterpret_cast( + realloc(bucket.indices, size * sizeof(rk_instance_index))); reallocated = true; } } @@ -403,13 +398,15 @@ static void rk_buckets_alloc( rk_bucket & bucket = rk_buckets[index]; if (bucket.size < size) { bucket.size = size; - bucket.indices = reinterpret_cast(realloc(bucket.indices, size * sizeof(rk_ushort))); + bucket.indices = reinterpret_cast( + realloc(bucket.indices, size * sizeof(rk_instance_index))); } } for (unsigned index = rk_nbuckets; index < count; ++index) { rk_bucket & bucket = rk_buckets[index]; bucket.size = size; - bucket.indices = reinterpret_cast(malloc(size * sizeof(rk_ushort))); + bucket.indices = reinterpret_cast( + malloc(size * sizeof(rk_instance_index))); } rk_nbuckets = count; reallocated = true; @@ -418,7 +415,7 @@ static void rk_buckets_alloc( unsigned total_size = rk_nbuckets * sizeof(rk_bucket); for (unsigned index = 0; index < rk_nbuckets; ++index) { rk_bucket const & bucket = rk_buckets[index]; - total_size += bucket.size * sizeof(rk_ushort); + total_size += bucket.size * sizeof(rk_instance_index); } printf("[RK] rk_buckets_alloc() -> %d KiB\n", total_size / 1024); } @@ -426,26 +423,26 @@ static void rk_buckets_alloc( static void rk_pack_vec3_float( unsigned const count, - rk_ushort const * const __restrict indices, - rk_ubyte * __restrict _dst, - rk_ubyte const * const __restrict _src) { - rk_ushort const * const last_index = indices + count; + rk_instance_index const * const __restrict indices, + rk_param_output * __restrict _dst, + rk_param_input const * const __restrict _src) { + rk_instance_index const * const last_index = indices + count; rk_vec3_float * __restrict dst = reinterpret_cast(_dst); rk_vec3_float const * const __restrict src = reinterpret_cast(_src); - for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { *dst = src[*index]; } } static void rk_pack_vec3_short( unsigned const count, - rk_ushort const * const __restrict indices, - rk_ubyte * __restrict _dst, - rk_ubyte const * const __restrict _src) { - rk_ushort const * const last_index = indices + count; + rk_instance_index const * const __restrict indices, + rk_param_output * __restrict _dst, + rk_param_input const * const __restrict _src) { + rk_instance_index const * const last_index = indices + count; rk_vec3_short * __restrict dst = reinterpret_cast(_dst); rk_vec3_float const * const __restrict src = reinterpret_cast(_src); - for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { rk_vec3_float const & input = src[*index]; dst->x = static_cast(input.x); dst->y = static_cast(input.y); @@ -456,14 +453,14 @@ static void rk_pack_vec3_short( static void rk_pack_vec3_short_norm( unsigned const count, - rk_ushort const * const __restrict indices, - rk_ubyte * __restrict _dst, - rk_ubyte const * const __restrict _src) { - rk_ushort const * const last_index = indices + count; + rk_instance_index const * const __restrict indices, + rk_param_output * __restrict _dst, + rk_param_input const * const __restrict _src) { + rk_instance_index const * const last_index = indices + count; rk_vec3_short * __restrict dst = reinterpret_cast(_dst); rk_vec3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s) * ((s) < 0.f ? 32768.f : 32767.f))) - for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { rk_vec3_float const & input = src[*index]; dst->x = _convert(input.x); dst->y = _convert(input.y); @@ -475,14 +472,14 @@ static void rk_pack_vec3_short_norm( static void rk_pack_vec3_int10( unsigned const count, - rk_ushort const * const __restrict indices, - rk_ubyte * __restrict _dst, - rk_ubyte const * const __restrict _src) { - rk_ushort const * const last_index = indices + count; + rk_instance_index const * const __restrict indices, + rk_param_output * __restrict _dst, + rk_param_input const * const __restrict _src) { + rk_instance_index const * const last_index = indices + count; rk_vec3_int10 * __restrict dst = reinterpret_cast(_dst); rk_vec3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s)) & 1023) - for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { rk_vec3_float const & input = src[*index]; *dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20); } @@ -491,14 +488,14 @@ static void rk_pack_vec3_int10( static void rk_pack_vec3_int10_norm( unsigned const count, - rk_ushort const * const __restrict indices, - rk_ubyte * __restrict _dst, - rk_ubyte const * const __restrict _src) { - rk_ushort const * const last_index = indices + count; + rk_instance_index const * const __restrict indices, + rk_param_output * __restrict _dst, + rk_param_input const * const __restrict _src) { + rk_instance_index const * const last_index = indices + count; rk_vec3_int10 * __restrict dst = reinterpret_cast(_dst); rk_vec3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023) - for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { rk_vec3_float const & input = src[*index]; *dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20); } @@ -507,13 +504,13 @@ static void rk_pack_vec3_int10_norm( static void rk_pack_mat3_float( unsigned const count, - rk_ushort const * const __restrict indices, - rk_ubyte * __restrict _dst, - rk_ubyte const * const __restrict _src) { - rk_ushort const * const last_index = indices + count; + rk_instance_index const * const __restrict indices, + rk_param_output * __restrict _dst, + rk_param_input const * const __restrict _src) { + rk_instance_index const * const last_index = indices + count; rk_mat3_float * __restrict dst = reinterpret_cast(_dst); rk_mat3_float const * const __restrict src = reinterpret_cast(_src); - for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { *dst = src[*index]; } #undef _convert @@ -521,14 +518,14 @@ static void rk_pack_mat3_float( static void rk_pack_mat3_int10( unsigned const count, - rk_ushort const * const __restrict indices, - rk_ubyte * __restrict _dst, - rk_ubyte const * const __restrict _src) { - rk_ushort const * const last_index = indices + count; + rk_instance_index const * const __restrict indices, + rk_param_output * __restrict _dst, + rk_param_input const * const __restrict _src) { + rk_instance_index const * const last_index = indices + count; rk_mat3_int10 * __restrict dst = reinterpret_cast(_dst); rk_mat3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s)) & 1023) - for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { rk_mat3_float const & input = src[*index]; dst->x = _convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20); dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20); @@ -539,14 +536,14 @@ static void rk_pack_mat3_int10( static void rk_pack_mat3_int10_norm( unsigned const count, - rk_ushort const * const __restrict indices, - rk_ubyte * __restrict _dst, - rk_ubyte const * const __restrict _src) { - rk_ushort const * const last_index = indices + count; + rk_instance_index const * const __restrict indices, + rk_param_output * __restrict _dst, + rk_param_input const * const __restrict _src) { + rk_instance_index const * const last_index = indices + count; rk_mat3_int10 * __restrict dst = reinterpret_cast(_dst); rk_mat3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023) - for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { + for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { rk_mat3_float const & input = src[*index]; dst->x = _convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20); dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20); @@ -614,9 +611,11 @@ rk_batch_t rk_create_batch( batch->nparams = nparams; batch->vertices = vertices; batch->flags = new rk_instance_flags[max_size]; - batch->meshes = new rk_ushort[max_size]; - batch->indices = new rk_ushort[max_size]; - memset(batch->indices, 0xFF, max_size * sizeof(rk_ushort)); + memset(batch->flags, 0xFF, max_size * sizeof(rk_instance_flags)); + batch->meshes = new rk_mesh_index[max_size]; + memset(batch->meshes, 0xFF, max_size * sizeof(rk_mesh_index)); + batch->indices = new rk_instance_index[max_size]; + memset(batch->indices, 0, max_size * sizeof(rk_instance_index)); batch->commands = new rk_command[vertices->nmeshes]; memset(batch->commands, 0, vertices->nmeshes * sizeof(rk_command)); if (nparams) { @@ -637,7 +636,8 @@ rk_batch_t rk_create_batch( } else { glGenBuffers(1, &vertices->indices_buffer); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertices->indices_buffer); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, vertices->nindices * sizeof(rk_ushort), vertices->indices, GL_STATIC_DRAW); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, + vertices->nindices * sizeof(rk_vertex_index), vertices->indices, GL_STATIC_DRAW); } if (rk_MultiDrawElementsIndirect) { glGenBuffers(1, &batch->commands_buffer); @@ -747,7 +747,10 @@ rk_batch_t rk_create_batch( break; } glVertexBindingDivisor(binding, 1); - param->source = new rk_ubyte[max_size * param->src_size]; + param->src_len = param->src_size / sizeof(rk_param_input); + param->dst_len = param->dst_size / sizeof(rk_param_output); + param->source = new rk_param_input[max_size * param->src_len]; + memset(param->source, 0xFF, max_size * param->src_size); offset += max_size * param->dst_size; } } @@ -763,20 +766,20 @@ static void rk_sort_batch( bucket->count = 0; } rk_instance_flags const * __restrict flags = batch.flags; - rk_ushort const * __restrict mesh_index = batch.meshes; + rk_mesh_index const * __restrict mesh_index = batch.meshes; for (unsigned index = 0; index < batch.count; ++index, ++flags, ++mesh_index) { if ((*flags & RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) == RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) { rk_bucket & __restrict bucket = rk_buckets[*mesh_index]; bucket.indices[bucket.count++] = index; } } - rk_ushort * __restrict indices = batch.indices; + rk_instance_index * __restrict indices = batch.indices; rk_command * __restrict command = batch.commands; rk_mesh const * __restrict mesh = batch.vertices->meshes; for (rk_bucket const * __restrict bucket = rk_buckets; bucket < last_bucket; ++bucket, ++mesh) { if (bucket->count) { - memcpy(indices, bucket->indices, bucket->count * sizeof(rk_ushort)); - command->nvertices = static_cast(mesh->ntriangles) * 3; + memcpy(indices, bucket->indices, bucket->count * sizeof(rk_instance_index)); + command->nvertices = mesh->ntriangles * 3; command->ninstances = bucket->count; command->base_index = mesh->base_index; command->base_instance = indices - batch.indices; @@ -800,7 +803,7 @@ static void rk_pack_batch( if (param->dirty) { param->dirty = false; if (batch.ninstances) { - rk_ubyte * const dst = reinterpret_cast( + rk_param_output * const dst = reinterpret_cast( glMapBufferRange(GL_ARRAY_BUFFER, param->offset, batch.ninstances * param->dst_size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT)); if (dst) { @@ -819,8 +822,8 @@ void rk_fill_batch( rk_batch_t _batch, rk_uint count, rk_instance_flags const * flags, - rk_ushort const * meshes, - rk_ubyte const * const * params) { + rk_mesh_index const * meshes, + rk_param_input const * const * params) { rk_batch const * const batch = reinterpret_cast(_batch); if (!batch || !count || count > batch->max_size) { rk_printf("rk_fill_batch(): invalid params."); @@ -831,7 +834,7 @@ void rk_fill_batch( if (batch->nparams) { got_all_params = (params != nullptr); if (params) { - for (rk_ubyte const * const * param = params; param < params + batch->nparams; ++param) { + for (rk_param_input const * const * param = params; param < params + batch->nparams; ++param) { bool const got_param = (*param != nullptr); got_any_params |= got_param; got_all_params &= got_param; @@ -849,23 +852,20 @@ void rk_fill_batch( return; } batch->count = count; - bool const cmp_flags = (flags && - rk_cmp_memcpy(batch->flags, flags, batch->count * sizeof(rk_instance_flags))); - bool const cmp_meshes = (meshes && - rk_cmp_memcpy(batch->meshes, meshes, batch->count * sizeof(rk_mesh))); + bool const cmp_flags = (flags && rk_cmp_memcpy(batch->flags, flags, batch->count)); + bool const cmp_meshes = (meshes && rk_cmp_memcpy(batch->meshes, meshes, batch->count)); bool const need_sorting = (cmp_flags || cmp_meshes || resized); if (batch->nparams) { rk_parameter const * const last_param = batch->params + batch->nparams; if (got_any_params) { - rk_ubyte const * const * src = params; - for (rk_parameter const * dst = batch->params; dst < last_param; ++dst, ++src) { - dst->dirty = ((*src && - rk_cmp_memcpy(dst->source, *src, batch->count * dst->src_size)) - || need_sorting); + rk_param_input const * const * src = params; + for (rk_parameter const * param = batch->params; param < last_param; ++param, ++src) { + param->dirty = + ((*src && rk_cmp_memcpy(param->source, *src, batch->count * param->src_len)) || need_sorting); } } else if (need_sorting) { - for (rk_parameter const * dst = batch->params; dst < last_param; ++dst) { - dst->dirty = true; + for (rk_parameter const * param = batch->params; param < last_param; ++param) { + param->dirty = true; } } } diff --git a/cpp/render/render_opengles.hpp b/cpp/render/render_opengles.hpp index 18cd3a9..3656dae 100644 --- a/cpp/render/render_opengles.hpp +++ b/cpp/render/render_opengles.hpp @@ -17,6 +17,7 @@ #define _RK_ENGINE_RENDER_OPENGLES_H #include "../types.hpp" +#include "../math.hpp" #include #include #include @@ -44,7 +45,7 @@ struct rk_vertices { unsigned nmeshes; rk_vertex_format * format; rk_ubyte * vertices; - rk_ushort * indices; + rk_vertex_index * indices; rk_mesh * meshes; GLuint vertices_buffer; GLuint indices_buffer; @@ -58,12 +59,19 @@ struct rk_command { GLuint base_instance; }; +// param output types must be size compatible with an array of rk_param_output +typedef rk_uint rk_param_output; + +#define RK_CHECK_PARAM_OUTPUT_TYPE(_t) static_assert(!(sizeof(_t) % sizeof(rk_param_output))) + struct rk_vec3_float { float x; float y; float z; }; +static_assert(sizeof(rk_vec3_float) == sizeof(rk_vec3)); + struct rk_vec3_short { rk_short x; rk_short y; @@ -80,17 +88,29 @@ struct rk_mat3_float { rk_vec3_float z; }; +static_assert(sizeof(rk_mat3_float) == sizeof(rk_mat3)); + struct rk_mat3_int10 { rk_vec3_int10 x; rk_vec3_int10 y; rk_vec3_int10 z; }; +RK_CHECK_PARAM_INPUT_TYPE(rk_vec3_float); +RK_CHECK_PARAM_INPUT_TYPE(rk_mat3_float); + +RK_CHECK_PARAM_OUTPUT_TYPE(rk_vec3_float); +RK_CHECK_PARAM_OUTPUT_TYPE(rk_vec3_short); +RK_CHECK_PARAM_OUTPUT_TYPE(rk_vec3_int10); +RK_CHECK_PARAM_OUTPUT_TYPE(rk_vec3_uint10); +RK_CHECK_PARAM_OUTPUT_TYPE(rk_mat3_float); +RK_CHECK_PARAM_OUTPUT_TYPE(rk_mat3_int10); + typedef void (*rk_packer)( unsigned const, // count - rk_ushort const * const, // indices - rk_ubyte *, // dst - rk_ubyte const * const); // src + rk_instance_index const * const, // indices + rk_param_output *, // dst + rk_param_input const * const); // src struct rk_parameter { mutable bool dirty; @@ -98,10 +118,18 @@ struct rk_parameter { unsigned offset; unsigned src_size; unsigned dst_size; - rk_ubyte * source; + unsigned src_len; + unsigned dst_len; + rk_param_input * source; rk_packer packer; }; +struct rk_bucket { + unsigned size; + unsigned count; + rk_instance_index * indices; +}; + enum rk_batch_state { RK_BATCH_STATE_EMPTY = 0, RK_BATCH_STATE_FILLED = 1, @@ -118,8 +146,8 @@ struct rk_batch { unsigned nparams; rk_vertices const * vertices; rk_instance_flags * flags; - rk_ushort * meshes; - rk_ushort * indices; + rk_mesh_index * meshes; + rk_instance_index * indices; rk_command * commands; rk_parameter * params; GLuint vertex_array;