From 81d52086fe73507fc53317f7b9a2706c6940b0b2 Mon Sep 17 00:00:00 2001 From: Roz K Date: Fri, 6 Jan 2023 17:05:09 +0100 Subject: [PATCH] Rework params packing. --- cpp/render.hpp | 13 +- cpp/render/render_opengles.cpp | 213 +++++------------------- cpp/render/render_opengles.hpp | 52 +----- cpp/render/vertex_format_opengles.hpp | 230 ++++++++++++++++++++++++++ 4 files changed, 281 insertions(+), 227 deletions(-) create mode 100644 cpp/render/vertex_format_opengles.hpp diff --git a/cpp/render.hpp b/cpp/render.hpp index 461a4b5..32193d7 100644 --- a/cpp/render.hpp +++ b/cpp/render.hpp @@ -85,22 +85,15 @@ enum : rk_uint { typedef rk_ushort rk_vertex_index; typedef rk_ushort rk_mesh_index; +typedef rk_uint rk_vertex_input; +typedef rk_uint rk_vertex_output; +typedef rk_vertex_input rk_param_input; struct rk_mesh { rk_uint base_index; rk_uint ntriangles; }; -// param input types must be size compatible with an array of rk_param_input -typedef rk_uint rk_param_input; - -template -void rk_param_get_input_size(unsigned & _size, unsigned & _len) { - static_assert((sizeof(_input_type) % sizeof(rk_param_input)) == 0); - _size = sizeof(_input_type); - _len = sizeof(_input_type) / sizeof(rk_param_input); -} - RK_EXPORT void rk_render_initialize( rk_bool debug); diff --git a/cpp/render/render_opengles.cpp b/cpp/render/render_opengles.cpp index 0b2680b..b3e011c 100644 --- a/cpp/render/render_opengles.cpp +++ b/cpp/render/render_opengles.cpp @@ -14,6 +14,7 @@ // along with this program. If not, see . 
#include "render_opengles.hpp" +#include "vertex_format_opengles.hpp" #include "../display/display_glx.hpp" #include "../utils/cmp_memcpy.hpp" #include @@ -330,13 +331,13 @@ rk_vertices_t rk_create_vertices( for (rk_vertex_format const * f = format; *f; ++f, ++format_size) { switch (*f & RK_VERTEX_FORMAT_MASK) { case RK_VERTEX_FORMAT_VEC3_FLOAT: - vertex_size += sizeof(rk_vec3_float); + vertex_size += rk_vec3_float::get_output_size(); break; case RK_VERTEX_FORMAT_VEC3_INT10: - vertex_size += sizeof(rk_vec3_int10); + vertex_size += rk_vec3_int10::get_output_size(); break; case RK_VERTEX_FORMAT_VEC3_UINT10: - vertex_size += sizeof(rk_vec3_uint10); + vertex_size += rk_vec3_uint10::get_output_size(); break; default: rk_printf("rk_create_vertices(): invalid vertex format."); @@ -420,143 +421,6 @@ static void rk_buckets_alloc( } } -#define rk_pack_short_norm(_f) (static_cast((_f) * 32767.f) & 65536) -// #define rk_pack_short_norm(_f) (((static_cast((_f) * 32767.f) - 1) / 2) & 65535) - -#define rk_pack_int10_norm(_f) (static_cast((_f) * 511.f) & 1023) -// #define rk_pack_int10_norm(_f) (((static_cast((_f) * 1023.f) - 1) / 2) & 1023) - -static void rk_pack_vec3_float( - unsigned const count, - rk_instance_index const * const __restrict indices, - rk_param_output * __restrict _dst, - rk_param_input const * const __restrict _src) { - rk_instance_index const * const last_index = indices + count; - rk_vec3_float * __restrict dst = reinterpret_cast(_dst); - rk_vec3_float const * const __restrict src = reinterpret_cast(_src); - for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { - *dst = src[*index]; - } -} - -static void rk_pack_vec3_short( - unsigned const count, - rk_instance_index const * const __restrict indices, - rk_param_output * __restrict _dst, - rk_param_input const * const __restrict _src) { - rk_instance_index const * const last_index = indices + count; - rk_vec3_short * __restrict dst = reinterpret_cast(_dst); - 
rk_vec3_float const * const __restrict src = reinterpret_cast(_src); - for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { - rk_vec3_float const & input = src[*index]; - dst->x = static_cast(input.x); - dst->y = static_cast(input.y); - dst->z = static_cast(input.z); - dst->pad = 0; - } -} - -static void rk_pack_vec3_short_norm( - unsigned const count, - rk_instance_index const * const __restrict indices, - rk_param_output * __restrict _dst, - rk_param_input const * const __restrict _src) { - rk_instance_index const * const last_index = indices + count; - rk_vec3_short * __restrict dst = reinterpret_cast(_dst); - rk_vec3_float const * const __restrict src = reinterpret_cast(_src); - #define _convert(s) rk_pack_short_norm(s) - for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { - rk_vec3_float const & input = src[*index]; - dst->x = _convert(input.x); - dst->y = _convert(input.y); - dst->z = _convert(input.z); - dst->pad = 0; - } - #undef _convert -} - -static void rk_pack_vec3_int10( - unsigned const count, - rk_instance_index const * const __restrict indices, - rk_param_output * __restrict _dst, - rk_param_input const * const __restrict _src) { - rk_instance_index const * const last_index = indices + count; - rk_vec3_int10 * __restrict dst = reinterpret_cast(_dst); - rk_vec3_float const * const __restrict src = reinterpret_cast(_src); - #define _convert(s) (static_cast((s)) & 1023) - for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { - rk_vec3_float const & input = src[*index]; - *dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20); - } - #undef _convert -} - -static void rk_pack_vec3_int10_norm( - unsigned const count, - rk_instance_index const * const __restrict indices, - rk_param_output * __restrict _dst, - rk_param_input const * const __restrict _src) { - rk_instance_index const * const last_index = 
indices + count; - rk_vec3_int10 * __restrict dst = reinterpret_cast(_dst); - rk_vec3_float const * const __restrict src = reinterpret_cast(_src); - #define _convert(s) rk_pack_int10_norm(s) - for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { - rk_vec3_float const & input = src[*index]; - *dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20); - } - #undef _convert -} - -static void rk_pack_mat3_float( - unsigned const count, - rk_instance_index const * const __restrict indices, - rk_param_output * __restrict _dst, - rk_param_input const * const __restrict _src) { - rk_instance_index const * const last_index = indices + count; - rk_mat3_float * __restrict dst = reinterpret_cast(_dst); - rk_mat3_float const * const __restrict src = reinterpret_cast(_src); - for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { - *dst = src[*index]; - } - #undef _convert -} - -static void rk_pack_mat3_int10( - unsigned const count, - rk_instance_index const * const __restrict indices, - rk_param_output * __restrict _dst, - rk_param_input const * const __restrict _src) { - rk_instance_index const * const last_index = indices + count; - rk_mat3_int10 * __restrict dst = reinterpret_cast(_dst); - rk_mat3_float const * const __restrict src = reinterpret_cast(_src); - #define _convert(s) (static_cast((s)) & 1023) - for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { - rk_mat3_float const & input = src[*index]; - dst->x = _convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20); - dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20); - dst->z = _convert(input.z.x) | (_convert(input.z.y) << 10) | (_convert(input.z.z) << 20); - } - #undef _convert -} - -static void rk_pack_mat3_int10_norm( - unsigned const count, - rk_instance_index const * const __restrict indices, - 
rk_param_output * __restrict _dst, - rk_param_input const * const __restrict _src) { - rk_instance_index const * const last_index = indices + count; - rk_mat3_int10 * __restrict dst = reinterpret_cast(_dst); - rk_mat3_float const * const __restrict src = reinterpret_cast(_src); - #define _convert(s) rk_pack_int10_norm(s) - for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { - rk_mat3_float const & input = src[*index]; - dst->x = _convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20); - dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20); - dst->z = _convert(input.z.x) | (_convert(input.z.y) << 10) | (_convert(input.z.z) << 20); - } - #undef _convert -} - rk_batch_t rk_create_batch( rk_vertices_t _vertices, rk_uint max_size, @@ -570,13 +434,13 @@ rk_batch_t rk_create_batch( for (rk_vertex_format const * f = vertices->format; *f; ++f) { switch (*f & RK_VERTEX_FORMAT_MASK) { case RK_VERTEX_FORMAT_VEC3_FLOAT: - vertex_size += sizeof(rk_vec3_float); + vertex_size += rk_vec3_float::get_output_size(); break; case RK_VERTEX_FORMAT_VEC3_INT10: - vertex_size += sizeof(rk_vec3_int10); + vertex_size += rk_vec3_int10::get_output_size(); break; case RK_VERTEX_FORMAT_VEC3_UINT10: - vertex_size += sizeof(rk_vec3_uint10); + vertex_size += rk_vec3_uint10::get_output_size(); break; } } @@ -586,19 +450,19 @@ rk_batch_t rk_create_batch( for (rk_param_format const * f = params_format; *f; ++f, ++nparams) { switch (*f & RK_PARAM_FORMAT_MASK) { case RK_PARAM_FORMAT_VEC3_FLOAT: - params_size += sizeof(rk_vec3_float); + params_size += rk_vec3_float::get_output_size(); break; case RK_PARAM_FORMAT_VEC3_SHORT: - params_size += sizeof(rk_vec3_short); + params_size += rk_vec3_short::get_output_size(); break; case RK_PARAM_FORMAT_VEC3_INT10: - params_size += sizeof(rk_vec3_int10); + params_size += rk_vec3_int10::get_output_size(); break; case RK_PARAM_FORMAT_MAT3_FLOAT: - params_size += 
sizeof(rk_mat3_float); + params_size += rk_mat3_float::get_output_size(); break; case RK_PARAM_FORMAT_MAT3_INT10: - params_size += sizeof(rk_mat3_int10); + params_size += rk_mat3_int10::get_output_size(); break; default: rk_printf("rk_create_batch(): invalid param format."); @@ -667,19 +531,19 @@ rk_batch_t rk_create_batch( glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offset); glVertexAttribBinding(attrib++, binding); - offset += sizeof(rk_vec3_float); + offset += rk_vec3_float::get_output_size(); break; case RK_VERTEX_FORMAT_VEC3_INT10: glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offset); glVertexAttribBinding(attrib++, binding); - offset += sizeof(rk_vec3_int10); + offset += rk_vec3_int10::get_output_size(); break; case RK_VERTEX_FORMAT_VEC3_UINT10: glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 4, GL_UNSIGNED_INT_2_10_10_10_REV, norm, offset); glVertexAttribBinding(attrib++, binding); - offset += sizeof(rk_vec3_uint10); + offset += rk_vec3_uint10::get_output_size(); break; } } @@ -694,64 +558,66 @@ rk_batch_t rk_create_batch( param->offset = offset; switch (*f & RK_PARAM_FORMAT_MASK) { case RK_PARAM_FORMAT_VEC3_FLOAT: - rk_param_get_input_size(param->src_size, param->src_len); - rk_param_get_output_size(param->dst_size, param->dst_len); - param->packer = rk_pack_vec3_float; + param->src_size = rk_vec3_float::get_input_size(); + param->dst_size = rk_vec3_float::get_output_size(); + param->packer = rk_vec3_float::param_packer; glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, 0); glVertexAttribBinding(attrib++, binding); break; case RK_PARAM_FORMAT_VEC3_SHORT: - rk_param_get_input_size(param->src_size, param->src_len); - rk_param_get_output_size(param->dst_size, param->dst_len); - param->packer = norm ? 
rk_pack_vec3_short_norm : rk_pack_vec3_short; + param->src_size = rk_vec3_short::get_input_size(); + param->dst_size = rk_vec3_short::get_output_size(); + param->packer = norm ? rk_vec3_short_norm::param_packer : rk_vec3_short::param_packer; glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 3, GL_SHORT, norm, 0); glVertexAttribBinding(attrib++, binding); break; case RK_PARAM_FORMAT_VEC3_INT10: - rk_param_get_input_size(param->src_size, param->src_len); - rk_param_get_output_size(param->dst_size, param->dst_len); - param->packer = norm ? rk_pack_vec3_int10_norm : rk_pack_vec3_int10; + param->src_size = rk_vec3_int10::get_input_size(); + param->dst_size = rk_vec3_int10::get_output_size(); + param->packer = norm ? rk_vec3_int10_norm::param_packer : rk_vec3_int10::param_packer; glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, 0); glVertexAttribBinding(attrib++, binding); break; case RK_PARAM_FORMAT_MAT3_FLOAT: - rk_param_get_input_size(param->src_size, param->src_len); - rk_param_get_output_size(param->dst_size, param->dst_len); - param->packer = rk_pack_mat3_float; + param->src_size = rk_mat3_float::get_input_size(); + param->dst_size = rk_mat3_float::get_output_size(); + param->packer = rk_mat3_float::param_packer; glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); - glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, x)); + glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, rk_mat3_float::get_output_offset(0)); glVertexAttribBinding(attrib++, binding); glEnableVertexAttribArray(attrib); - glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, y)); + glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, 
rk_mat3_float::get_output_offset(1)); glVertexAttribBinding(attrib++, binding); glEnableVertexAttribArray(attrib); - glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, z)); + glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, rk_mat3_float::get_output_offset(2)); glVertexAttribBinding(attrib++, binding); break; case RK_PARAM_FORMAT_MAT3_INT10: - rk_param_get_input_size(param->src_size, param->src_len); - rk_param_get_output_size(param->dst_size, param->dst_len); - param->packer = norm ? rk_pack_mat3_int10_norm : rk_pack_mat3_int10; + param->src_size = rk_mat3_int10::get_input_size(); + param->dst_size = rk_mat3_int10::get_output_size(); + param->packer = norm ? rk_mat3_int10_norm::param_packer : rk_mat3_int10::param_packer; glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); - glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, x)); + glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, rk_mat3_int10::get_output_offset(0)); glVertexAttribBinding(attrib++, binding); glEnableVertexAttribArray(attrib); - glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, y)); + glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, rk_mat3_int10::get_output_offset(1)); glVertexAttribBinding(attrib++, binding); glEnableVertexAttribArray(attrib); - glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, z)); + glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, rk_mat3_int10::get_output_offset(2)); glVertexAttribBinding(attrib++, binding); break; } glVertexBindingDivisor(binding, 1); + param->src_len = param->src_size / sizeof(rk_param_input); /* length in rk_param_input words, matching the element type of param->source */ + param->dst_len = param->dst_size / sizeof(rk_param_output); /* length in rk_param_output words */ param->source = new rk_param_input[max_size * param->src_len]; 
memset(param->source, 0xFF, max_size * param->src_size); offset += max_size * param->dst_size; @@ -762,6 +628,7 @@ 
rk_batch_t rk_create_batch( return reinterpret_cast(batch); } +[[RK_HOT, RK_FAST]] static void rk_sort_batch( rk_batch const & batch) { rk_bucket const * const last_bucket = rk_buckets + batch.vertices->nmeshes; @@ -798,6 +665,7 @@ static void rk_sort_batch( batch.state = RK_BATCH_STATE_SORTED; } +[[RK_HOT, RK_FAST]] static void rk_pack_batch( rk_batch const & batch) { if (batch.nparams) { @@ -821,6 +689,7 @@ static void rk_pack_batch( batch.state = RK_BATCH_STATE_PACKED; } +[[RK_HOT, RK_FAST]] void rk_fill_batch( rk_batch_t _batch, rk_uint count, diff --git a/cpp/render/render_opengles.hpp b/cpp/render/render_opengles.hpp index 6ee78b9..b367a40 100644 --- a/cpp/render/render_opengles.hpp +++ b/cpp/render/render_opengles.hpp @@ -21,6 +21,10 @@ #include #include +static_assert(sizeof(rk_vertex_output) == 4); + +typedef rk_vertex_output rk_param_output; + struct rk_shader { GLuint vertex; GLuint fragment; @@ -58,53 +62,11 @@ struct rk_command { GLuint base_instance; }; -// param output types must be size compatible with an array of rk_param_output -typedef rk_uint rk_param_output; - -template -void rk_param_get_output_size(unsigned & _size, unsigned & _len) { - static_assert((sizeof(_output_type) % sizeof(rk_param_output)) == 0); - _size = sizeof(_output_type); - _len = sizeof(_output_type) / sizeof(rk_param_output); -} - -struct rk_vec3_float { - float x; - float y; - float z; -}; - -static_assert(sizeof(rk_vec3_float) == sizeof(rk_vec3)); - -struct rk_vec3_short { - rk_short x; - rk_short y; - rk_short z; - rk_short pad; -}; - -typedef rk_int rk_vec3_int10; -typedef rk_uint rk_vec3_uint10; - -struct rk_mat3_float { - rk_vec3_float x; - rk_vec3_float y; - rk_vec3_float z; -}; - -static_assert(sizeof(rk_mat3_float) == sizeof(rk_mat3)); - -struct rk_mat3_int10 { - rk_vec3_int10 x; - rk_vec3_int10 y; - rk_vec3_int10 z; -}; - typedef void (*rk_packer)( unsigned const, // count - rk_instance_index const * const, // indices - rk_param_output *, // dst - rk_param_input 
const * const); // src + rk_instance_index const * const __restrict, // indices + rk_param_output * __restrict, // dst + rk_param_input const * const __restrict); // src struct rk_parameter { mutable bool dirty; diff --git a/cpp/render/vertex_format_opengles.hpp b/cpp/render/vertex_format_opengles.hpp new file mode 100644 index 0000000..bcf8dce --- /dev/null +++ b/cpp/render/vertex_format_opengles.hpp @@ -0,0 +1,230 @@ +// Copyright (C) 2023 RozK +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +#ifndef RK_ENGINE_VERTEX_FORMAT_H +#define RK_ENGINE_VERTEX_FORMAT_H + +#include "render_opengles.hpp" +#include + +namespace rk_vertex { + +#pragma pack(push, 1) + +template +struct alignas(alignof(_type)) rk_input { + _type input; +}; + +template +struct alignas(alignof(_type)) rk_output { + _type output; + + [[RK_FAST]] + inline void convert( + rk_input<_input> const & __restrict src) { + output = static_cast<_type>(src.input); /* plain value conversion, no scaling */ + } +}; + +template +struct alignas(alignof(_type)) rk_output<_type, rk_float, _signed, true> { + _type output; + + [[RK_FAST]] + inline void convert( + rk_input const & __restrict src) { + enum : _type { max = std::numeric_limits<_type>::max() }; + output = static_cast<_type>(src.input * static_cast(max)); /* normalized: scale by the largest _type value before converting */ + } +}; + +#pragma pack(4) + +template +struct alignas(4) rk_input_row { + rk_input<_input> input_col[_cols]; +}; + +template +struct alignas(4) rk_output_row { + rk_output<_output, _input, _signed, _normalized> output_col[_cols]; + + [[RK_FAST, RK_FLATTEN, RK_UNROLLED]] + inline void convert( + rk_input_row<_input, _cols> const & __restrict src) { + for (unsigned col = 0; col < _cols; ++col) { + output_col[col].convert(src.input_col[col]); + } + } +}; + +template +struct alignas(4) rk_output_row, _input, _cols, _signed, _normalized> { + rk_output, _input, _signed, _normalized> output_cols; + + [[RK_FAST, RK_FLATTEN]] + inline void convert( + rk_input_row<_input, _cols> const & __restrict src) { + output_cols.convert(src); + } +}; + +template +struct alignas(4) rk_input_format { + rk_input_row<_input, _cols> input_row[_rows]; +}; + +template +struct alignas(4) rk_output_format { + rk_output_row<_output, _input, _cols, _signed, _normalized> output_row[_rows]; + + [[RK_FAST, RK_FLATTEN, RK_UNROLLED]] + inline void convert( + rk_input_format<_input, _cols, _rows> const & __restrict src) { + for (unsigned row = 0; row < _rows; ++row) { + output_row[row].convert(src.input_row[row]); + } + } +}; + +template +struct 
alignas(alignof(rk_packed<_signed, 3>)) rk_output, _input, _signed, _normalized> { + rk_packed<_signed, 3> output; + + [[RK_FAST]] + inline void convert( + rk_input_row<_input, 3> const & __restrict src) { + typedef typename rk_packed<_signed, 3>::type packed_type; + output.packed = + ((static_cast(src.input_col[0].input) & 1023)) | + ((static_cast(src.input_col[1].input) & 1023) << 10) | + ((static_cast(src.input_col[2].input) & 1023) << 20); + } +}; + +template +struct alignas(alignof(rk_packed<_signed, 4>)) rk_output, _input, _signed, _normalized> { + rk_packed<_signed, 4> output; + + [[RK_FAST]] + inline void convert( + rk_input_row<_input, 4> const & __restrict src) { + typedef typename rk_packed<_signed, 4>::type packed_type; + output.packed = + ((static_cast(src.input_col[0].input) & 1023)) | + ((static_cast(src.input_col[1].input) & 1023) << 10) | + ((static_cast(src.input_col[2].input) & 1023) << 20) | + ((static_cast(src.input_col[3].input) & 3) << 30); + } +}; + +template<> +struct alignas(alignof(rk_packed)) rk_output, rk_float, true, true> { + rk_packed output; + + [[RK_FAST]] + inline void convert( + rk_input_row const & __restrict src) { + output.packed = + ((static_cast(src.input_col[0].input * 511.f) & 1023)) | + ((static_cast(src.input_col[1].input * 511.f) & 1023) << 10) | + ((static_cast(src.input_col[2].input * 511.f) & 1023) << 20); + } +}; + +template<> +struct alignas(alignof(rk_packed)) rk_output, rk_float, true, true> { + rk_packed output; + + [[RK_FAST]] + inline void convert( + rk_input_row const & __restrict src) { + output.packed = + ((static_cast(src.input_col[0].input * 511.f) & 1023)) | + ((static_cast(src.input_col[1].input * 511.f) & 1023) << 10) | + ((static_cast(src.input_col[2].input * 511.f) & 1023) << 20) | + ((static_cast(src.input_col[3].input) & 3) << 30); /* NOTE(review): fourth component is masked to 2 bits without the 511 scale applied to the first three — confirm intended */ + } +}; + +#pragma pack(pop) + +template +struct rk_format { + typedef rk_input<_input> input; + typedef rk_output<_output, input, _signed, _normalized> output; /* NOTE(review): passes the rk_input<_input> wrapper as the input argument, whereas output_row and output_format pass _input — confirm intended */ + + 
typedef rk_input_row<_input, _cols> input_row; + typedef rk_output_row<_output, _input, _cols, _signed, _normalized> output_row; + + typedef rk_input_format<_input, _cols, _rows> input_format; + typedef rk_output_format<_output, _input, _cols, _rows, _signed, _normalized> output_format; + + static_assert(sizeof(input) == sizeof(_input)); + static_assert(sizeof(output) == sizeof(_output)); + static_assert((sizeof(input_row) % sizeof(rk_vertex_input)) == 0); + static_assert((sizeof(output_row) % sizeof(rk_vertex_output)) == 0); + static_assert((sizeof(input_format) % sizeof(rk_vertex_input)) == 0); + static_assert((sizeof(output_format) % sizeof(rk_vertex_output)) == 0); + + static unsigned get_input_size() { + return sizeof(input_format); + } + + static unsigned get_output_size() { + return sizeof(output_format); + } + + static unsigned get_output_offset(unsigned const index) { + return index * sizeof(output_row); /* byte offset of row index inside one output_format */ + } + + [[RK_FAST, RK_FLATTEN]] + inline static void convert( + output_format & __restrict dst, + input_format const & __restrict src) { + dst.convert(src); + } + + [[RK_HOT, RK_FAST, RK_FLATTEN]] + static void param_packer( + unsigned const count, + rk_instance_index const * const __restrict indices, + rk_param_output * __restrict _dst, + rk_param_input const * const __restrict _src) { + rk_instance_index const * const last_index = indices + count; + output_format * __restrict dst = reinterpret_cast(_dst); + input_format const * const __restrict src = reinterpret_cast(_src); + for (rk_instance_index const * __restrict index = indices; index < last_index; ++index, ++dst) { + dst->convert(src[*index]); /* gather: the i-th output is converted from the input selected by indices[i] */ + } + } +}; + +} // namespace rk_vertex + +typedef rk_vertex::rk_format rk_vec3_float; +typedef rk_vertex::rk_format rk_vec3_short; +typedef rk_vertex::rk_format rk_vec3_short_norm; +typedef rk_vertex::rk_format, rk_float, 3, 1, true, false> rk_vec3_int10; +typedef rk_vertex::rk_format, rk_float, 3, 1, true, true> rk_vec3_int10_norm; +typedef 
rk_vertex::rk_format, rk_float, 3, 1, true, false> rk_vec3_uint10; +typedef rk_vertex::rk_format, rk_float, 3, 1, true, true> rk_vec3_uint10_norm; +typedef rk_vertex::rk_format rk_mat3_float; +typedef rk_vertex::rk_format, rk_float, 3, 3, true, false> rk_mat3_int10; +typedef rk_vertex::rk_format, rk_float, 3, 3, true, true> rk_mat3_int10_norm; + +#endif // RK_ENGINE_VERTEX_FORMAT_H