// Patch summary (review notes; this text sits ahead of the first "diff --git" header and belongs to no hunk):
//  - cpp/cmp_memcpy.hpp (new file): compare-and-copy helpers _rk_cmp_memcpy_small and _rk_cmp_memcpy_big, the
//    _rk_size_and_alignment macro, and rk_cmp_memcpy (one declaration followed by five template<> definitions).
//  - cpp/render/render_opengles.cpp: includes the new header, deletes the file-local rk_compare_replace and
//    switches its four call sites (one in rk_buckets_sort, three in rk_fill_batch) to rk_cmp_memcpy.
//  - cpp/types.hpp: adds the rk_llong and rk_ullong typedefs over __int128.
// NOTE(review): this copy looks whitespace-flattened, and angle-bracketed lists that start with a letter seem to be
//    missing (bare "template", "reinterpret_cast(_dst)", bare "#include", no explicit arguments on the
//    rk_cmp_memcpy and _rk_cmp_memcpy_big calls) - confirm against the original commit before applying.
// NOTE(review): both helper loops are do/while, so one element is copied before count is tested;
//    _rk_cmp_memcpy_small is reached for any size > 0, so a size below its element size would wrap count -
//    presumably callers always pass a multiple of the element size; verify.
// NOTE(review): the template<> definitions live in a header without "inline"; if they are full function
//    specializations, including the header from a second translation unit would duplicate them - confirm.
// NOTE(review): __int128 is a compiler extension rather than standard C++ - confirm the target toolchains.
diff --git a/cpp/cmp_memcpy.hpp b/cpp/cmp_memcpy.hpp new file mode 100644 index 0000000..955508b --- /dev/null +++ b/cpp/cmp_memcpy.hpp @@ -0,0 +1,169 @@ +// Copyright (C) 2023 RozK +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +#ifndef RK_ENGINE_CMP_MEMCPY_H +#define RK_ENGINE_CMP_MEMCPY_H + +#include "types.hpp" +/* _rk_cmp_memcpy_small: copies size / sizeof(_type) elements from _src to _dst and returns true when any element of _dst differed from _src before being overwritten (the XOR of each pair is OR-ed into cmp). The do/while body runs before count is tested, so at least one element is always copied. */ +template +bool _rk_cmp_memcpy_small( + void * const __restrict _dst, + void const * const __restrict _src, + unsigned const size) { + unsigned count = (size / sizeof(_type)); + _type * dst = reinterpret_cast<_type *>(_dst); + _type const * src = reinterpret_cast<_type const *>(_src); + _type cmp = 0; + do { + cmp |= *dst ^ *src; + *dst++ = *src++; + } while(--count > 0); + return (cmp != 0); +} +/* _rk_cmp_memcpy_big: same compare-and-copy in _big-sized steps for size / sizeof(_big) elements; the size % sizeof(_big) trailing bytes, if any, are handed to _rk_cmp_memcpy_small<_small> and its result is OR-ed into the return value. The do/while body runs before count is tested, so at least one _big element is always copied. */ +template +bool _rk_cmp_memcpy_big( + void * const __restrict _dst, + void const * const __restrict _src, + unsigned const size) { + unsigned count = size / sizeof(_big); + unsigned const remain = size % sizeof(_big); + _big * dst = reinterpret_cast<_big *>(_dst); + _big const * src = reinterpret_cast<_big const *>(_src); + _big cmp = 0; + do { + cmp |= *dst ^ *src; + *dst++ = *src++; + } while(--count > 0); + bool modified = (cmp != 0); + if (remain) { + modified |= _rk_cmp_memcpy_small<_small>(dst, src, remain); + } + return modified; +} +/* _rk_size_and_alignment(_t): true when size >= sizeof(_t); unless RK_CMP_MEMCPY_UNALIGNED is defined it also requires the low bits of alignment selected by sizeof(_t) - 1 to be zero. It expands against the size and alignment names of the enclosing function and is undefined again at the end of this header. */ +#ifdef RK_CMP_MEMCPY_UNALIGNED +#define _rk_size_and_alignment(_t) (size >= sizeof(_t)) +#else +#define 
_rk_size_and_alignment(_t) (size >= sizeof(_t) && !(alignment & (sizeof(_t) - 1))) +#endif +/* rk_cmp_memcpy: declared here and defined only through the template<> definitions that follow. Each definition tries word types from widest to narrowest with _rk_size_and_alignment, delegates to _rk_cmp_memcpy_big on the first match, falls back to _rk_cmp_memcpy_small when size > 0, and returns false when size is 0. In the aligned build, alignment is the bitwise OR of the _dst and _src addresses. */ +template +bool rk_cmp_memcpy( + void * const __restrict _dst, + void const * const __restrict _src, + unsigned const size); + +template<> +bool rk_cmp_memcpy( + void * const __restrict _dst, + void const * const __restrict _src, + unsigned const size) { +#ifndef RK_CMP_MEMCPY_UNALIGNED + unsigned const alignment = reinterpret_cast(_dst) | reinterpret_cast(_src); +#endif + if (_rk_size_and_alignment(rk_ullong)) { + return _rk_cmp_memcpy_big(_dst, _src, size); + } + if (_rk_size_and_alignment(rk_ulong)) { + return _rk_cmp_memcpy_big(_dst, _src, size); + } + if (_rk_size_and_alignment(rk_uint)) { + return _rk_cmp_memcpy_big(_dst, _src, size); + } + if (_rk_size_and_alignment(rk_ushort)) { + return _rk_cmp_memcpy_big(_dst, _src, size); + } + if (size > 0) { + return _rk_cmp_memcpy_small(_dst, _src, size); + } + return false; +} + +template<> +bool rk_cmp_memcpy( + void * const __restrict _dst, + void const * const __restrict _src, + unsigned const size) { +#ifndef RK_CMP_MEMCPY_UNALIGNED + unsigned const alignment = reinterpret_cast(_dst) | reinterpret_cast(_src); +#endif + if (_rk_size_and_alignment(rk_ullong)) { + return _rk_cmp_memcpy_big(_dst, _src, size); + } + if (_rk_size_and_alignment(rk_ulong)) { + return _rk_cmp_memcpy_big(_dst, _src, size); + } + if (_rk_size_and_alignment(rk_uint)) { + return _rk_cmp_memcpy_big(_dst, _src, size); + } + if (size > 0) { + return _rk_cmp_memcpy_small(_dst, _src, size); + } + return false; +} + +template<> +bool rk_cmp_memcpy( + void * const __restrict _dst, + void const * const __restrict _src, + unsigned const size) { +#ifndef RK_CMP_MEMCPY_UNALIGNED + unsigned const alignment = reinterpret_cast(_dst) | reinterpret_cast(_src); +#endif + if (_rk_size_and_alignment(rk_ullong)) { + return _rk_cmp_memcpy_big(_dst, _src, size); + } + if (_rk_size_and_alignment(rk_ulong)) { + return _rk_cmp_memcpy_big(_dst, _src, size); + } + if (size > 0) 
{ + return _rk_cmp_memcpy_small(_dst, _src, size); + } + return false; +} + +template<> +bool rk_cmp_memcpy( + void * const __restrict _dst, + void const * const __restrict _src, + unsigned const size) { +#ifndef RK_CMP_MEMCPY_UNALIGNED + unsigned const alignment = reinterpret_cast(_dst) | reinterpret_cast(_src); +#endif + if (_rk_size_and_alignment(rk_ullong)) { + return _rk_cmp_memcpy_big(_dst, _src, size); + } + if (size > 0) { + return _rk_cmp_memcpy_small(_dst, _src, size); + } + return false; +} + +template<> +bool rk_cmp_memcpy( + void * const __restrict _dst, + void const * const __restrict _src, + unsigned const size) { + if (size > 0) { + return _rk_cmp_memcpy_small(_dst, _src, size); + } + return false; +} + +#undef _rk_size_and_alignment + +#endif // RK_ENGINE_CMP_MEMCPY_H diff --git a/cpp/render/render_opengles.cpp b/cpp/render/render_opengles.cpp index f56a815..15d5d24 100644 --- a/cpp/render/render_opengles.cpp +++ b/cpp/render/render_opengles.cpp @@ -18,6 +18,7 @@ #include "../render.hpp" #include "render_opengles.hpp" #include "../display/display_glx.hpp" +#include "../cmp_memcpy.hpp" #include #include #include @@ -28,32 +29,6 @@ typedef void (*rk_MultiDrawElementsIndirectFunc)(rk_uint, rk_uint, const void *, static rk_DrawElementsInstancedBaseInstanceFunc rk_DrawElementsInstancedBaseInstance = nullptr; static rk_MultiDrawElementsIndirectFunc rk_MultiDrawElementsIndirect = nullptr; -static bool rk_compare_replace( - void * __restrict _dst, - void const * __restrict _src, - unsigned const size) { - rk_ulong hash = 0; - rk_ulong * dst = reinterpret_cast(_dst); - rk_ulong const * src = reinterpret_cast(_src); - unsigned count = size / sizeof(rk_ulong); - unsigned remain = (size - count * sizeof(rk_ulong)); - if (count) { - do { - hash |= *dst ^ *src; - *dst++ = *src++; - } while(--count > 0); - } - if (remain) { - rk_ubyte * rdst = reinterpret_cast(dst); - rk_ubyte const * rsrc = reinterpret_cast(src); - do { - hash |= *rdst ^ *rsrc; - *rdst++ = 
*rsrc++; - } while(--remain > 0); - } - return (hash != 0); -} - #ifdef RK_BUCKETS_SORT struct rk_bucket { @@ -143,7 +118,7 @@ static bool rk_buckets_sort( command->base_index = mesh->base_index; command->base_vertex = 0; command->base_instance = indices - batch.indices; - modified |= rk_compare_replace(indices, bucket->indices, bucket->count * sizeof(rk_ushort)); + modified |= rk_cmp_memcpy(indices, bucket->indices, bucket->count * sizeof(rk_ushort)); indices += bucket->count; ++command; } @@ -937,18 +912,19 @@ void rk_fill_batch( return; } batch->count = count; - bool const cmp_flags = - (flags && rk_compare_replace(batch->flags, flags, batch->count * sizeof(rk_instance_flags))); - bool const cmp_meshes = - (meshes && rk_compare_replace(batch->meshes, meshes, batch->count * sizeof(rk_mesh))); + bool const cmp_flags = (flags && + rk_cmp_memcpy(batch->flags, flags, batch->count * sizeof(rk_instance_flags))); + bool const cmp_meshes = (meshes && + rk_cmp_memcpy(batch->meshes, meshes, batch->count * sizeof(rk_mesh))); bool const need_sorting = (cmp_flags || cmp_meshes || resized); if (batch->nparams) { rk_parameter const * const last_param = batch->params + batch->nparams; if (got_any_params) { rk_ubyte const * const * src = params; for (rk_parameter const * dst = batch->params; dst < last_param; ++dst, ++src) { - dst->dirty = - (*src && rk_compare_replace(dst->source, *src, batch->count * dst->src_size)) || need_sorting; + dst->dirty = ((*src && + rk_cmp_memcpy(dst->source, *src, batch->count * dst->src_size)) + || need_sorting); } } else if (need_sorting) { for (rk_parameter const * dst = batch->params; dst < last_param; ++dst) { diff --git a/cpp/types.hpp b/cpp/types.hpp index b5bd179..6ac3fbb 100644 --- a/cpp/types.hpp +++ b/cpp/types.hpp @@ -33,6 +33,8 @@ typedef int32_t rk_int; typedef uint32_t rk_uint; typedef int64_t rk_long; typedef uint64_t rk_ulong; +typedef __int128 rk_llong; +typedef unsigned __int128 rk_ullong; typedef float rk_float; typedef void * 
rk_handle_t;