diff --git a/cpp/cmp_memcpy.hpp b/cpp/cmp_memcpy.hpp
new file mode 100644
index 0000000..955508b
--- /dev/null
+++ b/cpp/cmp_memcpy.hpp
@@ -0,0 +1,169 @@
+// Copyright (C) 2023 RozK
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+#ifndef RK_ENGINE_CMP_MEMCPY_H
+#define RK_ENGINE_CMP_MEMCPY_H
+
+#include "types.hpp"
+
+template <typename _type> // NOTE(review): parameter list was stripped in this copy; restored from the uses of _type below
+bool _rk_cmp_memcpy_small( // overwrites dst with src one _type at a time; returns true if any element differed
+    void * const __restrict _dst, // destination, also the "old" side of the comparison
+    void const * const __restrict _src, // source; __restrict promises it does not overlap _dst
+    unsigned const size) { // byte count; a tail shorter than sizeof(_type) is neither compared nor copied
+    unsigned count = (size / sizeof(_type));
+    _type * dst = reinterpret_cast<_type *>(_dst);
+    _type const * src = reinterpret_cast<_type const *>(_src);
+    _type cmp = 0; // OR of every (old ^ new): non-zero iff at least one bit changed
+    for (; count > 0; --count) { // pre-tested: size < sizeof(_type) copies nothing instead of wrapping count
+        cmp |= *dst ^ *src;
+        *dst++ = *src++;
+    }
+    return (cmp != 0);
+}
+
+template <typename _big, typename _small> // NOTE(review): list stripped in this copy; names taken from the body, order assumed — confirm
+bool _rk_cmp_memcpy_big( // overwrites dst with src in _big-sized words, then finishes the tail in _small-sized elements
+    void * const __restrict _dst, // destination, also the "old" side of the comparison
+    void const * const __restrict _src, // source; __restrict promises it does not overlap _dst
+    unsigned const size) { // byte count; must be >= sizeof(_big), which _rk_size_and_alignment checks at the call sites
+    unsigned count = size / sizeof(_big);
+    unsigned const remain = size % sizeof(_big);
+    _big * dst = reinterpret_cast<_big *>(_dst);
+    _big const * src = reinterpret_cast<_big const *>(_src);
+    _big cmp = 0; // accumulates old ^ new so a single test after the loop detects any change
+    do { // post-tested: relies on count >= 1 (see the size precondition above)
+        cmp |= *dst ^ *src;
+        *dst++ = *src++;
+    } while(--count > 0);
+    bool modified = (cmp != 0);
+    if (remain >= sizeof(_small)) { // skip a tail too short to hold even one _small element
+        modified |= _rk_cmp_memcpy_small<_small>(dst, src, remain);
+    }
+    return modified;
+}
+
+#ifdef RK_CMP_MEMCPY_UNALIGNED // when defined, a word type is accepted on size alone
+#define _rk_size_and_alignment(_t) (size >= sizeof(_t))
+#else // otherwise the low bits of `alignment` must also be clear for sizeof(_t)
+#define _rk_size_and_alignment(_t) (size >= sizeof(_t) && !(alignment & (sizeof(_t) - 1)))
+#endif // both forms expand against the locals `size` / `alignment` of the function using them
+
+template <typename _type> // NOTE(review): parameter list was stripped in this copy; one type parameter assumed — confirm
+bool rk_cmp_memcpy( // declaration only: the definitions are the explicit specializations in this header
+    void * const __restrict _dst, // destination buffer, overwritten with the source contents
+    void const * const __restrict _src, // source buffer; __restrict promises it does not overlap _dst
+    unsigned const size); // size in bytes; result is true when the destination content changed
+
+template<> // byte-element variant: tries rk_ullong, rk_ulong, rk_uint, rk_ushort words, then single bytes
+inline bool rk_cmp_memcpy<rk_ubyte>( // inline: defined in a header. NOTE(review): <...> arguments were stripped in this copy, rebuilt from the size ladder — confirm
+    void * const __restrict _dst, // destination, overwritten with the source contents
+    void const * const __restrict _src, // source; must not overlap _dst
+    unsigned const size) { // byte count; returns true if any copied data differed
+#ifndef RK_CMP_MEMCPY_UNALIGNED
+    unsigned const alignment = reinterpret_cast<uintptr_t>(_dst) | reinterpret_cast<uintptr_t>(_src); // only the low bits are tested; cast type assumed
+#endif
+    if (_rk_size_and_alignment(rk_ullong)) {
+        return _rk_cmp_memcpy_big<rk_ullong, rk_ubyte>(_dst, _src, size);
+    }
+    if (_rk_size_and_alignment(rk_ulong)) {
+        return _rk_cmp_memcpy_big<rk_ulong, rk_ubyte>(_dst, _src, size);
+    }
+    if (_rk_size_and_alignment(rk_uint)) {
+        return _rk_cmp_memcpy_big<rk_uint, rk_ubyte>(_dst, _src, size);
+    }
+    if (_rk_size_and_alignment(rk_ushort)) {
+        return _rk_cmp_memcpy_big<rk_ushort, rk_ubyte>(_dst, _src, size);
+    }
+    if (size > 0) { // nothing wider fits: plain byte loop
+        return _rk_cmp_memcpy_small<rk_ubyte>(_dst, _src, size);
+    }
+    return false; // empty range: nothing copied, nothing changed
+}
+
+template<> // rk_ushort-element variant: tries rk_ullong, rk_ulong, rk_uint words, then rk_ushort elements
+inline bool rk_cmp_memcpy<rk_ushort>( // inline: defined in a header. NOTE(review): <...> arguments were stripped in this copy, rebuilt from the size ladder — confirm
+    void * const __restrict _dst, // destination, overwritten with the source contents
+    void const * const __restrict _src, // source; must not overlap _dst
+    unsigned const size) { // byte count; returns true if any copied data differed
+#ifndef RK_CMP_MEMCPY_UNALIGNED
+    unsigned const alignment = reinterpret_cast<uintptr_t>(_dst) | reinterpret_cast<uintptr_t>(_src); // only the low bits are tested; cast type assumed
+#endif
+    if (_rk_size_and_alignment(rk_ullong)) {
+        return _rk_cmp_memcpy_big<rk_ullong, rk_ushort>(_dst, _src, size);
+    }
+    if (_rk_size_and_alignment(rk_ulong)) {
+        return _rk_cmp_memcpy_big<rk_ulong, rk_ushort>(_dst, _src, size);
+    }
+    if (_rk_size_and_alignment(rk_uint)) {
+        return _rk_cmp_memcpy_big<rk_uint, rk_ushort>(_dst, _src, size);
+    }
+    if (size >= sizeof(rk_ushort)) { // at least one whole element, else the element loop's counter would start at 0
+        return _rk_cmp_memcpy_small<rk_ushort>(_dst, _src, size);
+    }
+    return false; // less than one element: nothing copied, nothing changed
+}
+
+template<> // rk_uint-element variant: tries rk_ullong, rk_ulong words, then rk_uint elements
+inline bool rk_cmp_memcpy<rk_uint>( // inline: defined in a header. NOTE(review): <...> arguments were stripped in this copy, rebuilt from the size ladder — confirm
+    void * const __restrict _dst, // destination, overwritten with the source contents
+    void const * const __restrict _src, // source; must not overlap _dst
+    unsigned const size) { // byte count; returns true if any copied data differed
+#ifndef RK_CMP_MEMCPY_UNALIGNED
+    unsigned const alignment = reinterpret_cast<uintptr_t>(_dst) | reinterpret_cast<uintptr_t>(_src); // only the low bits are tested; cast type assumed
+#endif
+    if (_rk_size_and_alignment(rk_ullong)) {
+        return _rk_cmp_memcpy_big<rk_ullong, rk_uint>(_dst, _src, size);
+    }
+    if (_rk_size_and_alignment(rk_ulong)) {
+        return _rk_cmp_memcpy_big<rk_ulong, rk_uint>(_dst, _src, size);
+    }
+    if (size >= sizeof(rk_uint)) { // at least one whole element, else the element loop's counter would start at 0
+        return _rk_cmp_memcpy_small<rk_uint>(_dst, _src, size);
+    }
+    return false; // less than one element: nothing copied, nothing changed
+}
+
+template<> // rk_ulong-element variant: tries rk_ullong words, then rk_ulong elements
+inline bool rk_cmp_memcpy<rk_ulong>( // inline: defined in a header. NOTE(review): <...> arguments were stripped in this copy, rebuilt from the size ladder — confirm
+    void * const __restrict _dst, // destination, overwritten with the source contents
+    void const * const __restrict _src, // source; must not overlap _dst
+    unsigned const size) { // byte count; returns true if any copied data differed
+#ifndef RK_CMP_MEMCPY_UNALIGNED
+    unsigned const alignment = reinterpret_cast<uintptr_t>(_dst) | reinterpret_cast<uintptr_t>(_src); // only the low bits are tested; cast type assumed
+#endif
+    if (_rk_size_and_alignment(rk_ullong)) {
+        return _rk_cmp_memcpy_big<rk_ullong, rk_ulong>(_dst, _src, size);
+    }
+    if (size >= sizeof(rk_ulong)) { // at least one whole element, else the element loop's counter would start at 0
+        return _rk_cmp_memcpy_small<rk_ulong>(_dst, _src, size);
+    }
+    return false; // less than one element: nothing copied, nothing changed
+}
+
+template<> // rk_ullong-element variant: no wider word exists, so it goes straight to the element loop
+inline bool rk_cmp_memcpy<rk_ullong>( // inline: defined in a header. NOTE(review): <...> arguments were stripped in this copy, rebuilt from the size ladder — confirm
+    void * const __restrict _dst, // destination, overwritten with the source contents
+    void const * const __restrict _src, // source; must not overlap _dst
+    unsigned const size) { // byte count; returns true if any copied data differed
+    if (size >= sizeof(rk_ullong)) { // at least one whole element, else the element loop's counter would start at 0
+        return _rk_cmp_memcpy_small<rk_ullong>(_dst, _src, size);
+    }
+    return false; // less than one element: nothing copied, nothing changed
+}
+
+#undef _rk_size_and_alignment
+
+#endif // RK_ENGINE_CMP_MEMCPY_H
diff --git a/cpp/render/render_opengles.cpp b/cpp/render/render_opengles.cpp
index f56a815..15d5d24 100644
--- a/cpp/render/render_opengles.cpp
+++ b/cpp/render/render_opengles.cpp
@@ -18,6 +18,7 @@
#include "../render.hpp"
#include "render_opengles.hpp"
#include "../display/display_glx.hpp"
+#include "../cmp_memcpy.hpp"
#include
#include
#include
@@ -28,32 +29,6 @@ typedef void (*rk_MultiDrawElementsIndirectFunc)(rk_uint, rk_uint, const void *,
static rk_DrawElementsInstancedBaseInstanceFunc rk_DrawElementsInstancedBaseInstance = nullptr;
static rk_MultiDrawElementsIndirectFunc rk_MultiDrawElementsIndirect = nullptr;
-static bool rk_compare_replace(
- void * __restrict _dst,
- void const * __restrict _src,
- unsigned const size) {
- rk_ulong hash = 0;
- rk_ulong * dst = reinterpret_cast<rk_ulong *>(_dst);
- rk_ulong const * src = reinterpret_cast<rk_ulong const *>(_src);
- unsigned count = size / sizeof(rk_ulong);
- unsigned remain = (size - count * sizeof(rk_ulong));
- if (count) {
- do {
- hash |= *dst ^ *src;
- *dst++ = *src++;
- } while(--count > 0);
- }
- if (remain) {
- rk_ubyte * rdst = reinterpret_cast<rk_ubyte *>(dst);
- rk_ubyte const * rsrc = reinterpret_cast<rk_ubyte const *>(src);
- do {
- hash |= *rdst ^ *rsrc;
- *rdst++ = *rsrc++;
- } while(--remain > 0);
- }
- return (hash != 0);
-}
-
#ifdef RK_BUCKETS_SORT
struct rk_bucket {
@@ -143,7 +118,7 @@ static bool rk_buckets_sort(
command->base_index = mesh->base_index;
command->base_vertex = 0;
command->base_instance = indices - batch.indices;
- modified |= rk_compare_replace(indices, bucket->indices, bucket->count * sizeof(rk_ushort));
+ modified |= rk_cmp_memcpy(indices, bucket->indices, bucket->count * sizeof(rk_ushort));
indices += bucket->count;
++command;
}
@@ -937,18 +912,19 @@ void rk_fill_batch(
return;
}
batch->count = count;
- bool const cmp_flags =
- (flags && rk_compare_replace(batch->flags, flags, batch->count * sizeof(rk_instance_flags)));
- bool const cmp_meshes =
- (meshes && rk_compare_replace(batch->meshes, meshes, batch->count * sizeof(rk_mesh)));
+ bool const cmp_flags = (flags &&
+ rk_cmp_memcpy(batch->flags, flags, batch->count * sizeof(rk_instance_flags)));
+ bool const cmp_meshes = (meshes &&
+ rk_cmp_memcpy(batch->meshes, meshes, batch->count * sizeof(rk_mesh)));
bool const need_sorting = (cmp_flags || cmp_meshes || resized);
if (batch->nparams) {
rk_parameter const * const last_param = batch->params + batch->nparams;
if (got_any_params) {
rk_ubyte const * const * src = params;
for (rk_parameter const * dst = batch->params; dst < last_param; ++dst, ++src) {
- dst->dirty =
- (*src && rk_compare_replace(dst->source, *src, batch->count * dst->src_size)) || need_sorting;
+ dst->dirty = ((*src &&
+ rk_cmp_memcpy(dst->source, *src, batch->count * dst->src_size))
+ || need_sorting);
}
} else if (need_sorting) {
for (rk_parameter const * dst = batch->params; dst < last_param; ++dst) {
diff --git a/cpp/types.hpp b/cpp/types.hpp
index b5bd179..6ac3fbb 100644
--- a/cpp/types.hpp
+++ b/cpp/types.hpp
@@ -33,6 +33,8 @@ typedef int32_t rk_int;
typedef uint32_t rk_uint;
typedef int64_t rk_long;
typedef uint64_t rk_ulong;
+typedef __int128 rk_llong; // signed 128-bit integer; __int128 is a compiler extension, not standard C++
+typedef unsigned __int128 rk_ullong; // unsigned 128-bit integer; same compiler extension
typedef float rk_float;
typedef void * rk_handle_t;