Improve compare-memcopy.
This commit is contained in:
parent
558ec08614
commit
3e0ea2560a
169
cpp/cmp_memcpy.hpp
Normal file
169
cpp/cmp_memcpy.hpp
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
// Copyright (C) 2023 RozK
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Affero General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Affero General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#ifndef RK_ENGINE_CMP_MEMCPY_H
|
||||||
|
#define RK_ENGINE_CMP_MEMCPY_H
|
||||||
|
|
||||||
|
#include "types.hpp"
|
||||||
|
|
||||||
|
// Copies whole `_type` elements from `_src` to `_dst` and reports whether
// the destination content was changed by the copy.
//
// _dst  : destination buffer, accessed as an array of `_type`.
// _src  : source buffer, accessed as an array of `_type`; declared
//         `__restrict`, so it must not overlap `_dst`.
// size  : length in bytes; only `size / sizeof(_type)` whole elements are
//         processed, any trailing partial element is left untouched.
//
// Returns true when at least one copied element differed from the value
// previously stored in the destination, false otherwise (including when
// no element is copied at all).
//
// NOTE(review): both pointers are dereferenced as `_type`, so they are
// presumably expected to be suitably aligned for it — confirm with callers.
template<typename _type>
bool _rk_cmp_memcpy_small(
    void * const __restrict _dst,
    void const * const __restrict _src,
    unsigned const size) {
    _type * dst = reinterpret_cast<_type *>(_dst);
    _type const * src = reinterpret_cast<_type const *>(_src);
    // Accumulates the XOR of every (old, new) pair: stays zero only if
    // all pairs were equal.
    _type cmp = 0;
    // Pre-tested loop: with a zero element count nothing must be touched.
    // A do/while here would run once and then wrap the unsigned counter,
    // walking far past the end of both buffers.
    for (unsigned count = static_cast<unsigned>(size / sizeof(_type)); count > 0; --count) {
        cmp |= *dst ^ *src;
        *dst++ = *src++;
    }
    return (cmp != 0);
}
|
||||||
|
|
||||||
|
template<typename _big, typename _small>
|
||||||
|
bool _rk_cmp_memcpy_big(
|
||||||
|
void * const __restrict _dst,
|
||||||
|
void const * const __restrict _src,
|
||||||
|
unsigned const size) {
|
||||||
|
unsigned count = size / sizeof(_big);
|
||||||
|
unsigned const remain = size % sizeof(_big);
|
||||||
|
_big * dst = reinterpret_cast<_big *>(_dst);
|
||||||
|
_big const * src = reinterpret_cast<_big const *>(_src);
|
||||||
|
_big cmp = 0;
|
||||||
|
do {
|
||||||
|
cmp |= *dst ^ *src;
|
||||||
|
*dst++ = *src++;
|
||||||
|
} while(--count > 0);
|
||||||
|
bool modified = (cmp != 0);
|
||||||
|
if (remain) {
|
||||||
|
modified |= _rk_cmp_memcpy_small<_small>(dst, src, remain);
|
||||||
|
}
|
||||||
|
return modified;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef RK_CMP_MEMCPY_UNALIGNED
|
||||||
|
#define _rk_size_and_alignment(_t) (size >= sizeof(_t))
|
||||||
|
#else
|
||||||
|
#define _rk_size_and_alignment(_t) (size >= sizeof(_t) && !(alignment & (sizeof(_t) - 1)))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Compare-and-copy entry point: copies `size` bytes from `_src` to `_dst`
// and returns whether the destination was modified by the copy.
//
// The template argument is the size in bytes of the smallest unit the
// copy may fall back to; only the explicit specializations that follow
// (for the sizes of rk_ubyte, rk_ushort, rk_uint, rk_ulong and rk_ullong)
// are defined, any other value is left undefined.
template<unsigned _element_size>
bool rk_cmp_memcpy(
    void * const __restrict _dst,
    void const * const __restrict _src,
    unsigned const size);
|
||||||
|
|
||||||
|
template<>
|
||||||
|
bool rk_cmp_memcpy<sizeof(rk_ubyte)>(
|
||||||
|
void * const __restrict _dst,
|
||||||
|
void const * const __restrict _src,
|
||||||
|
unsigned const size) {
|
||||||
|
#ifndef RK_CMP_MEMCPY_UNALIGNED
|
||||||
|
unsigned const alignment = reinterpret_cast<uintptr_t>(_dst) | reinterpret_cast<uintptr_t const>(_src);
|
||||||
|
#endif
|
||||||
|
if (_rk_size_and_alignment(rk_ullong)) {
|
||||||
|
return _rk_cmp_memcpy_big<rk_ullong, rk_ubyte>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
if (_rk_size_and_alignment(rk_ulong)) {
|
||||||
|
return _rk_cmp_memcpy_big<rk_ulong, rk_ubyte>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
if (_rk_size_and_alignment(rk_uint)) {
|
||||||
|
return _rk_cmp_memcpy_big<rk_uint, rk_ubyte>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
if (_rk_size_and_alignment(rk_ushort)) {
|
||||||
|
return _rk_cmp_memcpy_big<rk_ushort, rk_ubyte>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
if (size > 0) {
|
||||||
|
return _rk_cmp_memcpy_small<rk_ubyte>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
bool rk_cmp_memcpy<sizeof(rk_ushort)>(
|
||||||
|
void * const __restrict _dst,
|
||||||
|
void const * const __restrict _src,
|
||||||
|
unsigned const size) {
|
||||||
|
#ifndef RK_CMP_MEMCPY_UNALIGNED
|
||||||
|
unsigned const alignment = reinterpret_cast<uintptr_t>(_dst) | reinterpret_cast<uintptr_t const>(_src);
|
||||||
|
#endif
|
||||||
|
if (_rk_size_and_alignment(rk_ullong)) {
|
||||||
|
return _rk_cmp_memcpy_big<rk_ullong, rk_ushort>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
if (_rk_size_and_alignment(rk_ulong)) {
|
||||||
|
return _rk_cmp_memcpy_big<rk_ulong, rk_ushort>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
if (_rk_size_and_alignment(rk_uint)) {
|
||||||
|
return _rk_cmp_memcpy_big<rk_uint, rk_ushort>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
if (size > 0) {
|
||||||
|
return _rk_cmp_memcpy_small<rk_ushort>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
bool rk_cmp_memcpy<sizeof(rk_uint)>(
|
||||||
|
void * const __restrict _dst,
|
||||||
|
void const * const __restrict _src,
|
||||||
|
unsigned const size) {
|
||||||
|
#ifndef RK_CMP_MEMCPY_UNALIGNED
|
||||||
|
unsigned const alignment = reinterpret_cast<uintptr_t>(_dst) | reinterpret_cast<uintptr_t const>(_src);
|
||||||
|
#endif
|
||||||
|
if (_rk_size_and_alignment(rk_ullong)) {
|
||||||
|
return _rk_cmp_memcpy_big<rk_ullong, rk_uint>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
if (_rk_size_and_alignment(rk_ulong)) {
|
||||||
|
return _rk_cmp_memcpy_big<rk_ulong, rk_uint>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
if (size > 0) {
|
||||||
|
return _rk_cmp_memcpy_small<rk_uint>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
bool rk_cmp_memcpy<sizeof(rk_ulong)>(
|
||||||
|
void * const __restrict _dst,
|
||||||
|
void const * const __restrict _src,
|
||||||
|
unsigned const size) {
|
||||||
|
#ifndef RK_CMP_MEMCPY_UNALIGNED
|
||||||
|
unsigned const alignment = reinterpret_cast<uintptr_t>(_dst) | reinterpret_cast<uintptr_t const>(_src);
|
||||||
|
#endif
|
||||||
|
if (_rk_size_and_alignment(rk_ullong)) {
|
||||||
|
return _rk_cmp_memcpy_big<rk_ullong, rk_ulong>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
if (size > 0) {
|
||||||
|
return _rk_cmp_memcpy_small<rk_ulong>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
bool rk_cmp_memcpy<sizeof(rk_ullong)>(
|
||||||
|
void * const __restrict _dst,
|
||||||
|
void const * const __restrict _src,
|
||||||
|
unsigned const size) {
|
||||||
|
if (size > 0) {
|
||||||
|
return _rk_cmp_memcpy_small<rk_ullong>(_dst, _src, size);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef _rk_size_and_alignment
|
||||||
|
|
||||||
|
#endif // RK_ENGINE_CMP_MEMCPY_H
|
@ -18,6 +18,7 @@
|
|||||||
#include "../render.hpp"
|
#include "../render.hpp"
|
||||||
#include "render_opengles.hpp"
|
#include "render_opengles.hpp"
|
||||||
#include "../display/display_glx.hpp"
|
#include "../display/display_glx.hpp"
|
||||||
|
#include "../cmp_memcpy.hpp"
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
@ -28,32 +29,6 @@ typedef void (*rk_MultiDrawElementsIndirectFunc)(rk_uint, rk_uint, const void *,
|
|||||||
static rk_DrawElementsInstancedBaseInstanceFunc rk_DrawElementsInstancedBaseInstance = nullptr;
|
static rk_DrawElementsInstancedBaseInstanceFunc rk_DrawElementsInstancedBaseInstance = nullptr;
|
||||||
static rk_MultiDrawElementsIndirectFunc rk_MultiDrawElementsIndirect = nullptr;
|
static rk_MultiDrawElementsIndirectFunc rk_MultiDrawElementsIndirect = nullptr;
|
||||||
|
|
||||||
static bool rk_compare_replace(
|
|
||||||
void * __restrict _dst,
|
|
||||||
void const * __restrict _src,
|
|
||||||
unsigned const size) {
|
|
||||||
rk_ulong hash = 0;
|
|
||||||
rk_ulong * dst = reinterpret_cast<rk_ulong *>(_dst);
|
|
||||||
rk_ulong const * src = reinterpret_cast<rk_ulong const *>(_src);
|
|
||||||
unsigned count = size / sizeof(rk_ulong);
|
|
||||||
unsigned remain = (size - count * sizeof(rk_ulong));
|
|
||||||
if (count) {
|
|
||||||
do {
|
|
||||||
hash |= *dst ^ *src;
|
|
||||||
*dst++ = *src++;
|
|
||||||
} while(--count > 0);
|
|
||||||
}
|
|
||||||
if (remain) {
|
|
||||||
rk_ubyte * rdst = reinterpret_cast<rk_ubyte *>(dst);
|
|
||||||
rk_ubyte const * rsrc = reinterpret_cast<rk_ubyte const *>(src);
|
|
||||||
do {
|
|
||||||
hash |= *rdst ^ *rsrc;
|
|
||||||
*rdst++ = *rsrc++;
|
|
||||||
} while(--remain > 0);
|
|
||||||
}
|
|
||||||
return (hash != 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef RK_BUCKETS_SORT
|
#ifdef RK_BUCKETS_SORT
|
||||||
|
|
||||||
struct rk_bucket {
|
struct rk_bucket {
|
||||||
@ -143,7 +118,7 @@ static bool rk_buckets_sort(
|
|||||||
command->base_index = mesh->base_index;
|
command->base_index = mesh->base_index;
|
||||||
command->base_vertex = 0;
|
command->base_vertex = 0;
|
||||||
command->base_instance = indices - batch.indices;
|
command->base_instance = indices - batch.indices;
|
||||||
modified |= rk_compare_replace(indices, bucket->indices, bucket->count * sizeof(rk_ushort));
|
modified |= rk_cmp_memcpy<sizeof(rk_ushort)>(indices, bucket->indices, bucket->count * sizeof(rk_ushort));
|
||||||
indices += bucket->count;
|
indices += bucket->count;
|
||||||
++command;
|
++command;
|
||||||
}
|
}
|
||||||
@ -937,18 +912,19 @@ void rk_fill_batch(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
batch->count = count;
|
batch->count = count;
|
||||||
bool const cmp_flags =
|
bool const cmp_flags = (flags &&
|
||||||
(flags && rk_compare_replace(batch->flags, flags, batch->count * sizeof(rk_instance_flags)));
|
rk_cmp_memcpy<sizeof(rk_ubyte)>(batch->flags, flags, batch->count * sizeof(rk_instance_flags)));
|
||||||
bool const cmp_meshes =
|
bool const cmp_meshes = (meshes &&
|
||||||
(meshes && rk_compare_replace(batch->meshes, meshes, batch->count * sizeof(rk_mesh)));
|
rk_cmp_memcpy<sizeof(rk_ushort)>(batch->meshes, meshes, batch->count * sizeof(rk_mesh)));
|
||||||
bool const need_sorting = (cmp_flags || cmp_meshes || resized);
|
bool const need_sorting = (cmp_flags || cmp_meshes || resized);
|
||||||
if (batch->nparams) {
|
if (batch->nparams) {
|
||||||
rk_parameter const * const last_param = batch->params + batch->nparams;
|
rk_parameter const * const last_param = batch->params + batch->nparams;
|
||||||
if (got_any_params) {
|
if (got_any_params) {
|
||||||
rk_ubyte const * const * src = params;
|
rk_ubyte const * const * src = params;
|
||||||
for (rk_parameter const * dst = batch->params; dst < last_param; ++dst, ++src) {
|
for (rk_parameter const * dst = batch->params; dst < last_param; ++dst, ++src) {
|
||||||
dst->dirty =
|
dst->dirty = ((*src &&
|
||||||
(*src && rk_compare_replace(dst->source, *src, batch->count * dst->src_size)) || need_sorting;
|
rk_cmp_memcpy<sizeof(rk_uint)>(dst->source, *src, batch->count * dst->src_size))
|
||||||
|
|| need_sorting);
|
||||||
}
|
}
|
||||||
} else if (need_sorting) {
|
} else if (need_sorting) {
|
||||||
for (rk_parameter const * dst = batch->params; dst < last_param; ++dst) {
|
for (rk_parameter const * dst = batch->params; dst < last_param; ++dst) {
|
||||||
|
@ -33,6 +33,8 @@ typedef int32_t rk_int;
|
|||||||
typedef uint32_t rk_uint;
|
typedef uint32_t rk_uint;
|
||||||
typedef int64_t rk_long;
|
typedef int64_t rk_long;
|
||||||
typedef uint64_t rk_ulong;
|
typedef uint64_t rk_ulong;
|
||||||
|
typedef __int128 rk_llong;
|
||||||
|
typedef unsigned __int128 rk_ullong;
|
||||||
typedef float rk_float;
|
typedef float rk_float;
|
||||||
typedef void * rk_handle_t;
|
typedef void * rk_handle_t;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user