Compare commits
2 Commits
a5adfacdfd
...
558ec08614
Author | SHA1 | Date | |
---|---|---|---|
558ec08614 | |||
211762c279 |
2
Makefile
2
Makefile
@ -21,7 +21,7 @@ cpp/math.cpp
|
||||
|
||||
OUTPUTFILE = engine.so
|
||||
|
||||
CXXFLAGS = -Wall -Werror -O2 -march=x86-64 -mtune=generic -msse2 -ffast-math -fpic -flto -fno-rtti -fno-exceptions
|
||||
CXXFLAGS = -Wall -Werror -O2 -msse2 -ffast-math -fpic -flto -fno-rtti -fno-exceptions
|
||||
|
||||
.PHONY: all
|
||||
all: clean $(OUTPUTFILE)
|
||||
|
@ -13,9 +13,12 @@
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#define RK_BUCKETS_SORT
|
||||
|
||||
#include "../render.hpp"
|
||||
#include "render_opengles.hpp"
|
||||
#include "../display/display_glx.hpp"
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
@ -25,6 +28,141 @@ typedef void (*rk_MultiDrawElementsIndirectFunc)(rk_uint, rk_uint, const void *,
|
||||
static rk_DrawElementsInstancedBaseInstanceFunc rk_DrawElementsInstancedBaseInstance = nullptr;
|
||||
static rk_MultiDrawElementsIndirectFunc rk_MultiDrawElementsIndirect = nullptr;
|
||||
|
||||
// Overwrites the `size` bytes at _dst with the bytes at _src and reports
// whether the destination content was different before the copy.
//
// The two regions must not overlap (both pointers are __restrict-qualified).
//
// Comparison and copy go through memcmp()/memcpy() rather than casting the
// buffers to rk_ulong *: the parameters are untyped pointers, and
// rk_buckets_sort() passes an address inside an rk_ushort array advanced by an
// arbitrary element count, so word-sized accesses through a casted pointer
// could be misaligned and would break strict aliasing.
//
// Returns true when at least one byte changed, false otherwise (including
// when size is 0, in which case neither buffer is touched).
static bool rk_compare_replace(
    void * __restrict _dst,
    void const * __restrict _src,
    unsigned const size) {
    if (size == 0) {
        return false;
    }
    if (memcmp(_dst, _src, size) == 0) {
        // Already identical: copying would be a no-op.
        return false;
    }
    memcpy(_dst, _src, size);
    return true;
}
|
||||
|
||||
#ifdef RK_BUCKETS_SORT
|
||||
|
||||
struct rk_bucket {
|
||||
unsigned size;
|
||||
unsigned count;
|
||||
rk_ushort * indices;
|
||||
};
|
||||
|
||||
static unsigned rk_nbuckets = 0;
|
||||
static rk_bucket * rk_buckets = nullptr;
|
||||
|
||||
// Makes sure the global bucket table is large enough for `batch`.
//
// After the call the table holds at least batch.vertices->nmeshes buckets and
// each of the first nmeshes buckets has room for at least
// max(1, batch.max_size / nmeshes) indices. The table and the buckets only
// ever grow, so buckets sized for an earlier batch are kept as they are.
//
// Does nothing when the batch has no mesh. Running out of memory is treated
// as fatal: a message is written to stderr and the process is aborted.
// Prints the total memory held by the table, in KiB, on stdout.
static void rk_buckets_alloc(
    rk_batch const & batch) {
    unsigned const count = batch.vertices->nmeshes;
    if (count == 0) {
        // Nothing to allocate, and the per-bucket quota below would divide by zero.
        return;
    }
    // A bucket must never have a capacity of zero: rk_buckets_sort() grows a
    // full bucket by doubling its size, which makes no progress from 0.
    unsigned const quota = batch.max_size / count;
    unsigned const size = quota ? quota : 1;

    if (count > rk_nbuckets) {
        // realloc(nullptr, n) behaves like malloc(n), so the very first
        // allocation and later growth share this path.
        rk_bucket * const grown = reinterpret_cast<rk_bucket *>(
            realloc(rk_buckets, count * sizeof(rk_bucket)));
        if (!grown) {
            fprintf(stderr, "[RK] rk_buckets_alloc() -> out of memory\n");
            abort();
        }
        rk_buckets = grown;
        // New entries start empty; their storage is allocated by the loop below.
        for (unsigned index = rk_nbuckets; index < count; ++index) {
            rk_bucket & bucket = rk_buckets[index];
            bucket.size = 0;
            bucket.count = 0;
            bucket.indices = nullptr;
        }
        rk_nbuckets = count;
    }

    for (unsigned index = 0; index < count; ++index) {
        rk_bucket & bucket = rk_buckets[index];
        if (bucket.size < size) {
            // Keep the old pointer until the new one is known to be valid.
            rk_ushort * const grown = reinterpret_cast<rk_ushort *>(
                realloc(bucket.indices, size * sizeof(rk_ushort)));
            if (!grown) {
                fprintf(stderr, "[RK] rk_buckets_alloc() -> out of memory\n");
                abort();
            }
            bucket.indices = grown;
            bucket.size = size;
        }
    }

    unsigned total_size = rk_nbuckets * sizeof(rk_bucket);
    for (unsigned index = 0; index < rk_nbuckets; ++index) {
        rk_bucket const & bucket = rk_buckets[index];
        total_size += bucket.size * sizeof(rk_ushort);
    }
    // %u: the value is unsigned.
    printf("[RK] rk_buckets_alloc() -> %u KiB\n", total_size / 1024);
}
|
||||
|
||||
// Groups the visible instances of `batch` by mesh and rebuilds the batch's
// sorted index list and draw commands.
//
// Pass 1 appends the index of every instance whose flags contain
// RK_INSTANCE_FLAGS_SPAWNED_VISIBLE to the bucket of its mesh, growing the
// bucket when it is full.
// Pass 2 walks the buckets in mesh order, writes one command per non-empty
// bucket into batch.commands and copies the bucket content into
// batch.indices; batch.ninstances and batch.ncommands are updated to match.
//
// Returns true when the index data or any command changed. Commands are
// compared as well as indices because rk_sort_batch() only re-uploads the
// indirect command buffer when this function returns true, and a command can
// change while the compared index bytes stay the same (for example when the
// last visible instance of the last bucket becomes hidden).
//
// Running out of memory while growing a bucket is treated as fatal: a message
// is written to stderr and the process is aborted.
static bool rk_buckets_sort(
    rk_batch const & batch) {
    bool reallocated = false;
    rk_bucket const * const last_bucket = rk_buckets + batch.vertices->nmeshes;
    for (rk_bucket * bucket = rk_buckets; bucket < last_bucket; ++bucket) {
        bucket->count = 0;
    }

    rk_instance_flags const * flags = batch.flags;
    rk_ushort const * mesh_index = batch.meshes;
    for (unsigned index = 0; index < batch.count; ++index, ++flags, ++mesh_index) {
        if ((*flags & RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) != RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) {
            continue;
        }
        rk_bucket & bucket = rk_buckets[*mesh_index];
        if (bucket.count == bucket.size) {
            // Double the capacity, but make sure an empty bucket still grows.
            unsigned const new_size = bucket.size ? bucket.size * 2 : 1;
            // Keep the old pointer until the new one is known to be valid.
            rk_ushort * const grown = reinterpret_cast<rk_ushort *>(
                realloc(bucket.indices, new_size * sizeof(rk_ushort)));
            if (!grown) {
                fprintf(stderr, "[RK] rk_buckets_sort() -> out of memory\n");
                abort();
            }
            bucket.indices = grown;
            bucket.size = new_size;
            reallocated = true;
        }
        bucket.indices[bucket.count++] = index;
    }

    bool modified = false;
    rk_ushort * indices = batch.indices;
    rk_command * command = batch.commands;
    rk_mesh const * mesh = batch.vertices->meshes;
    for (rk_bucket const * bucket = rk_buckets; bucket < last_bucket; ++bucket, ++mesh) {
        if (!bucket->count) {
            continue;
        }
        // Remember what the slot held so a changed command is reported.
        rk_command const previous = *command;
        command->nvertices = static_cast<GLuint>(mesh->ntriangles) * 3;
        command->ninstances = bucket->count;
        command->base_index = mesh->base_index;
        command->base_vertex = 0;
        command->base_instance = indices - batch.indices;
        if (previous.nvertices != command->nvertices ||
            previous.ninstances != command->ninstances ||
            previous.base_index != command->base_index ||
            previous.base_vertex != command->base_vertex ||
            previous.base_instance != command->base_instance) {
            modified = true;
        }
        modified |= rk_compare_replace(indices, bucket->indices, bucket->count * sizeof(rk_ushort));
        indices += bucket->count;
        ++command;
    }
    batch.ninstances = indices - batch.indices;
    batch.ncommands = command - batch.commands;

    if (reallocated) {
        // Report the new memory footprint of the bucket table.
        unsigned total_size = rk_nbuckets * sizeof(rk_bucket);
        for (unsigned index = 0; index < rk_nbuckets; ++index) {
            rk_bucket const & bucket = rk_buckets[index];
            total_size += bucket.size * sizeof(rk_ushort);
        }
        // %u: the value is unsigned.
        printf("[RK] rk_buckets_sort() -> %u KiB\n", total_size / 1024);
    }
    return modified;
}
|
||||
|
||||
#endif // RK_BUCKETS_SORT
|
||||
|
||||
// Writes `message` to stdout on its own line, prefixed with "[GL] ".
static void rk_gl_printf(char const * message)
{
    fprintf(stdout, "[GL] %s\n", message);
}
|
||||
@ -687,11 +825,20 @@ rk_batch_t rk_create_batch(
|
||||
}
|
||||
}
|
||||
glBindVertexArray(0);
|
||||
#ifdef RK_BUCKETS_SORT
|
||||
rk_buckets_alloc(*batch);
|
||||
#endif
|
||||
return reinterpret_cast<rk_batch_t>(batch);
|
||||
}
|
||||
|
||||
static void rk_sort_batch(
|
||||
rk_batch const & batch) {
|
||||
#ifdef RK_BUCKETS_SORT
|
||||
bool const modified = rk_buckets_sort(batch);
|
||||
if (modified && rk_MultiDrawElementsIndirect) {
|
||||
glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, batch.ncommands * sizeof(rk_command), batch.commands);
|
||||
}
|
||||
#else
|
||||
rk_instance_flags const * flags = batch.flags;
|
||||
rk_ushort * indices = batch.indices;
|
||||
for (unsigned index = 0; index < batch.count; ++index, ++flags) {
|
||||
@ -729,6 +876,7 @@ static void rk_sort_batch(
|
||||
glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, batch.ncommands * sizeof(rk_command), batch.commands);
|
||||
}
|
||||
}
|
||||
#endif // RK_BUCKETS_SORT
|
||||
batch.state = RK_BATCH_STATE_SORTED;
|
||||
}
|
||||
|
||||
@ -755,32 +903,6 @@ static void rk_pack_batch(
|
||||
batch.state = RK_BATCH_STATE_PACKED;
|
||||
}
|
||||
|
||||
// Overwrites the `size` bytes at _dst with the bytes at _src and reports
// whether the destination content was different before the copy.
//
// The two regions must not overlap (both pointers are __restrict-qualified).
//
// Comparison and copy go through memcmp()/memcpy() rather than casting the
// buffers to rk_ulong *: the parameters are untyped pointers that carry no
// alignment guarantee, so word-sized accesses through a casted pointer could
// be misaligned and would break strict aliasing.
//
// Returns true when at least one byte changed, false otherwise (including
// when size is 0, in which case neither buffer is touched).
static bool rk_compare_replace(
    void * __restrict _dst,
    void const * __restrict _src,
    unsigned const size) {
    if (size == 0) {
        return false;
    }
    if (memcmp(_dst, _src, size) == 0) {
        // Already identical: copying would be a no-op.
        return false;
    }
    memcpy(_dst, _src, size);
    return true;
}
|
||||
|
||||
void rk_fill_batch(
|
||||
rk_batch_t _batch,
|
||||
rk_uint count,
|
||||
|
Loading…
Reference in New Issue
Block a user