// Copyright (C) 2022 RozK // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . #include "../render.hpp" #include "render_opengles.hpp" #include "../display/display_glx.hpp" #include "../cmp_memcpy.hpp" #include #include #include typedef void (*rk_DrawElementsInstancedBaseInstanceFunc)(rk_uint, rk_uint, rk_uint, const void *, rk_uint, rk_uint); typedef void (*rk_MultiDrawElementsIndirectFunc)(rk_uint, rk_uint, const void *, rk_uint, rk_uint); static rk_DrawElementsInstancedBaseInstanceFunc rk_DrawElementsInstancedBaseInstance = nullptr; static rk_MultiDrawElementsIndirectFunc rk_MultiDrawElementsIndirect = nullptr; struct rk_bucket { unsigned size; unsigned count; rk_ushort * indices; }; static unsigned rk_nbuckets = 0; static rk_bucket * rk_buckets = nullptr; static void rk_gl_printf(char const * message) { printf("[GL] %s\n", message); } static void rk_printf(char const * message) { printf("[RK] %s\n", message); } static void rk_debug_message_callback( GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, GLchar const * message, void const * userParam) { printf("[GL] (id=%d) %s\n", id, message); } void rk_render_initialize( rk_bool debug) { GLubyte const * const vendor = glGetString(GL_VENDOR); GLubyte const * const renderer = glGetString(GL_RENDERER); printf("[GL] vendor: %s, renderer: %s\n", vendor, renderer); GLubyte const * const version = glGetString(GL_VERSION); GLubyte const * const language = glGetString(GL_SHADING_LANGUAGE_VERSION); printf("[GL] version: %s, language: %s\n", version, language); if (debug) { GLint max_texture_layers = 0; glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &max_texture_layers); printf("[GL] Max texture layers: %d\n", max_texture_layers); GLint max_vertex_attribs = 0; glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attribs); printf("[GL] Max vertex attribs: %d\n", max_vertex_attribs); GLint max_vertex_bindings = 0; glGetIntegerv(GL_MAX_VERTEX_ATTRIB_BINDINGS, &max_vertex_bindings); printf("[GL] Max vertex bindings: %d\n", max_vertex_bindings); glDebugMessageCallback(rk_debug_message_callback, nullptr); glEnable(GL_DEBUG_OUTPUT); } char const * const gl_exts = reinterpret_cast(glGetString(GL_EXTENSIONS)); // printf("[GL] %s\n", gl_exts); rk_DrawElementsInstancedBaseInstance = reinterpret_cast( rk_resolve_extension(gl_exts, "GL_EXT_base_instance", "DrawElementsInstancedBaseInstance")); if (rk_DrawElementsInstancedBaseInstance) { rk_gl_printf("Using extension GL_EXT_base_instance::DrawElementsInstancedBaseInstance."); rk_MultiDrawElementsIndirect = reinterpret_cast( rk_resolve_extension(gl_exts, "GL_EXT_multi_draw_indirect", "MultiDrawElementsIndirectEXT")); if (rk_MultiDrawElementsIndirect) { rk_gl_printf("Using extension GL_EXT_multi_draw_indirect::MultiDrawElementsIndirectEXT."); } } glDisable(GL_BLEND); glDisable(GL_DITHER); glEnable(GL_DEPTH_TEST); glDisable(GL_SCISSOR_TEST); glDisable(GL_STENCIL_TEST); glEnable(GL_CULL_FACE); glFrontFace(GL_CCW); glCullFace(GL_BACK); glHint(GL_GENERATE_MIPMAP_HINT, GL_NICEST); } void rk_render_terminate() { } static char rk_infolog[1024]; static void rk_print_shader_infolog(GLuint shader) { GLsizei length; glGetShaderInfoLog(shader, sizeof(rk_infolog) - 1, &length, rk_infolog); if (length > 0) { rk_gl_printf(rk_infolog); } } static void rk_print_program_infolog(GLuint program) { GLsizei length; glGetProgramInfoLog(program, sizeof(rk_infolog) - 1, &length, rk_infolog); if (length > 0) { rk_gl_printf(rk_infolog); } } rk_shader_t rk_create_shader( rk_uint const vert_nlines, rk_char const * const * vert_lines, rk_uint const frag_nlines, rk_char const * const * const frag_lines) { if (!vert_nlines || !vert_lines || !frag_nlines || !frag_lines) { rk_printf("rk_load_shader(): invalid params."); return RK_INVALID_HANDLE; } rk_printf("Compiling vertex shader..."); while (glGetError() != GL_NO_ERROR); GLuint const vert = glCreateShader(GL_VERTEX_SHADER); glShaderSource(vert, vert_nlines, vert_lines, nullptr); glCompileShader(vert); GLint vert_success = 0; glGetShaderiv(vert, GL_COMPILE_STATUS, &vert_success); if (!vert_success || glGetError() != GL_NO_ERROR) { rk_print_shader_infolog(vert); glDeleteShader(vert); return RK_INVALID_HANDLE; } rk_printf("Compiling fragment shader..."); while (glGetError() != GL_NO_ERROR); GLuint const frag = glCreateShader(GL_FRAGMENT_SHADER); glShaderSource(frag, frag_nlines, frag_lines, nullptr); glCompileShader(frag); GLint frag_success = 0; glGetShaderiv(frag, GL_COMPILE_STATUS, &frag_success); if (!frag_success || glGetError() != GL_NO_ERROR) { rk_print_shader_infolog(frag); glDeleteShader(vert); glDeleteShader(frag); return RK_INVALID_HANDLE; } rk_printf("Linking program..."); while (glGetError() != GL_NO_ERROR); GLuint prog = glCreateProgram(); glAttachShader(prog, vert); glAttachShader(prog, frag); glLinkProgram(prog); GLint prog_success = 0; glGetProgramiv(prog, GL_LINK_STATUS, &prog_success); if (!prog_success || glGetError() != GL_NO_ERROR) { rk_print_program_infolog(prog); glDeleteShader(vert); glDeleteShader(frag); glDeleteProgram(prog); return RK_INVALID_HANDLE; } rk_printf("Done."); glReleaseShaderCompiler(); rk_shader * const shader = new rk_shader; shader->vertex = vert; shader->fragment = frag; shader->program = prog; return reinterpret_cast(shader); } rk_input_t rk_resolve_input( rk_shader_t _shader, rk_char const * name) { rk_shader const * const shader = reinterpret_cast(_shader); if (!shader || !name) { rk_printf("rk_resolve_input(): invalid params."); return RK_INVALID_HANDLE; } GLint const uniform = glGetUniformLocation(shader->program, name); if (uniform < 0) { printf("[RK] rk_resolve_input(): uniform %s not found.\n", name); } return reinterpret_cast(uniform + 1); } rk_param_t rk_resolve_param( rk_shader_t _shader, rk_char const * name) { rk_shader const * const shader = reinterpret_cast(_shader); if (!shader || !name) { rk_printf("rk_resolve_param(): invalid params."); return RK_INVALID_HANDLE; } GLint const location = glGetAttribLocation(shader->program, name); if (location < 0) { printf("[RK] rk_resolve_param(): attrib %s not found.\n", name); } return reinterpret_cast(location + 1); } rk_texture_t rk_create_texture( rk_texture_format format, rk_uint width, rk_uint height, rk_uint nlevels, rk_texture_flags flags, rk_ubyte const * pixels) { if (!width || !height || !pixels) { rk_printf("rk_create_texture(): invalid params."); return RK_INVALID_HANDLE; } GLint internal_format; GLenum source_format; GLenum source_type; switch (format) { case RK_TEXTURE_FORMAT_SRGB8_A8: internal_format = GL_SRGB8_ALPHA8; source_format = GL_RGBA; source_type = GL_UNSIGNED_BYTE; break; case RK_TEXTURE_FORMAT_RGBA8: internal_format = GL_RGBA8; source_format = GL_RGBA; source_type = GL_UNSIGNED_BYTE; break; case RK_TEXTURE_FORMAT_RGB10_A2: internal_format = GL_RGB10_A2; source_format = GL_RGBA; source_type = GL_UNSIGNED_INT_2_10_10_10_REV; break; case RK_TEXTURE_FORMAT_FLOAT_32: internal_format = GL_R32F; source_format = GL_RED; source_type = GL_FLOAT; break; default: rk_printf("rk_create_texture(): invalid texture format."); return RK_INVALID_HANDLE; break; } rk_texture * const texture = new rk_texture; glGenTextures(1, &texture->texture); GLenum target; if (nlevels) { if (flags & RK_TEXTURE_FLAG_3D) { target = GL_TEXTURE_3D; } else { target = GL_TEXTURE_2D_ARRAY; } glBindTexture(target, texture->texture); //TODO: glTexStorage3D glTexImage3D(target, 0, internal_format, width, height, nlevels, 0, source_format, source_type, pixels); } else { target = GL_TEXTURE_2D; glBindTexture(target, texture->texture); //TODO: glTexStorage2D glTexImage2D(target, 0, internal_format, width, height, 0, source_format, source_type, pixels); } if (flags & RK_TEXTURE_FLAG_MIPMAPS) { if (flags & RK_TEXTURE_FLAG_MIN_LINEAR) { glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); } else { glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_NEAREST_MIPMAP_LINEAR); } } else { if (flags & RK_TEXTURE_FLAG_MIN_LINEAR) { glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); } else { glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_NEAREST); } } if (flags & RK_TEXTURE_FLAG_MAG_LINEAR) { glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); } else { glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_NEAREST); } glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_REPEAT); glTexParameteri(target, GL_TEXTURE_WRAP_R, GL_REPEAT); if (flags & RK_TEXTURE_FLAG_MIPMAPS) { glGenerateMipmap(target); } texture->nlevels = nlevels; glBindTexture(target, 0); return reinterpret_cast(texture); } rk_triangles_t rk_create_triangles( rk_uint nvertices, rk_vec3 const * vertices) { if (!nvertices || !vertices) { rk_printf("rk_create_triangles(): invalid params."); return RK_INVALID_HANDLE; } rk_triangles * const triangles = new rk_triangles; triangles->size = nvertices; glGenVertexArrays(1, &triangles->array); glBindVertexArray(triangles->array); glGenBuffers(1, &triangles->vertices); glBindBuffer(GL_ARRAY_BUFFER, triangles->vertices); glBufferData(GL_ARRAY_BUFFER, nvertices * sizeof(rk_vec3), vertices, GL_STATIC_DRAW); glEnableVertexAttribArray(0); glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 0, 0); glBindBuffer(GL_ARRAY_BUFFER, 0); glBindVertexArray(0); return reinterpret_cast(triangles); } rk_vertices_t rk_create_vertices( rk_vertex_format const * format, rk_uint nvertices, rk_ubyte const * _vertices, rk_uint nindices, rk_ushort const * indices, rk_uint nmeshes, rk_mesh const * meshes) { if (!format || !nvertices || !_vertices || !nindices || !indices) { rk_printf("rk_create_vertices(): invalid params."); return RK_INVALID_HANDLE; } unsigned format_size = 0; unsigned vertex_size = 0; for (rk_vertex_format const * f = format; *f; ++f, ++format_size) { switch (*f & RK_VERTEX_FORMAT_MASK) { case RK_VERTEX_FORMAT_VEC3_FLOAT: vertex_size += sizeof(rk_vec3_float); break; case RK_VERTEX_FORMAT_VEC3_INT10: vertex_size += sizeof(rk_vec3_int10); break; case RK_VERTEX_FORMAT_VEC3_UINT10: vertex_size += sizeof(rk_vec3_uint10); break; default: rk_printf("rk_create_vertices(): invalid vertex format."); return RK_INVALID_HANDLE; break; } } if (!format_size) { rk_printf("rk_create_vertices(): empty vertex format."); return RK_INVALID_HANDLE; } rk_vertices * const vertices = new rk_vertices; vertices->nvertices = nvertices; vertices->nindices = nindices; vertices->nmeshes = nmeshes; vertices->format = new rk_vertex_format[format_size + 1]; memcpy(vertices->format, format, (format_size + 1) * sizeof(rk_vertex_format)); vertices->vertices = new rk_ubyte[nvertices * vertex_size]; memcpy(vertices->vertices, _vertices, nvertices * vertex_size); vertices->indices = new rk_ushort[nindices]; memcpy(vertices->indices, indices, nindices * sizeof(rk_ushort)); vertices->meshes = new rk_mesh[nmeshes]; memcpy(vertices->meshes, meshes, nmeshes * sizeof(rk_mesh)); vertices->vertices_buffer = 0; vertices->indices_buffer = 0; return reinterpret_cast(vertices); } static void rk_buckets_alloc( rk_batch const & batch) { unsigned const count = batch.vertices->nmeshes; unsigned const size = batch.max_size; bool reallocated = false; if (!rk_nbuckets) { rk_nbuckets = count; rk_buckets = reinterpret_cast(malloc(count * sizeof(rk_bucket))); for (unsigned index = 0; index < count; ++index) { rk_bucket & bucket = rk_buckets[index]; bucket.size = size; bucket.indices = reinterpret_cast(malloc(size * sizeof(rk_ushort))); } reallocated = true; } else if (count <= rk_nbuckets) { for (unsigned index = 0; index < count; ++index) { rk_bucket & bucket = rk_buckets[index]; if (bucket.size < size) { bucket.size = size; bucket.indices = reinterpret_cast(realloc(bucket.indices, size * sizeof(rk_ushort))); reallocated = true; } } } else { rk_buckets = reinterpret_cast(realloc(rk_buckets, count * sizeof(rk_bucket))); for (unsigned index = 0; index < rk_nbuckets; ++index) { rk_bucket & bucket = rk_buckets[index]; if (bucket.size < size) { bucket.size = size; bucket.indices = reinterpret_cast(realloc(bucket.indices, size * sizeof(rk_ushort))); } } for (unsigned index = rk_nbuckets; index < count; ++index) { rk_bucket & bucket = rk_buckets[index]; bucket.size = size; bucket.indices = reinterpret_cast(malloc(size * sizeof(rk_ushort))); } rk_nbuckets = count; reallocated = true; } if (reallocated) { unsigned total_size = rk_nbuckets * sizeof(rk_bucket); for (unsigned index = 0; index < rk_nbuckets; ++index) { rk_bucket const & bucket = rk_buckets[index]; total_size += bucket.size * sizeof(rk_ushort); } printf("[RK] rk_buckets_alloc() -> %d KiB\n", total_size / 1024); } } static void rk_pack_vec3_float( unsigned const count, rk_ushort const * const __restrict indices, rk_ubyte * __restrict _dst, rk_ubyte const * const __restrict _src) { rk_ushort const * const last_index = indices + count; rk_vec3_float * __restrict dst = reinterpret_cast(_dst); rk_vec3_float const * const __restrict src = reinterpret_cast(_src); for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { *dst = src[*index]; } } static void rk_pack_vec3_short( unsigned const count, rk_ushort const * const __restrict indices, rk_ubyte * __restrict _dst, rk_ubyte const * const __restrict _src) { rk_ushort const * const last_index = indices + count; rk_vec3_short * __restrict dst = reinterpret_cast(_dst); rk_vec3_float const * const __restrict src = reinterpret_cast(_src); for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { rk_vec3_float const & input = src[*index]; dst->x = static_cast(input.x); dst->y = static_cast(input.y); dst->z = static_cast(input.z); dst->pad = 0; } } static void rk_pack_vec3_short_norm( unsigned const count, rk_ushort const * const __restrict indices, rk_ubyte * __restrict _dst, rk_ubyte const * const __restrict _src) { rk_ushort const * const last_index = indices + count; rk_vec3_short * __restrict dst = reinterpret_cast(_dst); rk_vec3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s) * ((s) < 0.f ? 32768.f : 32767.f))) for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { rk_vec3_float const & input = src[*index]; dst->x = _convert(input.x); dst->y = _convert(input.y); dst->z = _convert(input.z); dst->pad = 0; } #undef _convert } static void rk_pack_vec3_int10( unsigned const count, rk_ushort const * const __restrict indices, rk_ubyte * __restrict _dst, rk_ubyte const * const __restrict _src) { rk_ushort const * const last_index = indices + count; rk_vec3_int10 * __restrict dst = reinterpret_cast(_dst); rk_vec3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s)) & 1023) for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { rk_vec3_float const & input = src[*index]; *dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20); } #undef _convert } static void rk_pack_vec3_int10_norm( unsigned const count, rk_ushort const * const __restrict indices, rk_ubyte * __restrict _dst, rk_ubyte const * const __restrict _src) { rk_ushort const * const last_index = indices + count; rk_vec3_int10 * __restrict dst = reinterpret_cast(_dst); rk_vec3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023) for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { rk_vec3_float const & input = src[*index]; *dst = _convert(input.x) | (_convert(input.y) << 10) | (_convert(input.z) << 20); } #undef _convert } static void rk_pack_mat3_float( unsigned const count, rk_ushort const * const __restrict indices, rk_ubyte * __restrict _dst, rk_ubyte const * const __restrict _src) { rk_ushort const * const last_index = indices + count; rk_mat3_float * __restrict dst = reinterpret_cast(_dst); rk_mat3_float const * const __restrict src = reinterpret_cast(_src); for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { *dst = src[*index]; } #undef _convert } static void rk_pack_mat3_int10( unsigned const count, rk_ushort const * const __restrict indices, rk_ubyte * __restrict _dst, rk_ubyte const * const __restrict _src) { rk_ushort const * const last_index = indices + count; rk_mat3_int10 * __restrict dst = reinterpret_cast(_dst); rk_mat3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s)) & 1023) for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { rk_mat3_float const & input = src[*index]; dst->x = _convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20); dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20); dst->z = _convert(input.z.x) | (_convert(input.z.y) << 10) | (_convert(input.z.z) << 20); } #undef _convert } static void rk_pack_mat3_int10_norm( unsigned const count, rk_ushort const * const __restrict indices, rk_ubyte * __restrict _dst, rk_ubyte const * const __restrict _src) { rk_ushort const * const last_index = indices + count; rk_mat3_int10 * __restrict dst = reinterpret_cast(_dst); rk_mat3_float const * const __restrict src = reinterpret_cast(_src); #define _convert(s) (static_cast((s) * ((s) < 0.f ? 512.f : 511.f)) & 1023) for (rk_ushort const * __restrict index = indices; index < last_index; ++index, ++dst) { rk_mat3_float const & input = src[*index]; dst->x = _convert(input.x.x) | (_convert(input.x.y) << 10) | (_convert(input.x.z) << 20); dst->y = _convert(input.y.x) | (_convert(input.y.y) << 10) | (_convert(input.y.z) << 20); dst->z = _convert(input.z.x) | (_convert(input.z.y) << 10) | (_convert(input.z.z) << 20); } #undef _convert } rk_batch_t rk_create_batch( rk_vertices_t _vertices, rk_uint max_size, rk_param_format const * params_format) { rk_vertices * const vertices = reinterpret_cast(_vertices); if (!vertices || !max_size || max_size > RK_BATCH_MAX_SIZE) { rk_printf("rk_create_batch(): invalid params."); return RK_INVALID_HANDLE; } unsigned vertex_size = 0; for (rk_vertex_format const * f = vertices->format; *f; ++f) { switch (*f & RK_VERTEX_FORMAT_MASK) { case RK_VERTEX_FORMAT_VEC3_FLOAT: vertex_size += sizeof(rk_vec3_float); break; case RK_VERTEX_FORMAT_VEC3_INT10: vertex_size += sizeof(rk_vec3_int10); break; case RK_VERTEX_FORMAT_VEC3_UINT10: vertex_size += sizeof(rk_vec3_uint10); break; } } unsigned nparams = 0; unsigned params_size = 0; if (params_format) { for (rk_param_format const * f = params_format; *f; ++f, ++nparams) { switch (*f & RK_PARAM_FORMAT_MASK) { case RK_PARAM_FORMAT_VEC3_FLOAT: params_size += sizeof(rk_vec3_float); break; case RK_PARAM_FORMAT_VEC3_SHORT: params_size += sizeof(rk_vec3_short); break; case RK_PARAM_FORMAT_VEC3_INT10: params_size += sizeof(rk_vec3_int10); break; case RK_PARAM_FORMAT_MAT3_FLOAT: params_size += sizeof(rk_mat3_float); break; case RK_PARAM_FORMAT_MAT3_INT10: params_size += sizeof(rk_mat3_int10); break; default: rk_printf("rk_create_batch(): invalid param format."); return RK_INVALID_HANDLE; break; } } } rk_batch * batch = new rk_batch; batch->state = RK_BATCH_STATE_EMPTY; batch->count = 0; batch->ncommands = 0; batch->ninstances = 0; batch->max_size = max_size; batch->nparams = nparams; batch->vertices = vertices; batch->flags = new rk_instance_flags[max_size]; batch->meshes = new rk_ushort[max_size]; batch->indices = new rk_ushort[max_size]; memset(batch->indices, 0xFF, max_size * sizeof(rk_ushort)); batch->commands = new rk_command[vertices->nmeshes]; memset(batch->commands, 0, vertices->nmeshes * sizeof(rk_command)); if (nparams) { batch->params = new rk_parameter[nparams]; } else { batch->params = nullptr; } glGenVertexArrays(1, &batch->vertex_array); glBindVertexArray(batch->vertex_array); if (!vertices->vertices_buffer) { glGenBuffers(1, &vertices->vertices_buffer); glBindBuffer(GL_ARRAY_BUFFER, vertices->vertices_buffer); glBufferData(GL_ARRAY_BUFFER, vertices->nvertices * vertex_size, vertices->vertices, GL_STATIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); } if (vertices->indices_buffer) { glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertices->indices_buffer); } else { glGenBuffers(1, &vertices->indices_buffer); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertices->indices_buffer); glBufferData(GL_ELEMENT_ARRAY_BUFFER, vertices->nindices * sizeof(rk_ushort), vertices->indices, GL_STATIC_DRAW); } if (rk_MultiDrawElementsIndirect) { glGenBuffers(1, &batch->commands_buffer); glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch->commands_buffer); glBufferData(GL_DRAW_INDIRECT_BUFFER, vertices->nmeshes * sizeof(rk_command), nullptr, GL_DYNAMIC_DRAW); glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); } if (nparams) { glGenBuffers(1, &batch->params_buffer); glBindBuffer(GL_ARRAY_BUFFER, batch->params_buffer); glBufferData(GL_ARRAY_BUFFER, max_size * params_size, nullptr, GL_DYNAMIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); } unsigned binding = 0; unsigned attrib = 0; unsigned offset = 0; glBindVertexBuffer(binding, vertices->vertices_buffer, 0, vertex_size); for (rk_vertex_format const * f = vertices->format; *f; ++f) { GLboolean const norm = (*f & RK_VERTEX_FORMAT_NORMALIZE) != 0; switch (*f & RK_VERTEX_FORMAT_MASK) { case RK_VERTEX_FORMAT_VEC3_FLOAT: glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offset); glVertexAttribBinding(attrib++, binding); offset += sizeof(rk_vec3_float); break; case RK_VERTEX_FORMAT_VEC3_INT10: glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offset); glVertexAttribBinding(attrib++, binding); offset += sizeof(rk_vec3_int10); break; case RK_VERTEX_FORMAT_VEC3_UINT10: glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 4, GL_UNSIGNED_INT_2_10_10_10_REV, norm, offset); glVertexAttribBinding(attrib++, binding); offset += sizeof(rk_vec3_uint10); break; } } binding += 1; if (nparams) { offset = 0; rk_parameter * param = batch->params; for (rk_param_format const * f = params_format; *f; ++f, ++param, ++binding) { GLboolean const norm = (*f & RK_PARAM_FORMAT_NORMALIZE) != 0; param->dirty = false; param->binding = binding; param->offset = offset; switch (*f & RK_PARAM_FORMAT_MASK) { case RK_PARAM_FORMAT_VEC3_FLOAT: param->src_size = sizeof(rk_vec3); param->dst_size = sizeof(rk_vec3_float); param->packer = rk_pack_vec3_float; glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, 0); glVertexAttribBinding(attrib++, binding); break; case RK_PARAM_FORMAT_VEC3_SHORT: param->src_size = sizeof(rk_vec3); param->dst_size = sizeof(rk_vec3_short); param->packer = norm ? rk_pack_vec3_short_norm : rk_pack_vec3_short; glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 3, GL_SHORT, norm, 0); glVertexAttribBinding(attrib++, binding); break; case RK_PARAM_FORMAT_VEC3_INT10: param->src_size = sizeof(rk_vec3); param->dst_size = sizeof(rk_vec3_int10); param->packer = norm ? rk_pack_vec3_int10_norm : rk_pack_vec3_int10; glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, 0); glVertexAttribBinding(attrib++, binding); break; case RK_PARAM_FORMAT_MAT3_FLOAT: param->src_size = sizeof(rk_mat3); param->dst_size = sizeof(rk_mat3_float); param->packer = rk_pack_mat3_float; glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, x)); glVertexAttribBinding(attrib++, binding); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, y)); glVertexAttribBinding(attrib++, binding); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 3, GL_FLOAT, GL_FALSE, offsetof(rk_mat3_float, z)); glVertexAttribBinding(attrib++, binding); break; case RK_PARAM_FORMAT_MAT3_INT10: param->src_size = sizeof(rk_mat3); param->dst_size = sizeof(rk_mat3_int10); param->packer = norm ? rk_pack_mat3_int10_norm : rk_pack_mat3_int10; glBindVertexBuffer(binding, batch->params_buffer, param->offset, param->dst_size); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, x)); glVertexAttribBinding(attrib++, binding); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, y)); glVertexAttribBinding(attrib++, binding); glEnableVertexAttribArray(attrib); glVertexAttribFormat(attrib, 4, GL_INT_2_10_10_10_REV, norm, offsetof(rk_mat3_int10, z)); glVertexAttribBinding(attrib++, binding); break; } glVertexBindingDivisor(binding, 1); param->source = new rk_ubyte[max_size * param->src_size]; offset += max_size * param->dst_size; } } glBindVertexArray(0); rk_buckets_alloc(*batch); return reinterpret_cast(batch); } static void rk_sort_batch( rk_batch const & batch) { rk_bucket const * const last_bucket = rk_buckets + batch.vertices->nmeshes; for (rk_bucket * __restrict bucket = rk_buckets; bucket < last_bucket; ++bucket) { bucket->count = 0; } rk_instance_flags const * __restrict flags = batch.flags; rk_ushort const * __restrict mesh_index = batch.meshes; for (unsigned index = 0; index < batch.count; ++index, ++flags, ++mesh_index) { if ((*flags & RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) == RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) { rk_bucket & __restrict bucket = rk_buckets[*mesh_index]; bucket.indices[bucket.count++] = index; } } bool modified = false; rk_ushort * __restrict indices = batch.indices; rk_command * __restrict command = batch.commands; rk_mesh const * __restrict mesh = batch.vertices->meshes; for (rk_bucket const * __restrict bucket = rk_buckets; bucket < last_bucket; ++bucket, ++mesh) { if (bucket->count) { command->nvertices = static_cast(mesh->ntriangles) * 3; command->ninstances = bucket->count; command->base_index = mesh->base_index; command->base_instance = indices - batch.indices; modified |= rk_cmp_memcpy(indices, bucket->indices, bucket->count * sizeof(rk_ushort)); indices += bucket->count; ++command; } } unsigned const ninstances = indices - batch.indices; modified |= (ninstances != batch.ninstances); batch.ninstances = ninstances; batch.ncommands = command - batch.commands; if (modified && rk_MultiDrawElementsIndirect) { glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, batch.ncommands * sizeof(rk_command), batch.commands); } batch.state = RK_BATCH_STATE_SORTED; } static void rk_pack_batch( rk_batch const & batch) { if (batch.nparams) { glBindBuffer(GL_ARRAY_BUFFER, batch.params_buffer); for (rk_parameter const * param = batch.params; param < batch.params + batch.nparams; ++param) { if (param->dirty) { param->dirty = false; if (batch.ninstances) { rk_ubyte * const dst = reinterpret_cast( glMapBufferRange(GL_ARRAY_BUFFER, param->offset, batch.ninstances * param->dst_size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT)); if (dst) { param->packer(batch.ninstances, batch.indices, dst, param->source); glUnmapBuffer(GL_ARRAY_BUFFER); } } } } glBindBuffer(GL_ARRAY_BUFFER, 0); } batch.state = RK_BATCH_STATE_PACKED; } void rk_fill_batch( rk_batch_t _batch, rk_uint count, rk_instance_flags const * flags, rk_ushort const * meshes, rk_ubyte const * const * params) { rk_batch const * const batch = reinterpret_cast(_batch); if (!batch || !count || count > batch->max_size) { rk_printf("rk_fill_batch(): invalid params."); return; } bool got_any_params = false; bool got_all_params = !batch->nparams; if (batch->nparams) { got_all_params = (params != nullptr); if (params) { for (rk_ubyte const * const * param = params; param < params + batch->nparams; ++param) { bool const got_param = (*param != nullptr); got_any_params |= got_param; got_all_params &= got_param; } } } bool const is_empty = (batch->state < RK_BATCH_STATE_FILLED); bool const resized = (count != batch->count); bool const got_everything = (flags && meshes && got_all_params); if (is_empty && !got_everything) { rk_printf("rk_fill_batch(): cannot freeze and empty batch."); return; } else if (count > batch->count && !got_everything) { rk_printf("rk_fill_batch(): cannot grow a frozen batch."); return; } batch->count = count; bool const cmp_flags = (flags && rk_cmp_memcpy(batch->flags, flags, batch->count * sizeof(rk_instance_flags))); bool const cmp_meshes = (meshes && rk_cmp_memcpy(batch->meshes, meshes, batch->count * sizeof(rk_mesh))); bool const need_sorting = (cmp_flags || cmp_meshes || resized); if (batch->nparams) { rk_parameter const * const last_param = batch->params + batch->nparams; if (got_any_params) { rk_ubyte const * const * src = params; for (rk_parameter const * dst = batch->params; dst < last_param; ++dst, ++src) { dst->dirty = ((*src && rk_cmp_memcpy(dst->source, *src, batch->count * dst->src_size)) || need_sorting); } } else if (need_sorting) { for (rk_parameter const * dst = batch->params; dst < last_param; ++dst) { dst->dirty = true; } } } if (is_empty) { glBindVertexArray(batch->vertex_array); if (rk_MultiDrawElementsIndirect) { glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch->commands_buffer); } rk_sort_batch(*batch); rk_pack_batch(*batch); if (rk_MultiDrawElementsIndirect) { glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); } glBindVertexArray(0); } else if (need_sorting) { batch->state = RK_BATCH_STATE_FILLED; } else { batch->state = RK_BATCH_STATE_SORTED; } } void rk_clear_buffer( rk_bool pixels, rk_bool depth, rk_bool stencil) { glClear((GL_COLOR_BUFFER_BIT * pixels) | (GL_DEPTH_BUFFER_BIT * depth) | (GL_STENCIL_BUFFER_BIT * stencil)); } void rk_select_shader( rk_shader_t _shader) { rk_shader * const shader = reinterpret_cast(_shader); if (shader) { glUseProgram(shader->program); } } void rk_set_input_float( rk_input_t _input, float value) { GLint const input = reinterpret_cast(_input) - 1; if (input > -1) { glUniform1f(input, value); } } void rk_set_input_vec3( rk_input_t _input, rk_vec3 const & value) { GLint const input = reinterpret_cast(_input) - 1; if (input > -1) { glUniform3fv(input, 1, glm::value_ptr(value)); } } void rk_set_input_mat3( rk_input_t _input, rk_mat3 const & value) { GLint const input = reinterpret_cast(_input) - 1; if (input > -1) { glUniformMatrix3fv(input, 1, GL_FALSE, glm::value_ptr(value)); } } void rk_set_input_mat4( rk_input_t _input, rk_mat4 const & value) { GLint const input = reinterpret_cast(_input) - 1; if (input > -1) { glUniformMatrix4fv(input, 1, GL_FALSE, glm::value_ptr(value)); } } void rk_set_param_vec3( rk_param_t _param, rk_vec3 const & value) { GLint const param = reinterpret_cast(_param) - 1; if (param > -1) { glVertexAttrib3fv(param, glm::value_ptr(value)); } } void rk_set_param_mat3( rk_param_t _param, rk_mat3 const & value) { GLint const param = reinterpret_cast(_param) - 1; if (param > -1) { glVertexAttrib3fv(param + 0, glm::value_ptr(value[0])); glVertexAttrib3fv(param + 1, glm::value_ptr(value[1])); glVertexAttrib3fv(param + 2, glm::value_ptr(value[2])); } } void rk_select_texture( rk_uint slot, rk_texture_t _texture) { rk_texture const * const texture = reinterpret_cast(_texture); if (texture) { glActiveTexture(GL_TEXTURE0 + slot); if (texture->nlevels) { glBindTexture(GL_TEXTURE_2D_ARRAY, texture->texture); } else { glBindTexture(GL_TEXTURE_2D, texture->texture); } } } RK_EXPORT void rk_draw_triangles( rk_triangles_t _triangles) { rk_triangles const * const triangles = reinterpret_cast(_triangles); if (triangles) { glBindVertexArray(triangles->array); glDrawArrays(GL_TRIANGLES, 0, triangles->size); glBindVertexArray(0); } } void rk_draw_batch( rk_batch_t _batch) { rk_batch const * const batch = reinterpret_cast(_batch); if (!batch) { rk_printf("rk_draw_batch(): invalid params."); return; } if (batch->state < RK_BATCH_STATE_FILLED) { rk_printf("rk_draw_batch(): invalid state."); return; } glBindVertexArray(batch->vertex_array); if (rk_MultiDrawElementsIndirect) { glBindBuffer(GL_DRAW_INDIRECT_BUFFER, batch->commands_buffer); } if (batch->state < RK_BATCH_STATE_SORTED) { rk_sort_batch(*batch); } if (batch->state < RK_BATCH_STATE_PACKED && batch->nparams) { rk_pack_batch(*batch); } if (batch->ncommands) { if (rk_DrawElementsInstancedBaseInstance) { if (rk_MultiDrawElementsIndirect) { rk_MultiDrawElementsIndirect( GL_TRIANGLES, GL_UNSIGNED_SHORT, nullptr, batch->ncommands, sizeof(rk_command)); } else { rk_command const * const last_command = batch->commands + batch->ncommands; for (rk_command const * command = batch->commands; command < last_command; ++command) { rk_DrawElementsInstancedBaseInstance( GL_TRIANGLES, command->nvertices, GL_UNSIGNED_SHORT, reinterpret_cast(command->base_index << 1), command->ninstances, command->base_instance); } } } else { rk_command const * const last_command = batch->commands + batch->ncommands; rk_parameter const * const last_param = batch->params + batch->nparams; unsigned param_index = 0; for (rk_command const * command = batch->commands; command < last_command; ++command) { for (rk_parameter const * param = batch->params; param < last_param; ++param) { glBindVertexBuffer(param->binding, batch->params_buffer, param->offset + param_index * param->dst_size, param->dst_size); } glDrawElementsInstanced( GL_TRIANGLES, command->nvertices, GL_UNSIGNED_SHORT, reinterpret_cast(command->base_index << 1), command->ninstances); param_index += command->ninstances; } } } if (rk_MultiDrawElementsIndirect) { glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); } glBindVertexArray(0); } void rk_unselect_texture( rk_uint slot, rk_texture_t _texture) { rk_texture const * const texture = reinterpret_cast(_texture); if (texture) { glActiveTexture(GL_TEXTURE0 + slot); if (texture->nlevels) { glBindTexture(GL_TEXTURE_2D, 0); } else { glBindTexture(GL_TEXTURE_2D_ARRAY, 0); } } } void rk_unselect_shader( rk_shader_t _shader) { glUseProgram(0); } void rk_destroy_batch( rk_batch_t _batch) { rk_batch * const batch = reinterpret_cast(_batch); if (batch) { delete[] batch->indices; delete[] batch->commands; delete[] batch->flags; delete[] batch->meshes; if (rk_MultiDrawElementsIndirect) { glDeleteBuffers(1, &batch->commands_buffer); } if (batch->nparams) { for (rk_parameter * param = batch->params; param < batch->params + batch->nparams; ++param) { delete[] param->source; } delete[] batch->params; glDeleteBuffers(1, &batch->params_buffer); } glDeleteVertexArrays(1, &batch->vertex_array); delete batch; } } void rk_destroy_triangles( rk_triangles_t _triangles) { rk_triangles * const triangles = reinterpret_cast(_triangles); if (triangles) { glDeleteBuffers(1, &triangles->vertices); glDeleteVertexArrays(1, &triangles->array); delete triangles; } } void rk_destroy_vertices( rk_vertices_t _vertices) { rk_vertices * const vertices = reinterpret_cast(_vertices); if (vertices) { delete[] vertices->format; delete[] vertices->vertices; delete[] vertices->indices; if (vertices->vertices_buffer) { glDeleteBuffers(1, &vertices->vertices_buffer); } if (vertices->indices_buffer) { glDeleteBuffers(1, &vertices->indices_buffer); } delete vertices; } } void rk_destroy_texture( rk_texture_t _texture) { rk_texture * const texture = reinterpret_cast(_texture); if (texture) { glDeleteTextures(1, &texture->texture); delete texture; } } void rk_destroy_shader( rk_shader_t _shader) { rk_shader * const shader = reinterpret_cast(_shader); if (shader) { glDeleteShader(shader->vertex); glDeleteShader(shader->fragment); glDeleteProgram(shader->program); delete shader; } }