diff --git a/cpp/render/render_opengles.cpp b/cpp/render/render_opengles.cpp
index 0d0246e..f56a815 100644
--- a/cpp/render/render_opengles.cpp
+++ b/cpp/render/render_opengles.cpp
@@ -13,9 +13,12 @@
 // You should have received a copy of the GNU Affero General Public License
 // along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
+#define RK_BUCKETS_SORT
+
 #include "../render.hpp"
 #include "render_opengles.hpp"
 #include "../display/display_glx.hpp"
+#include 
 #include 
 #include 
 
@@ -25,6 +28,141 @@ typedef void (*rk_MultiDrawElementsIndirectFunc)(rk_uint, rk_uint, const void *,
 static rk_DrawElementsInstancedBaseInstanceFunc rk_DrawElementsInstancedBaseInstance = nullptr;
 static rk_MultiDrawElementsIndirectFunc rk_MultiDrawElementsIndirect = nullptr;
 
+// Copies `size` bytes from _src over _dst and returns true when at least one
+// byte of _dst changed. Proceeds by rk_ulong words, then finishes the tail by bytes.
+// NOTE(review): the word loop goes through rk_ulong pointers while callers such as
+// rk_buckets_sort() pass rk_ushort offsets - confirm unaligned access is acceptable.
+static bool rk_compare_replace(
+    void * __restrict _dst,
+    void const * __restrict _src,
+    unsigned const size) {
+    rk_ulong hash = 0; // ORs every (old ^ new); stays zero only if nothing differed
+    rk_ulong * dst = reinterpret_cast<rk_ulong *>(_dst);
+    rk_ulong const * src = reinterpret_cast<rk_ulong const *>(_src);
+    unsigned count = size / sizeof(rk_ulong);
+    unsigned remain = (size - count * sizeof(rk_ulong));
+    for (; count > 0; --count) {
+        hash |= *dst ^ *src;
+        *dst++ = *src++;
+    }
+    rk_ubyte * rdst = reinterpret_cast<rk_ubyte *>(dst);
+    rk_ubyte const * rsrc = reinterpret_cast<rk_ubyte const *>(src);
+    for (; remain > 0; --remain) {
+        hash |= *rdst ^ *rsrc;
+        *rdst++ = *rsrc++;
+    }
+    return (hash != 0);
+}
+
+#ifdef RK_BUCKETS_SORT
+
+struct rk_bucket { // growable list of instance indices gathered for one mesh
+    unsigned size; // capacity of `indices`, in elements
+    unsigned count; // number of entries currently stored in `indices`
+    rk_ushort * indices; // heap array managed with malloc()/realloc()
+};
+
+static unsigned rk_nbuckets = 0; // number of entries in rk_buckets
+static rk_bucket * rk_buckets = nullptr; // bucket table, indexed by mesh index
+
+// Makes sure the shared bucket table can serve `batch`: one bucket per mesh of
+// batch.vertices, each able to hold at least max_size / nmeshes instance indices.
+// The table is file-global and only grows; nothing in this function releases it.
+// Called from rk_create_batch() once the batch has been set up.
+// NOTE(review): realloc() results are used unchecked because no error path is
+// visible from here - confirm the project's out-of-memory policy.
+static void rk_buckets_alloc(
+    rk_batch const & batch) {
+    unsigned const count = batch.vertices->nmeshes;
+    if (!count) {
+        return; // no mesh, no bucket; also keeps the division below defined
+    }
+    // Give every bucket room for at least one index even when max_size < count,
+    // since rk_buckets_sort() grows a full bucket by doubling its size.
+    unsigned const share = batch.max_size / count;
+    unsigned const size = share ? share : 1;
+    if (count > rk_nbuckets) {
+        // realloc() of a null table behaves like malloc(), which covers first use.
+        rk_buckets = reinterpret_cast<rk_bucket *>(
+            realloc(rk_buckets, count * sizeof(rk_bucket)));
+        for (unsigned index = rk_nbuckets; index < count; ++index) {
+            rk_bucket & bucket = rk_buckets[index];
+            bucket.size = 0;
+            bucket.count = 0;
+            bucket.indices = nullptr; // lets the loop below allocate through realloc()
+        }
+        rk_nbuckets = count;
+    }
+    // Only the first `count` buckets serve this batch: widen those that are too small.
+    for (unsigned index = 0; index < count; ++index) {
+        rk_bucket & bucket = rk_buckets[index];
+        if (bucket.size < size) {
+            bucket.size = size;
+            bucket.indices = reinterpret_cast<rk_ushort *>(
+                realloc(bucket.indices, size * sizeof(rk_ushort)));
+        }
+    }
+    // Report the memory held by the whole table, not just this batch's buckets.
+    unsigned total_size = rk_nbuckets * sizeof(rk_bucket);
+    for (unsigned index = 0; index < rk_nbuckets; ++index) {
+        rk_bucket const & bucket = rk_buckets[index];
+        total_size += bucket.size * sizeof(rk_ushort);
+    }
+    printf("[RK] rk_buckets_alloc() -> %u KiB\n", total_size / 1024);
+}
+
+// Groups spawned+visible instances by mesh, then repacks batch.indices and batch.commands.
+static bool rk_buckets_sort(rk_batch const & batch) { // true when indices or commands changed
+    bool reallocated = false;
+    rk_bucket const * const last_bucket = rk_buckets + batch.vertices->nmeshes;
+    for (rk_bucket * bucket = rk_buckets; bucket < last_bucket; ++bucket) {
+        bucket->count = 0;
+    }
+    rk_instance_flags const * flags = batch.flags;
+    rk_ushort const * mesh_index = batch.meshes;
+    for (unsigned index = 0; index < batch.count; ++index, ++flags, ++mesh_index) {
+        if ((*flags & RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) == RK_INSTANCE_FLAGS_SPAWNED_VISIBLE) {
+            rk_bucket & bucket = rk_buckets[*mesh_index];
+            if (bucket.count == bucket.size) {
+                bucket.size = bucket.size ? bucket.size * 2 : 1; // doubling zero would stay zero
+                bucket.indices = reinterpret_cast<rk_ushort *>(realloc(bucket.indices, bucket.size * sizeof(rk_ushort)));
+                reallocated = true;
+            }
+            bucket.indices[bucket.count++] = index;
+        }
+    }
+    bool modified = false;
+    rk_ushort * indices = batch.indices;
+    rk_command * command = batch.commands;
+    rk_mesh const * mesh = batch.vertices->meshes;
+    for (rk_bucket const * bucket = rk_buckets; bucket < last_bucket; ++bucket, ++mesh) {
+        if (bucket->count) {
+            rk_command next = *command; // start from the old command: unset fields compare equal
+            next.nvertices = static_cast<rk_uint>(mesh->ntriangles) * 3;
+            next.ninstances = bucket->count;
+            next.base_index = mesh->base_index;
+            next.base_vertex = 0;
+            next.base_instance = indices - batch.indices;
+            modified |= rk_compare_replace(command, &next, sizeof(rk_command)); // count or mesh may change under equal indices
+            modified |= rk_compare_replace(indices, bucket->indices, bucket->count * sizeof(rk_ushort));
+            indices += bucket->count;
+            ++command;
+        }
+    }
+    batch.ninstances = indices - batch.indices;
+    batch.ncommands = command - batch.commands;
+    if (reallocated) {
+        unsigned total_size = rk_nbuckets * sizeof(rk_bucket);
+        for (unsigned index = 0; index < rk_nbuckets; ++index) {
+            total_size += rk_buckets[index].size * sizeof(rk_ushort);
+        }
+        printf("[RK] rk_buckets_sort() -> %u KiB\n", total_size / 1024);
+    }
+    return modified;
+}
+
+#endif // RK_BUCKETS_SORT
+
 static void rk_gl_printf(char const * message) {
     printf("[GL] %s\n", message);
 }
@@ -687,11 +825,20 @@ rk_batch_t rk_create_batch(
         }
     }
     glBindVertexArray(0);
+#ifdef RK_BUCKETS_SORT
+    rk_buckets_alloc(*batch);
+#endif
     return reinterpret_cast(batch);
 }
 
 static void rk_sort_batch(
     rk_batch const & batch) {
+#ifdef RK_BUCKETS_SORT
+    bool const modified = rk_buckets_sort(batch);
+    if (modified && rk_MultiDrawElementsIndirect) {
+        glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, batch.ncommands * sizeof(rk_command), batch.commands);
+    }
+#else
     rk_instance_flags const * flags = batch.flags;
     rk_ushort * indices = batch.indices;
     for (unsigned index = 0; index < batch.count; ++index, ++flags) {
@@ -729,6 +876,7 @@ static void rk_sort_batch(
             glBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, batch.ncommands * sizeof(rk_command), batch.commands);
         }
     }
+#endif // RK_BUCKETS_SORT
     batch.state = RK_BATCH_STATE_SORTED;
 }
 
@@ -755,32 +903,6 @@ static void rk_pack_batch(
     batch.state = RK_BATCH_STATE_PACKED;
 }
 
-static bool rk_compare_replace(
-    void * __restrict _dst,
-    void const * __restrict _src,
-    unsigned const size) {
-    rk_ulong hash = 0;
-    rk_ulong * dst = reinterpret_cast<rk_ulong *>(_dst);
-    rk_ulong const * src = reinterpret_cast<rk_ulong const *>(_src);
-    unsigned count = size / sizeof(rk_ulong);
-    unsigned remain = (size - count * sizeof(rk_ulong));
-    if (count) {
-        do {
-            hash |= *dst ^ *src;
-            *dst++ = *src++;
-        } while(--count > 0);
-    }
-    rk_ubyte * rdst = reinterpret_cast<rk_ubyte *>(dst);
-    rk_ubyte const * rsrc = reinterpret_cast<rk_ubyte const *>(src);
-    if (remain) {
-        do {
-            hash |= *rdst ^ *rsrc;
-            *rdst++ = *rsrc++;
-        } while(--remain > 0);
-    }
-    return (hash != 0);
-}
-
 void rk_fill_batch(
     rk_batch_t _batch,
     rk_uint count,