From 506ec4b30554eaf055b839fc8f36cd04c860512d Mon Sep 17 00:00:00 2001 From: Morgan Lewis Date: Wed, 17 Nov 2021 16:10:20 -0600 Subject: [PATCH 1/6] Add additional buffer methods to ThinGL --- .../sodium/client/gl/device/CommandList.java | 7 +++++++ .../client/gl/device/GLRenderDevice.java | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/main/java/me/jellysquid/mods/sodium/client/gl/device/CommandList.java b/src/main/java/me/jellysquid/mods/sodium/client/gl/device/CommandList.java index afbea83eca..c685ad1aa9 100644 --- a/src/main/java/me/jellysquid/mods/sodium/client/gl/device/CommandList.java +++ b/src/main/java/me/jellysquid/mods/sodium/client/gl/device/CommandList.java @@ -9,6 +9,7 @@ import me.jellysquid.mods.sodium.client.gl.util.EnumBitField; import java.nio.ByteBuffer; +import java.nio.IntBuffer; public interface CommandList extends AutoCloseable { GlMutableBuffer createMutableBuffer(); @@ -21,10 +22,16 @@ public interface CommandList extends AutoCloseable { void uploadData(GlMutableBuffer glBuffer, ByteBuffer byteBuffer, GlBufferUsage usage); + void bufferData(GlBufferTarget target, GlMutableBuffer glBuffer, int[] intArray, GlBufferUsage usage); + + void bufferData(GlBufferTarget target, GlMutableBuffer glBuffer, IntBuffer intBuffer, GlBufferUsage usage); + void copyBufferSubData(GlBuffer src, GlBuffer dst, long readOffset, long writeOffset, long bytes); void bindBuffer(GlBufferTarget target, GlBuffer buffer); + void bindBufferBase(GlBufferTarget target, int index, GlBuffer buffer); + void unbindVertexArray(); void allocateStorage(GlMutableBuffer buffer, long bufferSize, GlBufferUsage usage); diff --git a/src/main/java/me/jellysquid/mods/sodium/client/gl/device/GLRenderDevice.java b/src/main/java/me/jellysquid/mods/sodium/client/gl/device/GLRenderDevice.java index b15f7a4e92..c90a6320d6 100644 --- a/src/main/java/me/jellysquid/mods/sodium/client/gl/device/GLRenderDevice.java +++ 
b/src/main/java/me/jellysquid/mods/sodium/client/gl/device/GLRenderDevice.java @@ -88,6 +88,20 @@ public void uploadData(GlMutableBuffer glBuffer, ByteBuffer byteBuffer, GlBuffer glBuffer.setSize(byteBuffer.remaining()); } + @Override + public void bufferData(GlBufferTarget target, GlMutableBuffer glBuffer, int[] intArray, GlBufferUsage usage) { + this.bindBuffer(target, glBuffer); + + GL15C.glBufferData(target.getTargetParameter(), intArray, usage.getId()); + } + + @Override + public void bufferData(GlBufferTarget target, GlMutableBuffer glBuffer, IntBuffer intBuffer, GlBufferUsage usage) { + this.bindBuffer(target, glBuffer); + + GL15C.glBufferData(target.getTargetParameter(), intBuffer, usage.getId()); + } + @Override public void copyBufferSubData(GlBuffer src, GlBuffer dst, long readOffset, long writeOffset, long bytes) { this.bindBuffer(GlBufferTarget.COPY_READ_BUFFER, src); @@ -103,6 +117,11 @@ public void bindBuffer(GlBufferTarget target, GlBuffer buffer) { } } + @Override + public void bindBufferBase(GlBufferTarget target, int index, GlBuffer buffer) { + GL30C.glBindBufferBase(target.getTargetParameter(), index, buffer.handle()); + } + @Override public void unbindVertexArray() { if (this.stateTracker.makeVertexArrayActive(null)) { From 13b3bd326b310f3465844d6449f0218e697869b1 Mon Sep 17 00:00:00 2001 From: Morgan Lewis Date: Wed, 17 Nov 2021 16:12:57 -0600 Subject: [PATCH 2/6] Add support for shader storage buffer --- .../mods/sodium/client/gl/buffer/GlBufferTarget.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/me/jellysquid/mods/sodium/client/gl/buffer/GlBufferTarget.java b/src/main/java/me/jellysquid/mods/sodium/client/gl/buffer/GlBufferTarget.java index 522128d970..4378387648 100644 --- a/src/main/java/me/jellysquid/mods/sodium/client/gl/buffer/GlBufferTarget.java +++ b/src/main/java/me/jellysquid/mods/sodium/client/gl/buffer/GlBufferTarget.java @@ -2,12 +2,14 @@ import org.lwjgl.opengl.GL20C; import 
org.lwjgl.opengl.GL31C; +import org.lwjgl.opengl.GL43C; public enum GlBufferTarget { ARRAY_BUFFER(GL20C.GL_ARRAY_BUFFER, GL20C.GL_ARRAY_BUFFER_BINDING), ELEMENT_BUFFER(GL20C.GL_ELEMENT_ARRAY_BUFFER, GL20C.GL_ELEMENT_ARRAY_BUFFER_BINDING), COPY_READ_BUFFER(GL31C.GL_COPY_READ_BUFFER, GL31C.GL_COPY_READ_BUFFER), - COPY_WRITE_BUFFER(GL31C.GL_COPY_WRITE_BUFFER, GL31C.GL_COPY_WRITE_BUFFER); + COPY_WRITE_BUFFER(GL31C.GL_COPY_WRITE_BUFFER, GL31C.GL_COPY_WRITE_BUFFER), + SHADER_STORAGE_BUFFER(GL43C.GL_SHADER_STORAGE_BUFFER, GL43C.GL_SHADER_STORAGE_BUFFER_BINDING); public static final GlBufferTarget[] VALUES = GlBufferTarget.values(); public static final int COUNT = VALUES.length; From 3a104d9d04766dca863934c1f0766e9b441611e7 Mon Sep 17 00:00:00 2001 From: Morgan Lewis Date: Wed, 17 Nov 2021 16:13:12 -0600 Subject: [PATCH 3/6] Add support for compute shader type --- .../jellysquid/mods/sodium/client/gl/shader/ShaderType.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/me/jellysquid/mods/sodium/client/gl/shader/ShaderType.java b/src/main/java/me/jellysquid/mods/sodium/client/gl/shader/ShaderType.java index 4791e3dc81..17d230af03 100644 --- a/src/main/java/me/jellysquid/mods/sodium/client/gl/shader/ShaderType.java +++ b/src/main/java/me/jellysquid/mods/sodium/client/gl/shader/ShaderType.java @@ -1,13 +1,15 @@ package me.jellysquid.mods.sodium.client.gl.shader; import org.lwjgl.opengl.GL20C; +import org.lwjgl.opengl.GL43C; /** * An enumeration over the supported OpenGL shader types. 
*/ public enum ShaderType { VERTEX(GL20C.GL_VERTEX_SHADER), - FRAGMENT(GL20C.GL_FRAGMENT_SHADER); + FRAGMENT(GL20C.GL_FRAGMENT_SHADER), + COMPUTE(GL43C.GL_COMPUTE_SHADER); public final int id; From 45952da912124097a778b2373c9a088f0eca3bd6 Mon Sep 17 00:00:00 2001 From: Morgan Lewis Date: Wed, 17 Nov 2021 16:17:40 -0600 Subject: [PATCH 4/6] Remove support for byte and short GlIndexType --- .../client/gl/tessellation/GlIndexType.java | 4 -- .../client/model/IndexBufferBuilder.java | 40 ++-------------- .../render/chunk/RegionChunkRenderer.java | 46 ++++--------------- 3 files changed, 13 insertions(+), 77 deletions(-) diff --git a/src/main/java/me/jellysquid/mods/sodium/client/gl/tessellation/GlIndexType.java b/src/main/java/me/jellysquid/mods/sodium/client/gl/tessellation/GlIndexType.java index 867bc09010..ece65dff05 100644 --- a/src/main/java/me/jellysquid/mods/sodium/client/gl/tessellation/GlIndexType.java +++ b/src/main/java/me/jellysquid/mods/sodium/client/gl/tessellation/GlIndexType.java @@ -3,8 +3,6 @@ import org.lwjgl.opengl.GL32C; public enum GlIndexType { - UNSIGNED_BYTE(GL32C.GL_UNSIGNED_BYTE, 1), - UNSIGNED_SHORT(GL32C.GL_UNSIGNED_SHORT, 2), UNSIGNED_INT(GL32C.GL_UNSIGNED_INT, 4); private final int id; @@ -22,6 +20,4 @@ public int getFormatId() { public int getStride() { return this.stride; } - - public static final GlIndexType[] VALUES = GlIndexType.values(); } diff --git a/src/main/java/me/jellysquid/mods/sodium/client/model/IndexBufferBuilder.java b/src/main/java/me/jellysquid/mods/sodium/client/model/IndexBufferBuilder.java index ae9d8b7b4e..1e6b6a6de5 100644 --- a/src/main/java/me/jellysquid/mods/sodium/client/model/IndexBufferBuilder.java +++ b/src/main/java/me/jellysquid/mods/sodium/client/model/IndexBufferBuilder.java @@ -32,43 +32,16 @@ public Result pop() { return new Result(this.indices); } - private static GlIndexType getOptimalIndexType(int count) { - if (count < 65536) { - return GlIndexType.UNSIGNED_SHORT; - } else { - return 
GlIndexType.UNSIGNED_INT; - } - } - public int getCount() { return this.indices.size(); } public static class Result { private final IntArrayList indices; - - private final int maxIndex, minIndex; - private final GlIndexType format; + private final GlIndexType format = GlIndexType.UNSIGNED_INT; private Result(IntArrayList indices) { this.indices = indices; - - int maxIndex = Integer.MIN_VALUE; - int minIndex = Integer.MAX_VALUE; - - IntIterator it = this.indices.iterator(); - - while (it.hasNext()) { - int i = it.nextInt(); - - minIndex = Math.min(minIndex, i); - maxIndex = Math.max(maxIndex, i); - } - - this.minIndex = minIndex; - this.maxIndex = maxIndex; - - this.format = getOptimalIndexType(this.maxIndex - this.minIndex); } public int writeTo(int offset, ByteBuffer buffer) { @@ -78,14 +51,7 @@ public int writeTo(int offset, ByteBuffer buffer) { int pointer = offset; while (it.hasNext()) { - int value = it.nextInt() - this.minIndex; - - switch (this.format) { - case UNSIGNED_BYTE -> buffer.put(pointer, (byte) value); - case UNSIGNED_SHORT -> buffer.putShort(pointer, (short) value); - case UNSIGNED_INT -> buffer.putInt(pointer, value); - } - + buffer.putInt(pointer, it.nextInt()); pointer += stride; } @@ -101,7 +67,7 @@ public int getCount() { } public int getBaseVertex() { - return this.minIndex; + return 0; } public GlIndexType getFormat() { diff --git a/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/RegionChunkRenderer.java b/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/RegionChunkRenderer.java index 54a70c97df..477a7e261b 100644 --- a/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/RegionChunkRenderer.java +++ b/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/RegionChunkRenderer.java @@ -1,7 +1,6 @@ package me.jellysquid.mods.sodium.client.render.chunk; import com.google.common.collect.Lists; -import com.mojang.blaze3d.systems.RenderSystem; import me.jellysquid.mods.sodium.client.SodiumClientMod; import 
me.jellysquid.mods.sodium.client.gl.attribute.GlVertexAttributeBinding; import me.jellysquid.mods.sodium.client.gl.buffer.GlBufferUsage; @@ -21,10 +20,7 @@ import me.jellysquid.mods.sodium.client.render.chunk.format.ChunkMeshAttribute; import me.jellysquid.mods.sodium.client.render.chunk.passes.BlockRenderPass; import me.jellysquid.mods.sodium.client.render.chunk.region.RenderRegion; -import me.jellysquid.mods.sodium.client.render.chunk.shader.ChunkShaderBindingPoints; import me.jellysquid.mods.sodium.client.render.chunk.shader.ChunkShaderInterface; -import org.joml.Matrix4f; -import org.lwjgl.system.MemoryStack; import org.lwjgl.system.MemoryUtil; import java.nio.ByteBuffer; @@ -34,7 +30,7 @@ public class RegionChunkRenderer extends ShaderChunkRenderer { private static final ByteBuffer DRAW_INFO_BUFFER = createChunkInfoBuffer(); - private final MultiDrawBatch[] batches; + private final MultiDrawBatch batch; private final GlVertexAttributeBinding[] vertexAttributeBindings; private final GlMutableBuffer chunkInfoBuffer; @@ -59,11 +55,7 @@ public RegionChunkRenderer(RenderDevice device, ChunkVertexType vertexType) { commandList.uploadData(this.chunkInfoBuffer, DRAW_INFO_BUFFER, GlBufferUsage.STATIC_DRAW); } - this.batches = new MultiDrawBatch[GlIndexType.VALUES.length]; - - for (int i = 0; i < this.batches.length; i++) { - this.batches[i] = MultiDrawBatch.create(ModelQuadFacing.COUNT * RenderRegion.REGION_SIZE); - } + this.batch = MultiDrawBatch.create(ModelQuadFacing.COUNT * RenderRegion.REGION_SIZE); } @Override @@ -81,7 +73,7 @@ public void render(ChunkRenderMatrices matrices, CommandList commandList, RenderRegion region = entry.getKey(); List regionSections = entry.getValue(); - if (!this.buildDrawBatches(regionSections, pass, camera)) { + if (!this.buildDrawBatch(regionSections, pass, camera)) { continue; } @@ -92,10 +84,8 @@ public void render(ChunkRenderMatrices matrices, CommandList commandList, super.end(); } - private boolean buildDrawBatches(List sections, 
BlockRenderPass pass, ChunkCameraContext camera) { - for (MultiDrawBatch batch : this.batches) { - batch.begin(); - } + private boolean buildDrawBatch(List sections, BlockRenderPass pass, ChunkCameraContext camera) { + batch.begin(); for (RenderSection render : sortedChunks(sections, pass.isTranslucent())) { ChunkGraphicsState state = render.getGraphicsState(pass); @@ -145,15 +135,8 @@ private boolean buildDrawBatches(List sections, BlockRenderPass p } } - boolean nonEmpty = false; - - for (MultiDrawBatch batch : this.batches) { - batch.end(); - - nonEmpty |= !batch.isEmpty(); - } - - return nonEmpty; + batch.end(); + return !batch.isEmpty(); } private GlTessellation createTessellationForRegion(CommandList commandList, RenderRegion.RenderRegionArenas arenas, BlockRenderPass pass) { @@ -167,14 +150,8 @@ private GlTessellation createTessellationForRegion(CommandList commandList, Rend } private void executeDrawBatches(CommandList commandList, GlTessellation tessellation) { - for (int i = 0; i < this.batches.length; i++) { - MultiDrawBatch batch = this.batches[i]; - - if (!batch.isEmpty()) { - try (DrawCommandList drawCommandList = commandList.beginTessellating(tessellation)) { - drawCommandList.multiDrawElementsBaseVertex(batch.getPointerBuffer(), batch.getCountBuffer(), batch.getBaseVertexBuffer(), GlIndexType.VALUES[i]); - } - } + try (DrawCommandList drawCommandList = commandList.beginTessellating(tessellation)) { + drawCommandList.multiDrawElementsBaseVertex(batch.getPointerBuffer(), batch.getCountBuffer(), batch.getBaseVertexBuffer(), GlIndexType.UNSIGNED_INT); } } @@ -194,7 +171,6 @@ private void setModelMatrixUniforms(ChunkShaderInterface shader, ChunkRenderMatr private void addDrawCall(ElementRange part, long baseIndexPointer, int baseVertexIndex) { if (part != null) { - MultiDrawBatch batch = this.batches[part.indexType().ordinal()]; batch.add(baseIndexPointer + part.elementPointer(), part.elementCount(), baseVertexIndex + part.baseVertex()); } } @@ -210,9 
+186,7 @@ private GlTessellation createRegionTessellation(CommandList commandList, RenderR public void delete() { super.delete(); - for (MultiDrawBatch batch : this.batches) { - batch.delete(); - } + batch.delete(); RenderDevice.INSTANCE.createCommandList() .deleteBuffer(this.chunkInfoBuffer); From 031441b22d9d9b6af010053283a0e35204394c93 Mon Sep 17 00:00:00 2001 From: Morgan Lewis Date: Wed, 17 Nov 2021 16:18:44 -0600 Subject: [PATCH 5/6] Add menu option for translucent face sorting --- .../sodium/client/gui/SodiumGameOptionPages.java | 12 +++++++++++- .../mods/sodium/client/gui/SodiumGameOptions.java | 2 +- src/main/resources/assets/sodium/lang/en_us.json | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/main/java/me/jellysquid/mods/sodium/client/gui/SodiumGameOptionPages.java b/src/main/java/me/jellysquid/mods/sodium/client/gui/SodiumGameOptionPages.java index c3fdf138c4..500a9c7f7e 100644 --- a/src/main/java/me/jellysquid/mods/sodium/client/gui/SodiumGameOptionPages.java +++ b/src/main/java/me/jellysquid/mods/sodium/client/gui/SodiumGameOptionPages.java @@ -11,12 +11,12 @@ import me.jellysquid.mods.sodium.client.gui.options.control.TickBoxControl; import me.jellysquid.mods.sodium.client.gui.options.storage.MinecraftOptionsStorage; import me.jellysquid.mods.sodium.client.gui.options.storage.SodiumOptionsStorage; +import me.jellysquid.mods.sodium.client.render.chunk.shader.ComputeShaderInterface; import net.minecraft.client.MinecraftClient; import net.minecraft.client.gl.Framebuffer; import net.minecraft.client.option.Option; import net.minecraft.client.option.*; import net.minecraft.client.util.Window; -import net.minecraft.text.LiteralText; import net.minecraft.text.Text; import net.minecraft.text.TranslatableText; @@ -292,6 +292,16 @@ public static OptionPage performance() { .setFlags(OptionFlag.REQUIRES_RENDERER_UPDATE) .build() ) + .add(OptionImpl.createBuilder(boolean.class, sodiumOpts) + .setName(new 
TranslatableText("sodium.options.translucent_face_sorting.name")) + .setTooltip(new TranslatableText("sodium.options.translucent_face_sorting.tooltip")) + .setControl(TickBoxControl::new) + .setImpact(OptionImpact.VARIES) + .setEnabled(ComputeShaderInterface.isSupported(RenderDevice.INSTANCE)) + .setBinding((opts, value) -> opts.advanced.useTranslucentFaceSorting = value, opts -> opts.advanced.useTranslucentFaceSorting) + .setFlags(OptionFlag.REQUIRES_RENDERER_RELOAD) + .build() + ) .build()); return new OptionPage(new TranslatableText("sodium.options.pages.performance"), ImmutableList.copyOf(groups)); diff --git a/src/main/java/me/jellysquid/mods/sodium/client/gui/SodiumGameOptions.java b/src/main/java/me/jellysquid/mods/sodium/client/gui/SodiumGameOptions.java index 7c183dbc55..22a043aac4 100644 --- a/src/main/java/me/jellysquid/mods/sodium/client/gui/SodiumGameOptions.java +++ b/src/main/java/me/jellysquid/mods/sodium/client/gui/SodiumGameOptions.java @@ -6,7 +6,6 @@ import me.jellysquid.mods.sodium.client.gui.options.TextProvider; import net.fabricmc.loader.api.FabricLoader; import net.minecraft.client.option.GraphicsMode; -import net.minecraft.text.LiteralText; import net.minecraft.text.Text; import net.minecraft.text.TranslatableText; @@ -54,6 +53,7 @@ public static class AdvancedSettings { public boolean allowDirectMemoryAccess = true; public boolean enableMemoryTracing = false; public boolean useAdvancedStagingBuffers = true; + public boolean useTranslucentFaceSorting = true; public int cpuRenderAheadLimit = 3; } diff --git a/src/main/resources/assets/sodium/lang/en_us.json b/src/main/resources/assets/sodium/lang/en_us.json index 16d2634635..528d824bfe 100644 --- a/src/main/resources/assets/sodium/lang/en_us.json +++ b/src/main/resources/assets/sodium/lang/en_us.json @@ -40,6 +40,8 @@ "sodium.options.use_particle_culling.tooltip": "If enabled, only particles which are determined to be visible will be rendered. 
This can provide a significant improvement to frame rates when many particles are nearby.", "sodium.options.animate_only_visible_textures.name": "Animate Only Visible Textures", "sodium.options.animate_only_visible_textures.tooltip": "If enabled, only animated textures determined to be visible will be updated. This can provide a significant boost to frame rates on some hardware, especially with heavier resource packs. If you experience issues with some textures not being animated, try disabling this option.", + "sodium.options.translucent_face_sorting.name": "Translucent Face Sorting", + "sodium.options.translucent_face_sorting.tooltip": "If enabled, translucent effects from surfaces such as stained glass and water will be applied correctly.\n\nRequires OpenGL 4.3, or both ARB_compute_shader and ARB_shader_storage_buffer_object.", "sodium.options.cpu_render_ahead_limit.name": "CPU Render-Ahead Limit", "sodium.options.cpu_render_ahead_limit.tooltip": "Specifies the maximum number of frames the CPU can be waiting on the GPU to finish rendering. 
Very low or high values may create frame rate instability.", "sodium.options.cpu_render_ahead_limit.value": "%s frame(s)", From 044812254b463e2a7c7551da0f8ce6cc02a33f2e Mon Sep 17 00:00:00 2001 From: Morgan Lewis Date: Wed, 17 Nov 2021 16:20:00 -0600 Subject: [PATCH 6/6] Implement Translucent Face Sorting using compute shader --- .../render/chunk/RegionChunkRenderer.java | 74 +++- .../client/render/chunk/RenderSection.java | 3 +- .../render/chunk/ShaderChunkRenderer.java | 48 ++- .../render/chunk/region/RenderRegion.java | 10 + .../chunk/shader/ComputeShaderInterface.java | 185 ++++++++++ .../block_layer_translucent_compute.glsl | 343 ++++++++++++++++++ 6 files changed, 654 insertions(+), 9 deletions(-) create mode 100644 src/main/java/me/jellysquid/mods/sodium/client/render/chunk/shader/ComputeShaderInterface.java create mode 100644 src/main/resources/assets/sodium/shaders/blocks/block_layer_translucent_compute.glsl diff --git a/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/RegionChunkRenderer.java b/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/RegionChunkRenderer.java index 477a7e261b..1ec596b163 100644 --- a/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/RegionChunkRenderer.java +++ b/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/RegionChunkRenderer.java @@ -20,7 +20,10 @@ import me.jellysquid.mods.sodium.client.render.chunk.format.ChunkMeshAttribute; import me.jellysquid.mods.sodium.client.render.chunk.passes.BlockRenderPass; import me.jellysquid.mods.sodium.client.render.chunk.region.RenderRegion; +import me.jellysquid.mods.sodium.client.render.chunk.shader.ChunkShaderBindingPoints; import me.jellysquid.mods.sodium.client.render.chunk.shader.ChunkShaderInterface; +import me.jellysquid.mods.sodium.client.render.chunk.shader.ComputeShaderInterface; +import org.joml.Matrix4f; import org.lwjgl.system.MemoryUtil; import java.nio.ByteBuffer; @@ -36,6 +39,10 @@ public class RegionChunkRenderer extends 
ShaderChunkRenderer { private final GlMutableBuffer chunkInfoBuffer; private final boolean isBlockFaceCullingEnabled = SodiumClientMod.options().performance.useBlockFaceCulling; + private double lastComputeUpdateX = 0; + private double lastComputeUpdateY = 0; + private double lastComputeUpdateZ = 0; + public RegionChunkRenderer(RenderDevice device, ChunkVertexType vertexType) { super(device, vertexType); @@ -62,6 +69,69 @@ public RegionChunkRenderer(RenderDevice device, ChunkVertexType vertexType) { public void render(ChunkRenderMatrices matrices, CommandList commandList, ChunkRenderList list, BlockRenderPass pass, ChunkCameraContext camera) { + if(pass.isTranslucent() && SodiumClientMod.options().advanced.useTranslucentFaceSorting) { + super.beginCompute(pass); + + boolean fullRebuild = false; + if (activeComputeProgram != null) { + ComputeShaderInterface compute = activeComputeProgram.getInterface(); + + double cameraX = camera.blockX + camera.deltaX; + double cameraY = camera.blockY + camera.deltaY; + double cameraZ = camera.blockZ + camera.deltaZ; + + //If we have moved set all chunks as needing compute + double dx = cameraX - lastComputeUpdateX; + double dy = cameraY - lastComputeUpdateY; + double dz = cameraZ - lastComputeUpdateZ; + if(dx * dx + dy * dy + dz * dz > 1.0D) { + lastComputeUpdateX = cameraX; + lastComputeUpdateY = cameraY; + lastComputeUpdateZ = cameraZ; + fullRebuild = true; + } + + compute.setDrawUniforms(this.chunkInfoBuffer); + + boolean runCompute = true; + //We want compute to run beginning with the closest chunks + for (Map.Entry> entry : sortedRegions(list, false)) { + RenderRegion region = entry.getKey(); + List regionSections = entry.getValue(); + + if(fullRebuild) { + region.setNeedsTranslucencyCompute(true); + if(!runCompute) { + continue; + } + } + + if (region.getNeedsTranslucencyCompute() && !regionSections.isEmpty()) { + if (!buildDrawBatch(regionSections, pass, camera)) { + continue; + } + float x = 
getCameraTranslation(region.getOriginX(), camera.blockX, camera.deltaX); + float y = getCameraTranslation(region.getOriginY(), camera.blockY, camera.deltaY); + float z = getCameraTranslation(region.getOriginZ(), camera.blockZ, camera.deltaZ); + + Matrix4f matrix = this.cachedModelViewMatrix; + matrix.set(matrices.modelView()); + matrix.translate(x, y, z); + + compute.setModelViewMatrix(matrix); + + RenderRegion.RenderRegionArenas arenas = region.getArenas(); + runCompute = compute.execute(commandList, batch, arenas); + region.setNeedsTranslucencyCompute(false); + } + if(!runCompute && !fullRebuild) { + break; + } + } + } + super.endCompute(); + } + super.begin(pass); ChunkShaderInterface shader = this.activeProgram.getInterface(); @@ -80,7 +150,7 @@ public void render(ChunkRenderMatrices matrices, CommandList commandList, this.setModelMatrixUniforms(shader, matrices, region, camera); this.executeDrawBatches(commandList, this.createTessellationForRegion(commandList, region.getArenas(), pass)); } - + super.end(); } @@ -104,7 +174,7 @@ private boolean buildDrawBatch(List sections, BlockRenderPass pas this.addDrawCall(state.getModelPart(ModelQuadFacing.UNASSIGNED), indexOffset, baseVertex); - if (this.isBlockFaceCullingEnabled) { + if (this.isBlockFaceCullingEnabled && !(pass.isTranslucent() && SodiumClientMod.options().advanced.useTranslucentFaceSorting)) { if (camera.posY > bounds.y1) { this.addDrawCall(state.getModelPart(ModelQuadFacing.UP), indexOffset, baseVertex); } diff --git a/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/RenderSection.java b/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/RenderSection.java index 3e3c519a88..553cc4b444 100644 --- a/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/RenderSection.java +++ b/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/RenderSection.java @@ -2,9 +2,9 @@ import me.jellysquid.mods.sodium.client.render.SodiumWorldRenderer; import 
me.jellysquid.mods.sodium.client.render.chunk.compile.ChunkBuildResult; -import me.jellysquid.mods.sodium.client.render.chunk.graph.ChunkGraphInfo; import me.jellysquid.mods.sodium.client.render.chunk.data.ChunkRenderBounds; import me.jellysquid.mods.sodium.client.render.chunk.data.ChunkRenderData; +import me.jellysquid.mods.sodium.client.render.chunk.graph.ChunkGraphInfo; import me.jellysquid.mods.sodium.client.render.chunk.passes.BlockRenderPass; import me.jellysquid.mods.sodium.client.render.chunk.region.RenderRegion; import me.jellysquid.mods.sodium.client.render.texture.SpriteUtil; @@ -307,6 +307,7 @@ public boolean canAcceptBuildResults(ChunkBuildResult result) { public void onBuildFinished(ChunkBuildResult result) { this.setData(result.data); this.lastAcceptedBuildTime = result.buildTime; + region.setNeedsTranslucencyCompute(true); } public int getChunkId() { diff --git a/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/ShaderChunkRenderer.java b/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/ShaderChunkRenderer.java index d10532167d..704f646381 100644 --- a/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/ShaderChunkRenderer.java +++ b/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/ShaderChunkRenderer.java @@ -2,21 +2,19 @@ import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap; import me.jellysquid.mods.sodium.client.gl.attribute.GlVertexFormat; -import me.jellysquid.mods.sodium.client.gl.shader.*; import me.jellysquid.mods.sodium.client.gl.device.RenderDevice; +import me.jellysquid.mods.sodium.client.gl.shader.*; import me.jellysquid.mods.sodium.client.model.vertex.type.ChunkVertexType; import me.jellysquid.mods.sodium.client.render.chunk.format.ChunkMeshAttribute; import me.jellysquid.mods.sodium.client.render.chunk.passes.BlockRenderPass; -import me.jellysquid.mods.sodium.client.render.chunk.shader.ChunkFogMode; -import me.jellysquid.mods.sodium.client.render.chunk.shader.ChunkShaderInterface; 
-import me.jellysquid.mods.sodium.client.render.chunk.shader.ChunkShaderBindingPoints; -import me.jellysquid.mods.sodium.client.render.chunk.shader.ChunkShaderOptions; +import me.jellysquid.mods.sodium.client.render.chunk.shader.*; import net.minecraft.util.Identifier; import java.util.Map; public abstract class ShaderChunkRenderer implements ChunkRenderer { private final Map> programs = new Object2ObjectOpenHashMap<>(); + private final Map> computes = new Object2ObjectOpenHashMap<>(); protected final ChunkVertexType vertexType; protected final GlVertexFormat vertexFormat; @@ -24,6 +22,7 @@ public abstract class ShaderChunkRenderer implements ChunkRenderer { protected final RenderDevice device; protected GlProgram activeProgram; + protected GlProgram activeComputeProgram; public ShaderChunkRenderer(RenderDevice device, ChunkVertexType vertexType) { this.device = device; @@ -41,12 +40,31 @@ protected GlProgram compileProgram(ChunkShaderOptions opti return program; } + protected GlProgram compileComputeProgram(ChunkShaderOptions options) { + GlProgram compute = this.computes.get(options); + + if (compute == null) { + GlShader shader = ShaderLoader.loadShader(ShaderType.COMPUTE, + new Identifier("sodium", "blocks/block_layer_translucent_compute.glsl"), options.constants()); + + try { + this.computes.put(options, + compute = GlProgram.builder(new Identifier("sodium", "chunk_shader_compute")) + .attachShader(shader) + .link(ComputeShaderInterface::new)); + } finally { + shader.delete(); + } + } + return compute; + } + private GlProgram createShader(String path, ChunkShaderOptions options) { ShaderConstants constants = options.constants(); GlShader vertShader = ShaderLoader.loadShader(ShaderType.VERTEX, new Identifier("sodium", path + ".vsh"), constants); - + GlShader fragShader = ShaderLoader.loadShader(ShaderType.FRAGMENT, new Identifier("sodium", path + ".fsh"), constants); @@ -69,6 +87,7 @@ private GlProgram createShader(String path, ChunkShaderOpt protected void 
begin(BlockRenderPass pass) {
         ChunkShaderOptions options = new ChunkShaderOptions(ChunkFogMode.SMOOTH, pass, this.vertexType);
 
+        this.activeComputeProgram = null;
         this.activeProgram = this.compileProgram(options);
         this.activeProgram.bind();
         this.activeProgram.getInterface()
@@ -80,11 +99,28 @@ protected void end() {
         this.activeProgram = null;
     }
 
+    protected void beginCompute(BlockRenderPass pass) {
+        ChunkShaderOptions options = new ChunkShaderOptions(ChunkFogMode.SMOOTH, pass, this.vertexType);
+
+        this.activeProgram = null;
+        this.activeComputeProgram = this.compileComputeProgram(options);
+        this.activeComputeProgram.bind();
+        this.activeComputeProgram.getInterface()
+                .setup(this.vertexType);
+    }
+
+    protected void endCompute() {
+        this.activeComputeProgram.unbind();
+        this.activeComputeProgram = null;
+    }
+
     @Override
     public void delete() {
         this.programs.values()
                 .forEach(GlProgram::delete);
         this.programs.clear();
+        this.computes.values().forEach(GlProgram::delete);
+        this.computes.clear();
     }
 
     @Override
diff --git a/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/region/RenderRegion.java b/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/region/RenderRegion.java
index fefbe7c705..831ef5f178 100644
--- a/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/region/RenderRegion.java
+++ b/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/region/RenderRegion.java
@@ -52,6 +52,8 @@ public class RenderRegion {
 
     private Frustum.Visibility visibility;
 
+    private boolean needsTranslucencyCompute = false;
+
     public RenderRegion(RenderRegionManager manager, int x, int y, int z) {
         this.manager = manager;
 
@@ -137,6 +139,14 @@ public Frustum.Visibility getVisibility() {
         return this.visibility;
     }
 
+    public void setNeedsTranslucencyCompute(boolean compute) {
+        this.needsTranslucencyCompute = compute;
+    }
+
+    public boolean getNeedsTranslucencyCompute() {
+        return this.needsTranslucencyCompute;
+    }
+
     public static int getChunkIndex(int x, int y, int z) {
         return (x * RenderRegion.REGION_LENGTH * RenderRegion.REGION_HEIGHT) + (y * RenderRegion.REGION_LENGTH) + z;
     }
diff --git a/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/shader/ComputeShaderInterface.java b/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/shader/ComputeShaderInterface.java
new file mode 100644
index 0000000000..3066161760
--- /dev/null
+++ b/src/main/java/me/jellysquid/mods/sodium/client/render/chunk/shader/ComputeShaderInterface.java
@@ -0,0 +1,261 @@
+package me.jellysquid.mods.sodium.client.render.chunk.shader;
+
+import me.jellysquid.mods.sodium.client.gl.buffer.GlBufferTarget;
+import me.jellysquid.mods.sodium.client.gl.buffer.GlBufferUsage;
+import me.jellysquid.mods.sodium.client.gl.buffer.GlMutableBuffer;
+import me.jellysquid.mods.sodium.client.gl.device.CommandList;
+import me.jellysquid.mods.sodium.client.gl.device.RenderDevice;
+import me.jellysquid.mods.sodium.client.gl.shader.uniform.GlUniformBlock;
+import me.jellysquid.mods.sodium.client.gl.shader.uniform.GlUniformFloat;
+import me.jellysquid.mods.sodium.client.gl.shader.uniform.GlUniformInt;
+import me.jellysquid.mods.sodium.client.gl.shader.uniform.GlUniformMatrix4f;
+import me.jellysquid.mods.sodium.client.gl.util.MultiDrawBatch;
+import me.jellysquid.mods.sodium.client.model.vertex.type.ChunkVertexType;
+import me.jellysquid.mods.sodium.client.render.chunk.region.RenderRegion;
+import net.minecraft.util.math.MathHelper;
+import org.joml.Matrix4f;
+import org.lwjgl.PointerBuffer;
+import org.lwjgl.opengl.GL42C;
+import org.lwjgl.opengl.GL43C;
+import org.lwjgl.opengl.GLCapabilities;
+
+import java.nio.IntBuffer;
+import java.util.ArrayList;
+
+/**
+ * Java-side binding for the translucency sorting compute shader
+ * (shaders/blocks/block_layer_translucent_compute.glsl). It holds the shader's uniforms, uploads the
+ * per-batch storage buffers and issues the dispatches that sort the index data of every chunk in a batch.
+ */
+public class ComputeShaderInterface {
+    //These constants must be the same as the constants defined in shaders/blocks/block_layer_translucent_compute.glsl
+    private static final int LOCAL_BMS = 0;
+    private static final int LOCAL_DISPERSE = 1;
+    private static final int GLOBAL_FLIP = 2;
+    private static final int GLOBAL_DISPERSE = 3;
+    //1024 is the minimum defined by OpenGL spec.
+    //Some cards support 2048 but then we may run into workgroup memory issues
+    private static final int COMPUTE_WORK_GROUP_SIZE_X = 1024;
+    //Every invocation loads two elements, so a single work group sorts this many elements in shared memory.
+    private static final int LOCAL_SORT_SIZE = COMPUTE_WORK_GROUP_SIZE_X * 2;
+    //The shader rewrites the index buffer through a shader storage block and reads it back in the following
+    //dispatch, so GL_SHADER_STORAGE_BARRIER_BIT is required between dispatches. The element array bit covers
+    //later indexed draws from the same buffer (NOTE(review): assumes the sorted buffer is drawn from afterwards).
+    private static final int MEMORY_BARRIERS = GL43C.GL_SHADER_STORAGE_BARRIER_BIT
+            | GL42C.GL_ELEMENT_ARRAY_BARRIER_BIT
+            | GL42C.GL_BUFFER_UPDATE_BARRIER_BIT
+            | GL42C.GL_UNIFORM_BARRIER_BIT;
+
+    /**
+     * Checks whether the device supports compute shaders and shader storage buffers, either through OpenGL 4.3
+     * or through the ARB_compute_shader and ARB_shader_storage_buffer_object extensions.
+     */
+    public static boolean isSupported(RenderDevice instance) {
+        GLCapabilities capabilities = instance.getCapabilities();
+        return capabilities.OpenGL43 || (capabilities.GL_ARB_compute_shader && capabilities.GL_ARB_shader_storage_buffer_object);
+    }
+
+    private final GlUniformMatrix4f uniformModelViewMatrix;
+    private final GlUniformBlock uniformBlockDrawParameters;
+    private final GlUniformFloat uniformModelScale;
+    private final GlUniformFloat uniformModelOffset;
+    private final GlUniformInt uniformExecutionType;
+    private final GlUniformInt uniformSortHeight;
+    //Scratch lists reused between calls to execute()
+    private final ArrayList<Integer> pointerList = new ArrayList<>();
+    private final ArrayList<Integer> subDataList = new ArrayList<>();
+
+    /**
+     * Resolves the uniforms and the ubo_DrawParameters uniform block of the compute program.
+     */
+    public ComputeShaderInterface(ShaderBindingContext context) {
+        this.uniformModelViewMatrix = context.bindUniform("u_ModelViewMatrix", GlUniformMatrix4f::new);
+        this.uniformModelScale = context.bindUniform("u_ModelScale", GlUniformFloat::new);
+        this.uniformModelOffset = context.bindUniform("u_ModelOffset", GlUniformFloat::new);
+        this.uniformExecutionType = context.bindUniform("u_ExecutionType", GlUniformInt::new);
+        this.uniformSortHeight = context.bindUniform("u_SortHeight", GlUniformInt::new);
+
+        this.uniformBlockDrawParameters = context.bindUniformBlock("ubo_DrawParameters", 0);
+    }
+
+    /**
+     * Uploads the position scale and offset of the given vertex type to the shader.
+     */
+    public void setup(ChunkVertexType vertexType) {
+        this.uniformModelScale.setFloat(vertexType.getPositionScale());
+        this.uniformModelOffset.setFloat(vertexType.getPositionOffset());
+    }
+
+    /**
+     * Executes the compute shader, using multiple calls to glDispatchCompute if
+     * the data set is too large to be sorted in one call.
+     *
+     * @param commandList command list used to create, fill and bind the storage buffers
+     * @param batch       draw batch whose entries are grouped into chunks by their base vertex
+     * @param arenas      arenas providing the vertex and index buffers bound at bindings 1 and 2
+     * @return true if the single local sort pass was enough, false if additional global passes were dispatched
+     */
+    public boolean execute(CommandList commandList, MultiDrawBatch batch, RenderRegion.RenderRegionArenas arenas) {
+        PointerBuffer pointerBuffer = batch.getPointerBuffer();
+        IntBuffer countBuffer = batch.getCountBuffer();
+        IntBuffer baseVertexBuffer = batch.getBaseVertexBuffer();
+
+        int entryCount = countBuffer.remaining();
+        if (entryCount == 0) {
+            //Nothing to sort
+            return true;
+        }
+
+        this.pointerList.clear();
+        this.subDataList.clear();
+
+        //Read with absolute indices so the positions of the batch buffers stay untouched: the count and base
+        //vertex buffers are uploaded below, and a consumed buffer would upload zero elements.
+        int pointerStart = pointerBuffer.position();
+        int countStart = countBuffer.position();
+        int baseVertexStart = baseVertexBuffer.position();
+
+        int lastBaseVertex = baseVertexBuffer.get(baseVertexStart);
+        int chunkCount = 0;
+        int chunkFirstEntry = 0;
+        int chunkEntryCount = 0;
+        int chunkIndexCount = 0;
+        int largestIndexCount = 0;
+
+        for (int entry = 0; entry < entryCount; entry++) {
+            int baseVertex = baseVertexBuffer.get(baseVertexStart + entry);
+
+            //A change of base vertex marks the first entry of the next chunk
+            if (baseVertex != lastBaseVertex) {
+                lastBaseVertex = baseVertex;
+
+                this.addChunkRange(chunkFirstEntry, chunkEntryCount, chunkIndexCount);
+                largestIndexCount = Math.max(largestIndexCount, chunkIndexCount);
+                chunkCount++;
+                chunkFirstEntry += chunkEntryCount;
+                chunkEntryCount = 0;
+                chunkIndexCount = 0;
+            }
+
+            this.pointerList.add((int) pointerBuffer.get(pointerStart + entry)); //IndexOffset
+            chunkIndexCount += countBuffer.get(countStart + entry);
+            chunkEntryCount++;
+        }
+
+        //Flush the chunk that was still open when the batch ended
+        this.addChunkRange(chunkFirstEntry, chunkEntryCount, chunkIndexCount);
+        largestIndexCount = Math.max(largestIndexCount, chunkIndexCount);
+        chunkCount++;
+
+        commandList.bindBufferBase(GlBufferTarget.SHADER_STORAGE_BUFFER, 1, arenas.vertexBuffers.getBufferObject());
+        commandList.bindBufferBase(GlBufferTarget.SHADER_STORAGE_BUFFER, 2, arenas.indexBuffers.getBufferObject());
+
+        //NOTE(review): the four buffers created below are never deleted in this class, so every call allocates
+        //new GL buffers. They should be reused or released once the dispatches have been issued.
+        uploadStorageBuffer(commandList, 3, toIntArray(this.subDataList));
+        uploadStorageBuffer(commandList, 4, toIntArray(this.pointerList));
+        uploadStorageBuffer(commandList, 5, countBuffer);
+        uploadStorageBuffer(commandList, 6, baseVertexBuffer);
+
+        int maxHeight = nextPowerOfTwo(largestIndexCount / 3);
+        int groups = (maxHeight / LOCAL_SORT_SIZE) + 1;
+        int height = LOCAL_SORT_SIZE;
+
+        //Begin by running a normal bitonic sort on all chunks.
+        //For chunks with at most LOCAL_SORT_SIZE triangles this
+        //is the only work that needs to be done.
+        this.uniformSortHeight.setInt(height);
+        this.uniformExecutionType.setInt(LOCAL_BMS);
+        dispatch(groups, chunkCount);
+
+        boolean isCheap = true;
+
+        //Keep getting height bigger until we cover all of n
+        for (height *= 2; height <= maxHeight; height *= 2) {
+            isCheap = false;
+            this.uniformExecutionType.setInt(GLOBAL_FLIP);
+            this.uniformSortHeight.setInt(height);
+            dispatch(groups, chunkCount);
+
+            for (int halfHeight = height / 2; halfHeight > 1; halfHeight /= 2) {
+                this.uniformSortHeight.setInt(halfHeight);
+
+                if (halfHeight >= LOCAL_SORT_SIZE) {
+                    this.uniformExecutionType.setInt(GLOBAL_DISPERSE);
+                    dispatch(groups, chunkCount);
+                } else {
+                    //The remaining disperse steps fit into one work group and run in a single dispatch
+                    this.uniformExecutionType.setInt(LOCAL_DISPERSE);
+                    dispatch(groups, chunkCount);
+                    break;
+                }
+            }
+        }
+
+        return isCheap;
+    }
+
+    /**
+     * Appends one chunk record (first entry, entry count, index count) to the data uploaded at binding 3.
+     */
+    private void addChunkRange(int firstEntry, int entryCount, int indexCount) {
+        this.subDataList.add(firstEntry);
+        this.subDataList.add(entryCount);
+        this.subDataList.add(indexCount);
+    }
+
+    /**
+     * Unboxes a list of integers into a primitive array.
+     */
+    private static int[] toIntArray(ArrayList<Integer> values) {
+        return values.stream().mapToInt(Integer::intValue).toArray();
+    }
+
+    /**
+     * Creates a buffer holding the given data and binds it to a shader storage binding point.
+     */
+    private static void uploadStorageBuffer(CommandList commandList, int binding, int[] data) {
+        GlMutableBuffer buffer = commandList.createMutableBuffer();
+        commandList.bufferData(GlBufferTarget.SHADER_STORAGE_BUFFER, buffer, data, GlBufferUsage.DYNAMIC_DRAW);
+        commandList.bindBufferBase(GlBufferTarget.SHADER_STORAGE_BUFFER, binding, buffer);
+    }
+
+    /**
+     * Creates a buffer holding the given data and binds it to a shader storage binding point.
+     */
+    private static void uploadStorageBuffer(CommandList commandList, int binding, IntBuffer data) {
+        GlMutableBuffer buffer = commandList.createMutableBuffer();
+        commandList.bufferData(GlBufferTarget.SHADER_STORAGE_BUFFER, buffer, data, GlBufferUsage.DYNAMIC_DRAW);
+        commandList.bindBufferBase(GlBufferTarget.SHADER_STORAGE_BUFFER, binding, buffer);
+    }
+
+    /**
+     * Returns the smallest power of two that is greater than or equal to the value; 0 and 1 are returned as is.
+     * Integer arithmetic avoids the rounding errors of a floating point log2.
+     */
+    private static int nextPowerOfTwo(int value) {
+        return value > 1 ? Integer.highestOneBit(value - 1) << 1 : value;
+    }
+
+    /**
+     * Dispatches the compute shader and makes its buffer writes visible to the commands that follow.
+     */
+    private static void dispatch(int groups, int chunkCount) {
+        GL43C.glDispatchCompute(groups, chunkCount, 1);
+        GL43C.glMemoryBarrier(MEMORY_BARRIERS);
+    }
+
+    /**
+     * Sets the model-view matrix uniform.
+     */
+    public void setModelViewMatrix(Matrix4f matrix) {
+        this.uniformModelViewMatrix.set(matrix);
+    }
+
+    /**
+     * Binds the buffer backing the ubo_DrawParameters uniform block.
+     */
+    public void setDrawUniforms(GlMutableBuffer buffer) {
+        this.uniformBlockDrawParameters.bindBuffer(buffer);
+    }
+}
diff --git a/src/main/resources/assets/sodium/shaders/blocks/block_layer_translucent_compute.glsl b/src/main/resources/assets/sodium/shaders/blocks/block_layer_translucent_compute.glsl
new file mode 100644
index 0000000000..d9ccb261f0
--- /dev/null
+++ b/src/main/resources/assets/sodium/shaders/blocks/block_layer_translucent_compute.glsl
@@ -0,0 +1,343 @@
+#version 420 core
+
+#extension GL_ARB_shader_storage_buffer_object : require
+#extension GL_ARB_compute_shader : require
+
+#define DUMMY_INDEX 10000000
+#define DUMMY_DISTANCE -1000000
+
+//These constants must match the definitions in me.jellysquid.mods.sodium.client.render.chunk.shader.ComputeShaderInterface
+#define LOCAL_SIZE_X 1024
+#define LOCAL_BMS 0
+#define LOCAL_DISPERSE 1
+#define GLOBAL_FLIP 2
+#define GLOBAL_DISPERSE 3
+
+layout(local_size_x =
LOCAL_SIZE_X) in;
+
+// Per-chunk draw parameters, read from the ubo_DrawParameters uniform block.
+struct DrawParameters {
+// Older AMD drivers can't handle vec3 in std140 layouts correctly
+// The alignment requirement is 16 bytes (4 float components) anyways, so we're not wasting extra memory with this,
+// only fixing broken drivers.
+    vec4 Offset;
+};
+
+//Define packed vertex data
+struct Packed {
+    uint a_Pos1; //ushort[2] //x,y //The position of the vertex around the model origin
+    uint a_Pos2; //ushort[2] //z,w
+    uint a_Color; //The color of the vertex
+    uint a_TexCoord; // The block texture coordinate of the vertex
+    uint a_LightCoord; // The light texture coordinate of the vertex
+};
+
+// Three consecutive entries of the index buffer; they are read, compared and written back as one unit.
+struct IndexGroup {
+    uint i1;
+    uint i2;
+    uint i3;
+};
+
+// One record per chunk of the batch, indexed by gl_WorkGroupID.y.
+struct ChunkMultiDrawRange {
+    uint DataOffset; //Offset into the MultiDrawEntry array that this chunk starts
+    uint DataCount; //How many entries in the MultiDrawEntry array this chunk covers
+    uint DataIndexCount; //The count of all indices referenced by this chunk.
+};
+
+uniform mat4 u_ModelViewMatrix;
+uniform float u_ModelScale;
+uniform float u_ModelOffset;
+uniform int u_IndexOffsetStride = 4; //Number of bytes per array entry in regionIndex (converts an offset into an array index)
+uniform int u_IndexLengthStride = 3; //Number of vertices referenced per IndexGroup
+uniform int u_ExecutionType; //One of LOCAL_BMS, LOCAL_DISPERSE, GLOBAL_FLIP, GLOBAL_DISPERSE
+uniform int u_SortHeight; //Height of the sort step to run; passed to the function selected by u_ExecutionType
+
+layout(std140, binding = 0) uniform ubo_DrawParameters {
+    DrawParameters Chunks[256];
+};
+
+/*
+A chunk is "big" if the number of verts in its translucent mesh is > LOCAL_SIZE_X * 2 * 3.
+If a chunk is "big" multiple dispatches are required to fully sort the chunk and therefore the region.
+
+Compute shaders have 3 levels of granularity:
+Dispatch - A call to glDispatchCompute creates a Dispatch consisting of multiple work groups.
+           The number of work groups per dispatch are defined when calling the dispatch as X, Y, and Z values.
+
+WorkGroup - For this implementation gl_WorkGroupID.y indicates the chunk within the region that each work group is working on
+            while gl_WorkGroupID.x indicates the position within the chunk, and is only used for regions where
+            at least one chunk is "big"
+
+Invocation or Thread - The smallest unit of a compute shader. There are LOCAL_SIZE_X Invocations for each WorkGroup
+                       Invocations have the distinct advantage of being able to share memory between other invocations
+                       within their work group and also are able to sync execution within their work group.
+*/
+
+// Binding 1: vertex buffer of the region (ComputeShaderInterface binds the vertex arena's buffer here).
+layout(std430, binding = 1) restrict readonly buffer region_mesh_buffer {
+    Packed regionMesh[];
+};
+
+// Binding 2: index buffer of the region (the index arena's buffer); the only buffer this shader writes to.
+layout(std430, binding = 2) coherent buffer region_index_buffer {
+    uint regionIndex[];
+};
+
+// Binding 3: one ChunkMultiDrawRange per chunk, built by ComputeShaderInterface.execute.
+layout(std430, binding = 3) restrict readonly buffer chunk_sub_count {
+    ChunkMultiDrawRange chunkMultiDrawRange[];
+};
+
+// Binding 4: per draw entry, the entry's offset into the index buffer (divided by u_IndexOffsetStride before use).
+layout(std430, binding = 4) restrict readonly buffer index_offset_buffer {
+    int indexOffset[];
+};
+
+// Binding 5: per draw entry, the number of indices it covers (the batch's count buffer).
+layout(std430, binding = 5) restrict readonly buffer index_length_buffer {
+    int indexLength[];
+};
+
+// Binding 6: per draw entry, the base vertex added to every index (the batch's base vertex buffer).
+layout(std430, binding = 6) restrict readonly buffer vertex_offset_buffer {
+    int vertexOffset[];
+};
+
+// An index group together with the distance it is sorted by.
+struct IndexDistancePair {
+    IndexGroup indexGroup;
+    float distance;
+};
+
+//Workgroup memory.
+shared IndexDistancePair local_value[LOCAL_SIZE_X * 2];
+
+// Position in regionIndex (counted in uints) at which multi-draw entry i starts.
+uint getIndexOffset(uint i) {
+    return indexOffset[i] / u_IndexOffsetStride;
+}
+
+// Number of index groups referenced by multi-draw entry i.
+uint getIndexLength(uint i) {
+    return indexLength[i] / u_IndexLengthStride;
+}
+
+// Draw range of the chunk this work group is working on.
+ChunkMultiDrawRange getSubInfo() {
+    return chunkMultiDrawRange[gl_WorkGroupID.y];
+}
+
+// Splits the two packed position words into their four 16 bit components.
+vec4 unpackPos(Packed p) {
+    uint x = p.a_Pos1 & 0xFFFFu;
+    uint y = p.a_Pos1 >> 16;
+    uint z = p.a_Pos2 & 0xFFFFu;
+    uint w = p.a_Pos2 >> 16;
+    return vec4(x, y, z, w);
+}
+
+// Sort key of an index group: the distance from the view space origin to the
+// midpoint of the longest edge of the triangle the group references.
+float getAverageDistance(IndexGroup indexGroup) {
+    ChunkMultiDrawRange subInfo = getSubInfo();
+    uint vOffset = vertexOffset[subInfo.DataOffset];
+
+    //Nvidia drivers need these variables defined before unpackPos
+    Packed rm1 = regionMesh[indexGroup.i1 + vOffset];
+    Packed rm2 = regionMesh[indexGroup.i2 + vOffset];
+    Packed rm3 = regionMesh[indexGroup.i3 + vOffset];
+    vec4 rawPosition1 = unpackPos(rm1);
+    vec4 rawPosition2 = unpackPos(rm2);
+    vec4 rawPosition3 = unpackPos(rm3);
+
+    float dist12 = length(rawPosition1 - rawPosition2);
+    float dist23 = length(rawPosition2 - rawPosition3);
+    float dist31 = length(rawPosition3 - rawPosition1);
+    vec4 rawPosition;
+    //TODO There is probably a better way to find the longest side
+    if(dist12 > dist23) {
+        if(dist12 > dist31) {
+            rawPosition = (rawPosition1 + rawPosition2) / 2;
+        } else {
+            rawPosition = (rawPosition3 + rawPosition1) / 2;
+        }
+    } else {
+        if(dist23 > dist31) {
+            rawPosition = (rawPosition2 + rawPosition3) / 2;
+        } else {
+            rawPosition = (rawPosition3 + rawPosition1) / 2;
+        }
+    }
+
+    vec3 vertexPosition = rawPosition.xyz * u_ModelScale + u_ModelOffset;
+    // The w component of the packed position selects the chunk's entry in ubo_DrawParameters
+    vec3 chunkOffset = Chunks[int(rawPosition1.w)].Offset.xyz;
+    vec4 pos = u_ModelViewMatrix * vec4(chunkOffset + vertexPosition, 1.0);
+
+    // Only xyz is a position; taking the length of the full vec4 would mix the w component into the distance
+    return length(pos.xyz);
+}
+
+//Convert an index into the indices array from [0..IndicesInChunk] to [0..IndicesInBuffer]
+//Returns DUMMY_INDEX when the index lies beyond the entries of the chunk.
+uint getFullIndex(uint index) {
+    ChunkMultiDrawRange subInfo = getSubInfo();
+    for (uint i = 0u; i < subInfo.DataCount; i++) {
+        uint data = subInfo.DataOffset + i;
+        uint entryLength = getIndexLength(data);
+        if(index < entryLength) {
+            return getIndexOffset(data) + index * u_IndexLengthStride;
+        }
+        // Not in this entry: make the index relative to the next one
+        index -= entryLength;
+    }
+    return DUMMY_INDEX;
+}
+
+// Reads the three indices starting at fullIndex from the index buffer.
+IndexGroup readIndexGroup(uint fullIndex) {
+    return IndexGroup(regionIndex[fullIndex + 0], regionIndex[fullIndex + 1], regionIndex[fullIndex + 2]);
+}
+
+// Writes the three indices of indexGroup to the index buffer, starting at fullIndex.
+void writeIndexGroup(uint fullIndex, IndexGroup indexGroup) {
+    regionIndex[fullIndex + 0] = indexGroup.i1;
+    regionIndex[fullIndex + 1] = indexGroup.i2;
+    regionIndex[fullIndex + 2] = indexGroup.i3;
+}
+
+// Performs compare-and-swap over elements held in shared, workgroup-local memory
+void local_compare_and_swap(uvec2 idx){
+    if (local_value[idx.x].distance < local_value[idx.y].distance) {
+        IndexDistancePair tmp = local_value[idx.x];
+        local_value[idx.x] = local_value[idx.y];
+        local_value[idx.y] = tmp;
+    }
+}
+
+// Performs full-height flip (h height) over locally available indices.
+void local_flip(uint h){
+    uint t = gl_LocalInvocationID.x;
+    barrier();
+
+    uint half_h = h >> 1; // Note: h >> 1 is equivalent to h / 2
+    // Unsigned throughout, matching the uvec2 parameter of local_compare_and_swap
+    uint base = h * ( ( 2u * t ) / h );
+    uint offset = t % half_h;
+
+    local_compare_and_swap(uvec2(base + offset, base + h - 1u - offset));
+}
+
+// Performs progressively diminishing disperse operations (starting with height h)
+// on locally available indices: e.g. h==8 -> 8 : 4 : 2.
+// One disperse operation for every time we can half h.
+void local_disperse(in uint h){
+    uint t = gl_LocalInvocationID.x;
+    for ( ; h > 1u ; h /= 2u ) {
+
+        barrier();
+
+        uint half_h = h >> 1; // Note: h >> 1 is equivalent to h / 2
+        uint base = h * ( ( 2u * t ) / h );
+        uint offset = t % half_h;
+
+        local_compare_and_swap(uvec2(base + offset, base + half_h + offset));
+    }
+}
+
+// Perform binary merge sort for local elements, up to a maximum number of elements h.
+void local_bms(uint h){
+    // Each round doubles the sorted run length: one flip, then the disperse steps below it
+    uint hh = 2;
+    while (hh <= h) {
+        local_flip(hh);
+        local_disperse(hh / 2);
+        hh *= 2;
+    }
+}
+
+// Compares two index groups of the current chunk directly in the index buffer and swaps them
+// when the first one is closer than the second. Pairs that reach past the chunk are left alone.
+void global_compare_and_swap(uvec2 idx){
+    uint first = getFullIndex(idx.x);
+    uint second = getFullIndex(idx.y);
+    if (first == DUMMY_INDEX || second == DUMMY_INDEX) {
+        return;
+    }
+
+    IndexGroup firstGroup = readIndexGroup(first);
+    IndexGroup secondGroup = readIndexGroup(second);
+    float firstDistance = getAverageDistance(firstGroup);
+    float secondDistance = getAverageDistance(secondGroup);
+
+    if (firstDistance < secondDistance) {
+        writeIndexGroup(first, secondGroup);
+        writeIndexGroup(second, firstGroup);
+    }
+}
+
+// Performs full-height flip (h height) in buffer
+void global_flip(uint h){
+    uint t = gl_GlobalInvocationID.x;
+    uint half_h = h / 2;
+
+    // Start of the block of h elements this invocation works in, and its offset inside one half
+    uint blockStart = uint((2 * t) / h) * h;
+    uint offset = t % half_h;
+
+    // Pair the element with its mirror image inside the block
+    global_compare_and_swap(uvec2(blockStart + offset, blockStart + h - offset - 1));
+}
+
+// Performs progressively diminishing disperse operations (starting with height h)
+// One disperse operation for every time we can half h.
+void global_disperse(uint h){
+    uint t = gl_GlobalInvocationID.x;
+    uint half_h = h >> 1;
+    uint q = uint((2 * t) / h) * h;
+    uint x = q + (t % half_h);
+    uint y = q + (t % half_h) + half_h;
+    global_compare_and_swap(uvec2(x,y));
+}
+
+// Runs one of the workgroup-local passes (LOCAL_BMS or LOCAL_DISPERSE): loads two index groups
+// per invocation into shared memory, sorts there and writes the result back to the index buffer.
+void local_main(uint executionType, uint height) {
+    uint t = gl_LocalInvocationID.x;
+    uint offset = gl_WorkGroupSize.x * 2 * gl_WorkGroupID.x;
+
+    uint fullIndex1 = getFullIndex(offset+t*2);
+    uint fullIndex2 = getFullIndex(offset+t*2+1);
+
+    // Slots past the end of the chunk are padded with dummy entries. Their distance is below any
+    // real distance, so the descending sort leaves them at the end.
+    IndexGroup rig1 = IndexGroup(DUMMY_INDEX, DUMMY_INDEX, DUMMY_INDEX);
+    IndexGroup rig2 = IndexGroup(DUMMY_INDEX, DUMMY_INDEX, DUMMY_INDEX);
+    float distance1 = DUMMY_DISTANCE;
+    float distance2 = DUMMY_DISTANCE;
+
+    // Only touch the buffers for real entries; reading at DUMMY_INDEX would be out of bounds
+    if (fullIndex1 != DUMMY_INDEX) {
+        rig1 = readIndexGroup(fullIndex1);
+        distance1 = getAverageDistance(rig1);
+    }
+    if (fullIndex2 != DUMMY_INDEX) {
+        rig2 = readIndexGroup(fullIndex2);
+        distance2 = getAverageDistance(rig2);
+    }
+
+    // Each local worker must save two elements to local memory, as there
+    // are twice as many elements as workers.
+    local_value[t*2]   = IndexDistancePair(rig1, distance1);
+    local_value[t*2+1] = IndexDistancePair(rig2, distance2);
+
+    if (executionType == LOCAL_BMS) {
+        local_bms(height);
+    }
+    if (executionType == LOCAL_DISPERSE) {
+        local_disperse(height);
+    }
+
+    barrier();
+    //Write local memory back to buffer
+    IndexGroup ig1 = local_value[t*2].indexGroup;
+    IndexGroup ig2 = local_value[t*2+1].indexGroup;
+
+    if (fullIndex1 != DUMMY_INDEX) {
+        writeIndexGroup(fullIndex1, ig1);
+    }
+    if (fullIndex2 != DUMMY_INDEX) {
+        writeIndexGroup(fullIndex2, ig2);
+    }
+}
+
+// Entry point: skips work groups the current chunk does not need, then runs the pass selected by u_ExecutionType.
+void main(){
+    uint triangleCount = getSubInfo().DataIndexCount / u_IndexLengthStride;
+    // Smallest power of two >= triangleCount (0 and 1 stay as they are). Integer math is used because
+    // log() is undefined for 0 and a float pow()/log() round trip is not guaranteed to be exact.
+    uint computeSize = triangleCount > 1u ? (1u << uint(findMSB(triangleCount - 1u) + 1)) : triangleCount;
+    uint usedWorkgroups = (computeSize / (gl_WorkGroupSize.x * 2)) + 1;
+
+    //Exit early for unneeded work groups
+    if(gl_WorkGroupID.x >= usedWorkgroups) {
+        return;
+    }
+
+    if(u_ExecutionType == LOCAL_BMS || u_ExecutionType == LOCAL_DISPERSE) {
+        local_main(u_ExecutionType, u_SortHeight);
+    }
+
+    if(u_ExecutionType == GLOBAL_FLIP) {
+        global_flip(u_SortHeight);
+    }
+    if(u_ExecutionType == GLOBAL_DISPERSE) {
+        global_disperse(u_SortHeight);
+    }
+}
\ No newline at end of file