data/shaders/ism/pull.comp

#version 430

// Enables the #include directive used on the next line.
#extension GL_ARB_shading_language_include : require
// NOTE(review): presumably provides pack2FloatsToFloat / unpack2FloatsFromFloat,
// which main() calls but this file does not define - confirm.
#include </data/shaders/common/floatpacking.glsl>

// When defined, main() takes its input from softrenderBuffer and derives
// maxDepth and the pixel radius itself; otherwise it reads ismDepthImage.
#define LEVEL_ZERO

// Work group of 8x8 invocations; main() maps each invocation to one output pixel.
layout (local_size_x = 8, local_size_y = 8) in;

// Unsigned-integer input texture (LEVEL_ZERO path). main() treats the upper
// 24 bits of the red channel as depth (divided by 2^24) and the lower 8 bits
// as radius (divided by 10).
layout (binding = 0) uniform usampler2D softrenderBuffer;
// Input image for the non-LEVEL_ZERO path. main() reads r = depth,
// g = max depth, b = radius, a = packed displacement vector.
layout (rgba32f, binding = 0) restrict readonly uniform image2D ismDepthImage;
// Output image. main() writes r = depth, g = max depth, b = radius,
// a = packed displacement vector, or (1, 0, 0, 0) when no sample is valid.
layout (rgba32f, binding = 1) restrict writeonly uniform image2D img_output;

// main() scales the radius by 2^-level before the radius check.
uniform int level;
// main() multiplies the normalized depth by zFar to get the camera distance
// and divides by it when deriving maxDepth.
uniform float zFar;
// Factor main() uses when converting the world-space radius to pixels.
uniform int ismPixelSize;

// Starting value for the minimum-depth search in main().
// NOTE(review): relies on the implementation evaluating 1/0 as +Inf - confirm
// for the targeted drivers.
const float infinity = 1. / 0.;

// Pull step: every invocation merges a 2x2 block of input pixels into one
// output pixel.
//
// Per input sample it gathers depth, max depth, radius and displacement
// vector, then:
//   1. rejects samples whose depth is exactly 1.0 or whose displaced centre
//      lies further from the output pixel centre than the level-scaled radius,
//   2. finds the nearest remaining sample and rejects every sample deeper
//      than that sample's max depth,
//   3. writes the mean depth / radius / displacement of the survivors and the
//      largest surviving max depth, or (1, 0, 0, 0) if nothing survived.
void main()
{
    ivec2 outputPixelCoord = ivec2(gl_GlobalInvocationID.xy);

    // Invocations outside the output image have nothing to write (the store
    // would be discarded anyway), so skip the work entirely.
    if (any(greaterThanEqual(outputPixelCoord, imageSize(img_output))))
        return;

    const ivec2 offsets[4] = ivec2[4](ivec2(0, 0), ivec2(0, 1), ivec2(1, 0), ivec2(1, 1));

    float depthSamples[4];
    float maxDepths[4];
    float radiuses[4];
    vec2 displacementVectors[4];
    bool valid[4];

    // Loop-invariant factor applied to the radius before the radius check.
    // NOTE(review): presumably radii are kept in level-0 pixel units, hence
    // the halving per level - confirm.
    float levelScale = pow(2.0, -float(level));

    for (int i = 0; i < 4; i++) {
        ivec2 inputPixelCoord = outputPixelCoord * 2 + offsets[i];

        #ifdef LEVEL_ZERO
            // upper 24 bits: depth, lower 8 bits: radius in tenths
            uint depthRadiusSample = texelFetch(softrenderBuffer, inputPixelCoord, 0).r;
            float depthSample = float(depthRadiusSample >> 8) / float(1 << 24);
            float radius = float(depthRadiusSample & 0xFFu) / 10.0;

            vec2 displacementVector = vec2(0.0);

            // the projection is performed here, not in ism.comp,
            // as the *world* radius, not projected radius, is needed for the maxDepth calculation here.

            float magicFactor = 0.6; // hand-tuned to make things look better
            // radius * 2 since the next point on the same surface is 2r away.
            float maxDepth = depthSample + (radius * 2.0) / zFar * magicFactor;

            // radius is in world units so far, project & convert to pixels
            float distToCamera = depthSample * zFar;
            radius = radius / distToCamera / 3.14 * float(ismPixelSize); // approximation that breaks especially for near points.
            // boost radius a bit to make circle area match the point rendering square area
            radius *= 1.3;
            // clamp to avoid overly large points ruining everything
            radius = min(radius, 15.0);
        #else
            vec4 read = imageLoad(ismDepthImage, inputPixelCoord);
            float depthSample = read.r;
            float maxDepth = read.g;
            float radius = read.b;
            vec2 displacementVector = unpack2FloatsFromFloat(read.a);
        #endif

        // Displacement of the sample's (displaced) centre relative to the
        // output pixel centre, expressed in output-pixel units.
        vec2 newDisplacementVector = (vec2(inputPixelCoord) + 0.5 + displacementVector) / 2.0
                                   - (vec2(outputPixelCoord) + 0.5);

        // radius check
        bool radiusCheckPassed = length(newDisplacementVector) <= radius * levelScale;

        depthSamples[i] = depthSample;
        maxDepths[i] = maxDepth;
        radiuses[i] = radius;
        displacementVectors[i] = newDisplacementVector; // TODO blocky results, but are round when displacement vector set to 0
        // depth 1.0 marks a sample to ignore
        valid[i] = (depthSample != 1.0) && radiusCheckPassed; // TODO unknown whether the radius check helps
    }

    // Find the nearest valid sample and remember its max depth.
    // nearestMaxDepth starts at infinity so that, when no sample is valid,
    // the rejection pass below compares against a defined value and rejects
    // nothing (previously an uninitialized variable was read here).
    float minimum = infinity;
    float nearestMaxDepth = infinity;
    for (int i = 0; i < 4; i++) {
        if (!valid[i])
            continue;
        if (depthSamples[i] <= minimum) {
            minimum = depthSamples[i];
            nearestMaxDepth = maxDepths[i];
        }
    }

    // Reject everything that lies behind the nearest sample's max depth.
    for (int i = 0; i < 4; i++) {
        if (depthSamples[i] > nearestMaxDepth)
            valid[i] = false;
    }

    // Accumulate the surviving samples.
    float depthAcc = 0.0;
    float radiusAcc = 0.0;
    vec2 displacementAcc = vec2(0.0);
    uint numValid = 0u;
    float maxValidMaxDepth = 0.0;
    for (int i = 0; i < 4; i++) {
        if (!valid[i])
            continue;

        depthAcc += depthSamples[i];
        displacementAcc += displacementVectors[i];
        radiusAcc += radiuses[i];
        maxValidMaxDepth = max(maxValidMaxDepth, maxDepths[i]);
        numValid++;
    }

    vec4 result;
    if (numValid > 0u) {
        float invCount = 1.0 / float(numValid);
        result.r = depthAcc * invCount;
        result.g = maxValidMaxDepth;
        result.b = radiusAcc * invCount;
        result.a = pack2FloatsToFloat(displacementAcc * invCount);
    } else {
        // no surviving sample: depth 1.0 marks the pixel as one to ignore
        result = vec4(1.0, 0.0, 0.0, 0.0);
    }

    imageStore(img_output, outputPixelCoord, result);
}