tobspr · kergalym · Feb 17, 2022
diff --git a/data/builtin_models/water/water_foam.png b/data/builtin_models/water/water_foam.png
diff --git a/data/builtin_models/water/water_grid.bam b/data/builtin_models/water/water_grid.bam
diff --git a/effects/projected_water.yaml b/effects/projected_water.yaml
@@ -0,0 +1,144 @@
+# Projected Water effect.
+
+# Vertex stage: for every grid vertex a ray is intersected with the water
+# plane, the hit point is displaced by the heightfield and then projected
+# with currentMVP.
+vertex:
+    inout: |
+        uniform vec3 cameraPosition;
+        uniform sampler2D waterHeightfield;
+        uniform float waterHeight;
+        uniform mat4 currentMVP;
+
+    includes: |
+        #pragma include "rpcore/water/shader/projected_water_func.inc.glsl"
+        #pragma include "rpcore/water/shader/position_reconstruction.inc.glsl"
+
+    transform: |
+        // Remap the grid vertex into coordinate space for the ray setup and
+        // the border fade below. Presumably the grid spans [-1, 1], so the
+        // 1.1 scale lets the outer vertices overshoot [0, 1] - TODO confirm.
+        vec2 coord = vec2( (p3d_Vertex.xz * 1.1)  * 0.5 + 0.5);
+
+        // Compute ray start and direction. The direction is built as
+        // (camera - surface position), so distances along it are negative
+        // for points in front of the camera.
+        vec3 rayStart = cameraPosition;
+        vec3 rayDir = normalize(rayStart - calculateSurfacePos(1.0, coord));
+
+        // Intersect the ray with the horizontal plane z = waterHeight
+        float dist = (-rayStart.z+waterHeight) / rayDir.z;
+        vec3 intersectedPos = vec3(0);
+        float maxWaterDist = 25000.0;
+
+        // If plane is hit
+        if (dist < 0.0) {
+            intersectedPos = rayStart + rayDir * dist;
+        } else {
+            // No hit: push the vertex maxWaterDist away along the ray in xy.
+            // NOTE(review): z is set to 0.0 here, not waterHeight - confirm
+            // this is intended for water planes that are not at z = 0.
+            intersectedPos = vec3(rayStart.xy + rayDir.xy * -maxWaterDist, 0.0);
+        }
+
+        vOutput.position = vec3(intersectedPos);
+        vOutput.texcoord = vec2(vOutput.position.xy / WATER_COORD_FACTOR);
+
+        // Fade displacement at borders: the factor goes to zero within
+        // fadeArea of each of the four edges of the coordinate square.
+        float fade = 1.0;
+        float fadeArea = 0.12;
+
+        fade *= saturate(coord.x / fadeArea);
+        fade *= saturate( (1.0 - coord.x) / fadeArea);
+
+        fade *= saturate(coord.y / fadeArea);
+        fade *= saturate( (1.0 - coord.y) / fadeArea);
+        // Debug toggle: uncomment to disable the border fade.
+        // fade = 1.0;
+        float displaceLod = 0.0;
+
+        // Combine the detail sample with a second sample taken at
+        // WATER_LOWRES_FACTOR scaled coordinates.
+        // NOTE(review): due to operator precedence only the second sample is
+        // multiplied by 2.0 before 1.0 is subtracted - confirm this is wanted.
+        vec3 displace = textureLod(waterHeightfield, vOutput.texcoord, displaceLod).xyz + textureLod(waterHeightfield,
+                                   vOutput.texcoord * WATER_LOWRES_FACTOR, displaceLod).xyz * 2.0 - 1.0;
+        // Displacement strength falls off linearly with camera distance and
+        // reaches zero at WATER_DISPLACE_DIST.
+        float displaceFactor = 1.0 - saturate(distance(cameraPosition, intersectedPos) / WATER_DISPLACE_DIST);
+
+        vOutput.position += displace * WATER_DISPLACE * fade * displaceFactor;
+        vOutput.position = (currentMVP * vec4(vOutput.position.xyz, 1)).xyz;
+
+# Fragment stage: derives the water material (normal, base color and
+# surface parameters) from the normal, heightfield and foam textures.
+fragment:
+    inout: |
+        uniform sampler2D waterHeightfield;
+        uniform sampler2D waterNormal;
+        uniform sampler2D waterFoam;
+        uniform float waterHeight;
+        uniform vec3 cameraPosition;
+
+        uniform sampler2D terrainHeightmap;
+        uniform vec3 terrainScale;
+        uniform vec3 terrainOffset;
+
+    includes: |
+        #pragma include "rpcore/water/shader/projected_water_func.inc.glsl"
+
+    material: |
+            // Look up the terrain height below this fragment. The xy position
+            // is shifted by terrainOffset and divided by the heightmap width
+            // and terrainScale to obtain the lookup coordinate.
+            float hmapSize = textureSize(terrainHeightmap, 0).x;
+            // vec2 terrainCoord = vec2(vOutput.position.xy * terrainScale.xy + terrainOffset.xy) / hmapSize;
+            vec2 terrainCoord = vec2(vOutput.position.xy - terrainOffset.xy) / hmapSize / terrainScale.xy;
+            float terrainHeight = texture(terrainHeightmap, terrainCoord).x * terrainScale.z + terrainOffset.z;
+
+            // Distance based texture LOD. The value computed here is
+            // overridden by the last assignment, so LOD 0 is always used.
+            float sampleLod = saturate( (distance(vOutput.position, cameraPosition)-10.0) / 500.0);
+            sampleLod = pow(log2(1.0 + sampleLod), 0.45);
+            sampleLod = clamp(sampleLod, 0.0, 0.55);
+            sampleLod = 0.0;
+
+            // Average the detail sample and the WATER_LOWRES_FACTOR scaled
+            // sample for both the normal and the displacement
+            vec3 normalDetail = textureLod(waterNormal, vOutput.texcoord, sampleLod ).xyz;
+            vec3 normalLow = textureLod(waterNormal, vOutput.texcoord * WATER_LOWRES_FACTOR, sampleLod).xyz;
+            vec3 normal = (normalDetail + normalLow) * 0.5;
+
+            vec3 dispDetail = textureLod(waterHeightfield, vOutput.texcoord, sampleLod).xyz;
+            vec3 dispLow = textureLod(waterHeightfield, vOutput.texcoord * WATER_LOWRES_FACTOR, sampleLod).xyz;
+            vec3 disp = (dispDetail + dispLow) * 0.5;
+
+            // 0 at the camera, 1 at WATER_DISPLACE_DIST and beyond. Blends
+            // the sampled normal towards straight up with increasing distance.
+            float displaceFactor = saturate(distance(vOutput.position, cameraPosition) / WATER_DISPLACE_DIST);
+            // displaceFactor = 0.0;
+            normal = mix(normal, vec3(0,0,1), displaceFactor);
+            // normalDetail = mix(vec3(0, 0, 1), normalDetail, displaceFactor);
+
+            // Foam strength from the gap between water surface and terrain.
+            // The result is overridden below, so foam currently adds nothing.
+            float heightDifference = abs(waterHeight - terrainHeight + disp.z * (WATER_DISPLACE).z );
+            float foamFactor = 1.0 - saturate(heightDifference * 0.1);
+
+            foamFactor = 0.0;
+
+            // Foam pattern: product of two differently scaled samples, cubed,
+            // with the red channel replicated to all three channels
+            vec3 foam = textureLod(waterFoam, vOutput.texcoord * 0.1, sampleLod).xyz
+                        * textureLod(waterFoam, vOutput.texcoord * 0.2, sampleLod).xyz;
+            foam = pow(foam, vec3(3.0));
+            foam = foam.xxx;
+
+            // Fold term built from the z components of both normal samples.
+            // Its contribution to the base color is multiplied by 0.0 below.
+            // NOTE(review): pow() with a negative base is undefined in GLSL -
+            // confirm the normal texture cannot yield a negative mix result.
+            float fold = max(0.0, pow( mix(normalLow.z * normalDetail.z * 25.0, normalDetail.z, 0.5), 2.1) * 0.2);
+
+            fold *= saturate(1.0 - displaceFactor * 1.0);
+
+            // Keep the xy slope of the normal, but replace z with a constant
+            // before normalizing
+            normal = normalize(vec3(normal.x, normal.y, 12.0 / 512.0));
+
+            // normal = vec3(0, 0, 1);
+
+            vec3 groundCol = vec3(0.12, 0.39, 0.5) * 0.05;
+
+            // Base color: constant tint plus a term driven by disp.z.
+            // NOTE(review): pow(disp.z, 0.8) is undefined for negative disp.z -
+            // confirm the value range stored in the heightfield.
+            m.basecolor = groundCol;
+            m.basecolor += saturate(pow(disp.z, 0.8)) * vec3(0.18, 0.5, 0.6) * 0.05;
+
+            // m.basecolor *= 2.2;
+
+            // m.basecolor = vec3(0.1,0.2, 0.25) * 0.1;
+            // m.basecolor = vec3(0.0, 0.1, 0.02) * 0.1;
+            // m.basecolor = foam * vec3(1.5,1.0,1.0) * displaceFactor * 1.0;
+            m.basecolor += fold * vec3(1.0,1.0,1.0) * 4.0 * 0.0;
+
+            m.basecolor += foamFactor * foam * 1.0;
+
+            // Square the color, it is scaled by 20 further below
+            m.basecolor = pow(m.basecolor, vec3(2.0));
+            // m.basecolor *= 0.0;
+
+            // This preprocessor block is intentionally left without active code
+            #if !defined(IS_TRANSPARENT)
+            //m.translucency = 1.0;
+            #endif
+
+            m.basecolor *= 20.0;
+
+            // Flip the normal when the camera is below the water height
+            m.normal = normal * (cameraPosition.z < waterHeight ? -1 : 1);
+            m.metallic = 1.0;
+            m.specular_ior = 1.0;
+            m.roughness = 0.25;
+
+            // if defined(IS_TRANSPARENT)
+            //m.alpha = 1.0 - saturate(foamFactor);
+            //m.alpha = 1.0;
+            // endif
+
diff --git a/rpcore/water/__init__.py b/rpcore/water/__init__.py
@@ -0,0 +1 @@
+__author__ = 'croxis'
diff --git a/rpcore/water/gpu_fft.py b/rpcore/water/gpu_fft.py
@@ -0,0 +1,251 @@
+from panda3d.core import PNMImage, Texture, LVecBase3d, NodePath, Shader, LVecBase3i
+from panda3d.core import ShaderAttrib, LVecBase2i, Vec2
+
+from rpcore.globals import Globals
+
+import math
+
+
+class GPUFFT:
+    """ This is a collection of compute shaders to generate the inverse
+    fft efficiently on the gpu, with butterfly FFT and precomputed weights """
+
+    def __init__(self, size, source_tex, normalization_factor):
+        """ Creates a new fft instance. The source texture has to specified
+        from the begining, as the shaderAttributes are pregenerated for
+        performance reasons """
+
+        self.size = size
+        self.log2_size = int(math.log(size, 2))
+        self.normalization_factor = normalization_factor
+
+        # Create a ping and a pong texture, because we can't write to the
+        # same texture while reading to it (that would lead to unexpected
+        # behaviour, we could solve that by using an appropriate thread size,
+        # but it works fine so far)
+        self.ping_texture = Texture("FFTPing")
+        self.ping_texture.setup_2d_texture(
+            self.size, self.size, Texture.TFloat, Texture.FRgba32)
+        self.pong_texture = Texture("FFTPong")
+        self.pong_texture.setup_2d_texture(
+            self.size, self.size, Texture.TFloat, Texture.FRgba32)
+        self.source_tex = source_tex
+
+        for tex in [self.ping_texture, self.pong_texture, source_tex]:
+            tex.set_minfilter(Texture.FTNearest)
+            tex.set_magfilter(Texture.FTNearest)
+            tex.set_wrap_u(Texture.WMClamp)
+            tex.set_wrap_v(Texture.WMClamp)
+
+        # Pregenerate weights & indices for the shaders
+        self._compute_weighting()
+
+        # Pre generate the shaders, we have 2 passes: Horizontal and Vertical
+        # which both execute log2(N) times with varying radii
+        self.horizontal_fft_shader = Shader.load_compute(Shader.SLGLSL,
+                                                         "/$$rp/rpcore/water/shader/horizontal_fft.compute")
+        self.horizontal_fft = NodePath("HorizontalFFT")
+        self.horizontal_fft.set_shader(self.horizontal_fft_shader)
+        self.horizontal_fft.set_shader_input(
+            "precomputedWeights", self.weights_lookup_tex)
+        self.horizontal_fft.set_shader_input("N", LVecBase2i(self.size))
+
+        self.vertical_fft_shader = Shader.load_compute(Shader.SLGLSL,
+                                                       "/$$rp/rpcore/water/shader/vertical_fft.compute")
+        self.vertical_fft = NodePath("VerticalFFT")
+        self.vertical_fft.set_shader(self.vertical_fft_shader)
+        self.vertical_fft.set_shader_input(
+            "precomputedWeights", self.weights_lookup_tex)
+        self.vertical_fft.set_shader_input("N", LVecBase2i(self.size))
+
+        # Create a texture where the result is stored
+        self.result_texture = Texture("Result")
+        self.result_texture.setup2dTexture(
+            self.size, self.size, Texture.TFloat, Texture.FRgba16)
+        self.result_texture.set_minfilter(Texture.FTLinear)
+        self.result_texture.set_magfilter(Texture.FTLinear)
+
+        # Prepare the shader attributes, so we don't have to regenerate them
+        # every frame -> That is VERY slow (3ms per fft instance)
+        self._prepare_attributes()
+
+    def get_result_texture(self):
+        """ Returns the result texture, only contains valid data after execute
+        was called at least once """
+        return self.result_texture
+
+    def _generate_indices(self, storage_a, storage_b):
+        """ This method generates the precompute indices, see
+        http://cnx.org/content/m12012/latest/image1.png """
+        num_iter = self.size
+        offset = 1
+        step = 0
+        for i in range(self.log2_size):
+            num_iter = num_iter >> 1
+            step = offset
+            for j in range(self.size):
+                goLeft = (j // step) % 2 == 1
+                index_a, index_b = 0, 0
+                if goLeft:
+                    index_a, index_b = j - step, j
+                else:
+                    index_a, index_b = j, j + step
+
+                storage_a[i][j] = index_a
+                storage_b[i][j] = index_b
+            offset = offset << 1
+
+    def _generate_weights(self, storage):
+        """ This method generates the precomputed weights """
+
+        # Using a custom pi variable should force the calculations to use
+        # high precision (I hope so)
+        pi = 3.141592653589793238462643383
+        num_iter = self.size // 2
+        num_k = 1
+        resolution_float = float(self.size)
+        for i in range(self.log2_size):
+            start = 0
+            end = 2 * num_k
+            for b in range(num_iter):
+                K = 0
+                for k in range(start, end, 2):
+                    fK = float(K)
+                    f_num_iter = float(num_iter)
+                    weight_a = Vec2(
+                        math.cos(2.0 * pi * fK * f_num_iter / resolution_float),
+                        -math.sin(2.0 * pi * fK * f_num_iter / resolution_float))
+                    weight_b = Vec2(
+                        -math.cos(2.0 * pi * fK * f_num_iter / resolution_float),
+                        math.sin(2.0 * pi * fK * f_num_iter / resolution_float))
+                    storage[i][k // 2] = weight_a
+                    storage[i][k // 2 + num_k] = weight_b
+                    K += 1
+                start += 4 * num_k
+                end = start + 2 * num_k
+
+            num_iter = num_iter >> 1
+            num_k = num_k << 1
+
+    def _reverse_row(self, indices):
+        """ Reverses the bits in the given row. This is required for inverse
+        fft (actually we perform a normal fft, but reversing the bits gives
+        us an inverse fft) """
+        mask = 0x1
+        for j in range(self.size):
+            val = 0x0
+            temp = int(indices[j])  # Int is required, for making a copy
+            for i in range(self.log2_size):
+                t = mask & temp
+                val = (val << 1) | t
+                temp = temp >> 1
+            indices[j] = val
+
+    def _compute_weighting(self):
+        """ Precomputes the weights & indices, and stores them in a texture """
+        indices_a = [[0 for i in range(self.size)]
+                     for k in range(self.log2_size)]
+        indices_b = [[0 for i in range(self.size)]
+                     for k in range(self.log2_size)]
+        weights = [[Vec2(0.0) for i in range(self.size)]
+                   for k in range(self.log2_size)]
+
+        # Pre-Generating indices ..
+        self._generate_indices(indices_a, indices_b)
+        self._reverse_row(indices_a[0])
+        self._reverse_row(indices_b[0])
+
+        # Pre-Generating weights .."
+        self._generate_weights(weights)
+
+        # Create storage for the weights & indices
+        self.weights_lookup = PNMImage(self.size, self.log2_size, 4)
+        self.weights_lookup.setMaxval((2 ** 16) - 1)
+        self.weights_lookup.fill(0.0)
+
+        # Populate storage
+        for x in range(self.size):
+            for y in range(self.log2_size):
+                index_a = indices_a[y][x]
+                index_b = indices_b[y][x]
+                weight = weights[y][x]
+
+                self.weights_lookup.set_red(x, y, index_a / float(self.size))
+                self.weights_lookup.set_green(x, y, index_b / float(self.size))
+                self.weights_lookup.set_blue(x, y, weight.x * 0.5 + 0.5)
+                self.weights_lookup.set_alpha(x, y, weight.y * 0.5 + 0.5)
+
+        # Convert storage to texture so we can use it in a shader
+        self.weights_lookup_tex = Texture("Weights Lookup")
+        self.weights_lookup_tex.load(self.weights_lookup)
+        self.weights_lookup_tex.set_format(Texture.FRgba16)
+        self.weights_lookup_tex.set_minfilter(Texture.FTNearest)
+        self.weights_lookup_tex.set_magfilter(Texture.FTNearest)
+        self.weights_lookup_tex.set_wrap_u(Texture.WMClamp)
+        self.weights_lookup_tex.set_wrap_v(Texture.WMClamp)
+
+    def _prepare_attributes(self):
+        """ Prepares all shaderAttributes, so that we have a list of
+        ShaderAttributes we can simply walk through in the update method,
+        that is MUCH faster than using set_shader_input, as each call to
+        set_shader_input forces the generation of a new ShaderAttrib """
+        self.attributes = []
+        textures = [self.ping_texture, self.pong_texture]
+
+        current_index = 0
+        firstPass = True
+
+        # Horizontal
+        for step in range(self.log2_size):
+            source = textures[current_index]
+            dest = textures[1 - current_index]
+
+            if firstPass:
+                source = self.source_tex
+                firstPass = False
+
+            index = self.log2_size - step - 1
+            self.horizontal_fft.set_shader_input("source", source)
+            self.horizontal_fft.set_shader_input("dest", dest)
+            self.horizontal_fft.set_shader_input(
+                "butterflyIndex", LVecBase2i(index))
+            self._queue_shader(self.horizontal_fft)
+            current_index = 1 - current_index
+
+        # Vertical
+        for step in range(self.log2_size):
+            source = textures[current_index]
+            dest = textures[1 - current_index]
+            is_last_pass = step == self.log2_size - 1
+            if is_last_pass:
+                dest = self.result_texture
+            index = self.log2_size - step - 1
+            self.vertical_fft.set_shader_input("source", source)
+            self.vertical_fft.set_shader_input("dest", dest)
+            self.vertical_fft.set_shader_input(
+                "isLastPass", is_last_pass)
+            self.vertical_fft.set_shader_input(
+                "normalizationFactor", self.normalization_factor)
+            self.vertical_fft.set_shader_input(
+                "butterflyIndex", LVecBase2i(index))
+            self._queue_shader(self.vertical_fft)
+
+            current_index = 1 - current_index
+
+    def execute(self):
+        """ Executes the inverse fft once """
+        for attr in self.attributes:
+            self._execute_shader(attr)
+
+    def _queue_shader(self, node):
+        """ Internal method to fetch the ShaderAttrib of a node and store it
+        in the update queue """
+        sattr = node.getAttrib(ShaderAttrib)
+        self.attributes.append(sattr)
+
+    def _execute_shader(self, sattr):
+        """ Internal method to execute a shader by a given ShaderAttrib """
+        Globals.base.graphicsEngine.dispatch_compute(
+            (self.size // 16, self.size // 16, 1),
+            sattr,
+            Globals.base.win.get_gsg())