CIS565-Fall-2016 · WindyDarian · Nov 9, 2016 · Nov 12, 2016 · Nov 12, 2016 · Nov 12, 2016
diff --git a/README.md b/README.md
@@ -3,13 +3,39 @@ Vulkan Flocking: compute and shading in one pipeline!
 
 **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 6**
 
-* (TODO) YOUR NAME HERE
-  Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+* Ruoyu Fan
+* Tested on: Windows 10 x64, i7-4720HQ @ 2.60GHz, 16GB Memory, GTX 970M 3072MB (personal laptop)
+  * Visual Studio 2015 & LunarG Vulkan SDK 1.0.30.0
 
-  ### (TODO: Your README)
+![](screenshots/1.gif)
 
-  Include screenshots, analysis, etc. (Remember, this is public, so don't put
-  anything here that you don't want to share with the world.)
+### Q&A
+
+> * Why do you think Vulkan expects explicit descriptors for things like
+generating pipelines and commands? HINT: this may relate to something in the
+comments about some components using pre-allocated GPU memory.
+
+Because command buffers in Vulkan live in a pre-allocated GPU command pool, and we cannot
+update them once created, they need updatable descriptor sets to figure out which
+buffers to operate on and how to correctly map data from buffers to the inputs and outputs of every stage of the pipeline.
+This way we can use a single command buffer to operate on varying data.
+
+> * Describe a situation besides flip-flop buffers in which you may need multiple
+descriptor sets to fit one descriptor layout.
+
+For example, in a deferred shading pipeline's debug view, instead of passing the current state and all g-buffers into the debug fragment shader, I can bind the depth/color/normal maps as different descriptor sets that share one descriptor layout, and use a different set according to the current configuration.
+
+> * What are some problems to keep in mind when using multiple Vulkan queues?
+>   * take into consideration that different queues may be backed by different hardware
+>   * take into consideration that the same buffer may be used across multiple queues
+
+* Queue operations on different queues have no implicit ordering constraints, and may execute in any order. Explicit ordering constraints between queues can be expressed with semaphores and fences. (https://www.khronos.org/registry/vulkan/specs/1.0/xhtml/vkspec.html#fundamentals-queueoperation)
+* When two queues are operating on the same buffer, we need to take race conditions into consideration.
+
+> * What is one advantage of using compute commands that can share data with a
+rendering pipeline?
+
+We don't need to copy the inputs and outputs of the compute and render stages around, which could otherwise be a huge amount of data to copy.
 
 ### Credits
 

diff --git a/data/shaders/computeparticles/generate-spirv.bat b/data/shaders/computeparticles/generate-spirv.bat
@@ -1,5 +1,3 @@
-glslangvalidator -V particle.frag -o particle.frag.spv
-glslangvalidator -V particle.vert -o particle.vert.spv
-glslangvalidator -V particle.comp -o particle.comp.spv
-
-
+glslangvalidator -V particle.frag.glsl -o particle.frag.spv -S frag
+glslangvalidator -V particle.vert.glsl -o particle.vert.spv -S vert
+glslangvalidator -V particle.comp.glsl -o particle.comp.spv -S comp
diff --git a/data/shaders/computeparticles/particle.comp b/data/shaders/computeparticles/particle.comp
diff --git a/data/shaders/computeparticles/particle.comp.glsl b/data/shaders/computeparticles/particle.comp.glsl
@@ -0,0 +1,131 @@
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+struct Particle
+{
+    vec2 pos; // position; main() wraps it so it stays within [-1, 1] on both axes
+    vec2 vel; // velocity; main() limits its length to 0.1
+};
+
+// LOOK: These bindings correspond to the DescriptorSetLayouts and
+// the DescriptorSets from prepareCompute()!
+
+// Binding 0 : Particle storage buffer (read by main() as the previous state)
+layout(std140, binding = 0) buffer ParticlesA
+{
+   Particle particlesA[ ];
+};
+
+// Binding 1 : Particle storage buffer (written by main() with the new state)
+layout(std140, binding = 1) buffer ParticlesB
+{
+   Particle particlesB[ ];
+};
+// NOTE(review): main() indexes boids with gl_GlobalInvocationID.x only; the y extent below is not used for indexing - confirm against the dispatch size.
+layout (local_size_x = 16, local_size_y = 16) in;
+
+// LOOK: rule weights and distances, as well as particle count, based off uniforms.
+// The deltaT here has to be updated every frame to account for changes in
+// frame rate.
+layout (binding = 2) uniform UBO
+{
+    float deltaT;        // time step: position advances by velocity * deltaT
+    float rule1Distance; // neighbor radius for rule 1 (move toward neighbors' center)
+    float rule2Distance; // neighbor radius for rule 2 (keep away from neighbors)
+    float rule3Distance; // neighbor radius for rule 3 (follow neighbors' velocity)
+    float rule1Scale;    // weight applied to the rule 1 contribution
+    float rule2Scale;    // weight applied to the rule 2 contribution
+    float rule3Scale;    // weight applied to the rule 3 contribution
+    int particleCount;   // number of valid entries in the particle buffers
+} ubo;
+
+void main()
+{
+    // One invocation updates one boid: it scans every other boid in the
+    // read buffer (particlesA), accumulates the three flocking rules into a
+    // velocity change, then writes the advected state to particlesB.
+    // Only gl_GlobalInvocationID.x selects the boid.
+
+    // Current SSBO index
+    uint index = gl_GlobalInvocationID.x;
+    // Don't try to write beyond particle count
+    if (index >= ubo.particleCount)
+        return;
+
+    // Read position and velocity
+    vec2 vPos = particlesA[index].pos.xy;
+    vec2 vVel = particlesA[index].vel.xy;
+
+    vec2 delta_vel = vec2(0.0);
+    vec2 rule1_neighbor_pos_sum = vec2(0.0);
+    float rule1_neighbor_count = 0.0;
+    vec2 rule2_total_offset = vec2(0.0);
+    vec2 rule3_neighbor_vel_sum = vec2(0.0);
+    float rule3_neighbor_count = 0.0;
+
+    vec2 current_offset;
+    float current_distance;
+    for (int i = 0; i < ubo.particleCount; i++)
+    {
+        if (i == index) continue;
+
+        current_offset = particlesA[i].pos.xy - vPos;
+        current_distance = length(current_offset);
+
+        // Rule 1: Get neighbor position sum and neighbor count for rule1
+        if (current_distance < ubo.rule1Distance)
+        {
+            rule1_neighbor_pos_sum += particlesA[i].pos.xy;
+            rule1_neighbor_count += 1.0;
+        }
+        // Rule 2: Calculate offset for rule 2
+        if (current_distance < ubo.rule2Distance)
+        {
+            rule2_total_offset -= current_offset;
+        }
+        // Rule 3: Get velocity sum and neighbor count for rule 3
+        if (current_distance < ubo.rule3Distance)
+        {
+            rule3_neighbor_vel_sum += particlesA[i].vel.xy;
+            rule3_neighbor_count += 1.0;
+        }
+    }
+
+    // Rule 1: boids fly towards their local perceived center of mass, which excludes themselves
+    if (rule1_neighbor_count > 0.0)
+    {
+        delta_vel += ubo.rule1Scale * ((rule1_neighbor_pos_sum / rule1_neighbor_count) - vPos);
+    }
+
+    // Rule 2: boids try to stay a distance d away from each other
+    delta_vel += ubo.rule2Scale *  rule2_total_offset;
+
+    // Rule 3: boids try to match the speed of surrounding boids
+    if (rule3_neighbor_count > 0.0)
+    {
+        delta_vel += ubo.rule3Scale * (rule3_neighbor_vel_sum / rule3_neighbor_count); // said this looks better using the parameters
+        //delta_vel += ubo.rule3Scale * ((rule3_neighbor_vel_sum / rule3_neighbor_count) - vVel);
+    }
+
+    vVel += delta_vel;
+
+    // Clamp speed to 0.1. Rescale rather than normalize(): normalizing a
+    // zero-length velocity is undefined and would poison the boid with NaNs.
+    float speed = length(vVel);
+    if (speed > 0.1) vVel *= 0.1 / speed;
+
+    // kinematic update
+    vPos += vVel * ubo.deltaT;
+
+    // Wrap around boundary
+    if (vPos.x < -1.0) vPos.x = 1.0;
+    if (vPos.x > 1.0) vPos.x = -1.0;
+    if (vPos.y < -1.0) vPos.y = 1.0;
+    if (vPos.y > 1.0) vPos.y = -1.0;
+
+    // Write back to the output buffer
+    particlesB[index].pos.xy = vPos;
+    particlesB[index].vel.xy = vVel;
+}
diff --git a/data/shaders/computeparticles/particle.comp.spv b/data/shaders/computeparticles/particle.comp.spv
diff --git a/data/shaders/computeparticles/particle.frag → ...aders/computeparticles/particle.frag.glsl b/data/shaders/computeparticles/particle.frag → ...aders/computeparticles/particle.frag.glsl
diff --git a/data/shaders/computeparticles/particle.vert → ...aders/computeparticles/particle.vert.glsl b/data/shaders/computeparticles/particle.vert → ...aders/computeparticles/particle.vert.glsl
diff --git a/screenshots/1.gif b/screenshots/1.gif
diff --git a/screenshots/11.12.2016_progress_1.jpg b/screenshots/11.12.2016_progress_1.jpg
diff --git a/screenshots/2.gif b/screenshots/2.gif
diff --git a/vulkanBoids/vulkanBoids.cpp b/vulkanBoids/vulkanBoids.cpp
@@ -22,12 +22,13 @@
 #define GLM_FORCE_DEPTH_ZERO_TO_ONE
 #include <glm/glm.hpp>
 #include <glm/gtc/matrix_transform.hpp>
+#include <glm/gtc/random.hpp>
 
 #include <vulkan/vulkan.h>
 #include "vulkanexamplebase.h"
 
 #define VERTEX_BUFFER_BIND_ID 0
-#define ENABLE_VALIDATION true // LOOK: toggle Vulkan validation layers. These make debugging much easier!
+#define ENABLE_VALIDATION false // LOOK: toggle Vulkan validation layers. These make debugging much easier!
 #define PARTICLE_COUNT 4 * 1024 // LOOK: change particle count here
 
 // LOOK: constants for the boids algorithm. These will be passed to the GPU compute part of the assignment
@@ -157,7 +158,8 @@ class VulkanExample : public VulkanExampleBase
 		for (auto& particle : particleBuffer)
 		{
 			particle.pos = glm::vec2(rDistribution(rGenerator), rDistribution(rGenerator));
-			// TODO: add randomized velocities with a slight scale here, something like 0.1f.
+			// DONE: add randomized velocities with a slight scale here, something like 0.1f.
+			particle.vel = glm::diskRand(0.1f);
 		}
 
 		VkDeviceSize storageBufferSize = particleBuffer.size() * sizeof(Particle);
@@ -244,7 +246,7 @@ class VulkanExample : public VulkanExampleBase
 			VERTEX_BUFFER_BIND_ID,
 			1,
 			VK_FORMAT_R32G32_SFLOAT,
-			offsetof(Particle, pos)); // TODO: change this so that we can color the particles based on velocity.
+			offsetof(Particle, vel)); // DONE: change this so that we can color the particles based on velocity.
 
 		// vertices.inputState encapsulates everything we need for these particular buffers to
 		// interface with the graphics pipeline.
@@ -540,14 +542,37 @@ class VulkanExample : public VulkanExampleBase
 			compute.descriptorSets[0],
 			VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
 			2,
-			&compute.uniformBuffer.descriptor)
+			&compute.uniformBuffer.descriptor),
 
-			// TODO: write the second descriptorSet, using the top for reference.
+			// DONE: write the second descriptorSet, using the top for reference.
 			// We want the descriptorSets to be used for flip-flopping:
 			// on one frame, we use one descriptorSet with the compute pass,
 			// on the next frame, we use the other.
 			// What has to be different about how the second descriptorSet is written here?
+
+			// Binding 0 : Particle position storage buffer
+			vkTools::initializers::writeDescriptorSet(
+				compute.descriptorSets[1], 
+				VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+				0, // LOOK: which binding in the descriptor set Layout?
+				&compute.storageBufferB.descriptor), 
+
+			 // Binding 1 : Particle position storage buffer
+			vkTools::initializers::writeDescriptorSet(
+				compute.descriptorSets[1],
+				VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+				1,
+				&compute.storageBufferA.descriptor),
+
+			// Binding 2 : Uniform buffer
+			vkTools::initializers::writeDescriptorSet(
+				compute.descriptorSets[1],
+				VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+				2,
+				&compute.uniformBuffer.descriptor)
 		};
+
+
 
 		vkUpdateDescriptorSets(device, static_cast<uint32_t>(computeWriteDescriptorSets.size()), computeWriteDescriptorSets.data(), 0, NULL);
 	}
@@ -568,7 +593,7 @@ class VulkanExample : public VulkanExampleBase
 		VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
 
 		VulkanExampleBase::submitFrame();
-
+		
 		// LOOK: wait for fence that was submitted with the compute commandBuffer to complete.
 		// Then, reset it for the next round of compute
 		vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX);
@@ -583,13 +608,16 @@ class VulkanExample : public VulkanExampleBase
 		// are done executing.
 		VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, compute.fence));
 
-		// TODO: handle flip-flop logic. We want the next iteration to
+		// DONE: handle flip-flop logic. We want the next iteration to
 		// run the compute pipeline with flipped SSBOs, so we have to
 		// swap the descriptorSets, which each allow access to the SSBOs
 		// in one configuration.
 		// We also want to flip what SSBO we draw with in the next
 		// pass through the graphics pipeline.
 		// Feel free to use std::swap here. You should need it twice.
+		std::swap(compute.descriptorSets[0], compute.descriptorSets[1]);
+		std::swap(compute.storageBufferA, compute.storageBufferB);
+		// TODO: ping-pong command buffers?
 	}
 
 	// Record command buffers for drawing using the graphics pipeline
@@ -639,7 +667,9 @@ class VulkanExample : public VulkanExampleBase
 			// How does this influence flip-flopping in draw()?
 			// Try drawing with storageBufferA instead of storageBufferB. What happens? Why?
 			VkDeviceSize offsets[1] = { 0 };
+			//vkCmdBindVertexBuffers(drawCmdBuffers[i], VERTEX_BUFFER_BIND_ID, 1, &compute.storageBufferA.buffer, offsets);
 			vkCmdBindVertexBuffers(drawCmdBuffers[i], VERTEX_BUFFER_BIND_ID, 1, &compute.storageBufferB.buffer, offsets);
+
 			vkCmdDraw(drawCmdBuffers[i], PARTICLE_COUNT, 1, 0, 0);
 
 			vkCmdEndRenderPass(drawCmdBuffers[i]);