diff --git a/cmd/render.go b/cmd/render.go index 5fc5a62..7cda514 100644 --- a/cmd/render.go +++ b/cmd/render.go @@ -132,14 +132,16 @@ func RenderInteractive(ctx *cli.Context) error { return err } - // Update projection matrix + // Due to the way that gl.TexSubImage2D works we need to + // generate a mirrored image of the frame buffer. + sc.Camera.InvertY = true sc.Camera.SetupProjection(float32(opts.FrameW) / float32(opts.FrameH)) // Setup tracing pipeline pipeline := opencl.DefaultPipeline(opencl.NoDebug) // Create renderer - r, err := renderer.NewInteractive(sc, tracer.NaiveScheduler(), pipeline, opts) + r, err := renderer.NewInteractive(sc, tracer.PerfectScheduler(), pipeline, opts) if err != nil { return err } diff --git a/renderer/default.go b/renderer/default.go index 98961ae..e36d1ac 100644 --- a/renderer/default.go +++ b/renderer/default.go @@ -135,6 +135,14 @@ func (r *defaultRenderer) renderFrame(accumulatedSamples uint32) error { blockReq.BlockY += blockH } + var tot uint32 = 0 + for _, bh := range r.blockAssignments { + tot += bh + } + if tot != r.options.FrameH { + fmt.Printf("S(assigned blocks) = %d != %d\n", tot, r.options.FrameH) + } + // Wait for all tracers to finish pending := len(r.tracers) for pending != 0 { @@ -181,8 +189,8 @@ func (r *defaultRenderer) jobWorker(trIndex int) { } _, err := r.tracers[trIndex].Trace(&blockReq) - if err == nil && trIndex != r.primary { - // Merge accumulator output with primary tracer + if err == nil { + // Merge trace accumulator output for this pass with primary tracer's frame accumulator _, err = r.tracers[r.primary].MergeOutput(r.tracers[trIndex], &blockReq) } r.jobCompleteChan <- err diff --git a/tracer/opencl/CL/kernels/accumulator.cl b/tracer/opencl/CL/kernels/accumulator.cl index 92258b1..dbda2ae 100644 --- a/tracer/opencl/CL/kernels/accumulator.cl +++ b/tracer/opencl/CL/kernels/accumulator.cl @@ -3,10 +3,19 @@ // Clear accumulation buffer __kernel void clearAccumulator( - __global float3 *accumulator, - const uint frameW + __global float3 *accumulator ){ - accumulator[(get_global_id(1) * frameW) + get_global_id(0)] = (float3)(0.0f, 0.0f, 0.0f); + accumulator[get_global_id(0)] = (float3)(0.0f, 0.0f, 0.0f); +} + + +// Aggregate trace accumulator to the primary tracer's frame accumulator +__kernel void aggregateAccumulator( + __global float3 *srcAccumulator, + __global float3 *dstAccumulator + ){ + int globalId = get_global_id(0); + dstAccumulator[globalId] += srcAccumulator[globalId]; } #endif diff --git a/tracer/opencl/buffers.go b/tracer/opencl/buffers.go index 4d4bafb..ba101d0 100644 --- a/tracer/opencl/buffers.go +++ b/tracer/opencl/buffers.go @@ -52,7 +52,16 @@ type bufferSet struct { HitFlags *device.Buffer Intersections *device.Buffer - Accumulator *device.Buffer + // A buffer that stores trace samples for a single trace request. It is + // cleared before starting a new trace. + TraceAccumulator *device.Buffer + + // A buffer that aggregates the trace accumulator content between + // multiple frames. All post-processing pipeline stages operate on + // this buffer. The buffer is cleared when the pipeline Reset stage + // is executed. + FrameAccumulator *device.Buffer + EmissiveSamples *device.Buffer DebugOutput *device.Buffer @@ -82,12 +91,13 @@ func newBufferSet(dev *device.Device) *bufferSet { dev.Buffer("rays1"), dev.Buffer("rays2"), }, - Paths: dev.Buffer("paths"), - HitFlags: dev.Buffer("hitFlags"), - Intersections: dev.Buffer("intersections"), - EmissiveSamples: dev.Buffer("emissiveSamples"), - Accumulator: dev.Buffer("accumulator"), - DebugOutput: dev.Buffer("debugOutput"), + Paths: dev.Buffer("paths"), + HitFlags: dev.Buffer("hitFlags"), + Intersections: dev.Buffer("intersections"), + EmissiveSamples: dev.Buffer("emissiveSamples"), + TraceAccumulator: dev.Buffer("traceAccumulator"), + FrameAccumulator: dev.Buffer("frameAccumulator"), + DebugOutput: dev.Buffer("debugOutput"), RayCounters: [3]*device.Buffer{ dev.Buffer("numRays0"), dev.Buffer("numRays1"), @@ -144,7 +154,11 @@ func (bs *bufferSet) Resize(frameW, frameH uint32) error { if err != nil { return err } - err = bs.Accumulator.Allocate(int(pixels*sizeofAccumulatorSample), cl.MEM_READ_WRITE) + err = bs.TraceAccumulator.Allocate(int(pixels*sizeofAccumulatorSample), cl.MEM_READ_WRITE) + if err != nil { + return err + } + err = bs.FrameAccumulator.Allocate(int(pixels*sizeofAccumulatorSample), cl.MEM_READ_WRITE) if err != nil { return err } diff --git a/tracer/opencl/kernel_type.go b/tracer/opencl/kernel_type.go index 018c13f..ff0c2d4 100644 --- a/tracer/opencl/kernel_type.go +++ b/tracer/opencl/kernel_type.go @@ -19,9 +19,10 @@ const ( accumulateEmissiveSamples // hdr kernels tonemapSimpleReinhard - // utils + // accumulator clearAccumulator - // Debugging + aggregateAccumulator + // debugging debugClearBuffer debugRayIntersectionDepth debugRayIntersectionNormals @@ -55,6 +56,8 @@ func (kt kernelType) String() string { return "tonemapSimpleReinhard" case clearAccumulator: return "clearAccumulator" + case aggregateAccumulator: + return "aggregateAccumulator" case debugClearBuffer: return "debugClearBuffer" case debugRayIntersectionDepth: diff --git a/tracer/opencl/pipeline.go b/tracer/opencl/pipeline.go index 8187a0d..db9a58b 100644 --- a/tracer/opencl/pipeline.go +++ b/tracer/opencl/pipeline.go @@ -69,10 +69,10 @@ func DefaultPipeline(debugFlags DebugFlag) *Pipeline { return pipeline } -// Clear the accumulator buffer. +// Clear the frame accumulator buffer. func ClearAccumulator() PipelineStage { return func(tr *Tracer, blockReq *tracer.BlockRequest) (time.Duration, error) { - return tr.resources.ClearAccumulator(blockReq) + return tr.resources.ClearFrameAccumulator(blockReq) } } @@ -101,6 +101,11 @@ func MonteCarloIntegrator(debugFlags DebugFlag) PipelineStage { var activeRayBuf uint32 = 0 + _, err = tr.resources.ClearTraceAccumulator(blockReq) + if err != nil { + return time.Since(start), err + } + // Intersect primary rays outside of the loop // Use packet query intersector for GPUs as opencl forces CPU // to use a local workgroup size equal to 1 diff --git a/tracer/opencl/resources.go b/tracer/opencl/resources.go index bc45e0e..e7007a2 100644 --- a/tracer/opencl/resources.go +++ b/tracer/opencl/resources.go @@ -77,18 +77,50 @@ func (dr *deviceResources) Close() { } } -// Clear a rectangular region of the output accumulator. -func (dr *deviceResources) ClearAccumulator(blockReq *tracer.BlockRequest) (time.Duration, error) { +// Clear the frame accumulator. +func (dr *deviceResources) ClearFrameAccumulator(blockReq *tracer.BlockRequest) (time.Duration, error) { kernel := dr.kernels[clearAccumulator] err := kernel.SetArgs( - dr.buffers.Accumulator, - blockReq.BlockW, + dr.buffers.FrameAccumulator, ) if err != nil { return 0, err } - return kernel.Exec2D(0, int(blockReq.BlockY), int(blockReq.BlockW), int(blockReq.BlockH), 0, 0) + return kernel.Exec1D(0, int(blockReq.FrameW*blockReq.FrameH), 0) +} + +// Clear the trace accumulator. +func (dr *deviceResources) ClearTraceAccumulator(blockReq *tracer.BlockRequest) (time.Duration, error) { + kernel := dr.kernels[clearAccumulator] + err := kernel.SetArgs( + dr.buffers.TraceAccumulator, + ) + if err != nil { + return 0, err + } + + return kernel.Exec1D(0, int(blockReq.FrameW*blockReq.FrameH), 0) +} + +// Aggregate the trace accumulator contents from another tracer into +// this tracer's frame accumulator. +func (dr *deviceResources) AggregateAccumulator(srcAccumulator *device.Buffer, blockReq *tracer.BlockRequest) (time.Duration, error) { + kernel := dr.kernels[aggregateAccumulator] + err := kernel.SetArgs( + srcAccumulator, + dr.buffers.FrameAccumulator, + ) + if err != nil { + return 0, err + } + + // Add the contents of block specified by blockReq + return kernel.Exec1D( + int(blockReq.FrameW*blockReq.BlockY), + int(blockReq.BlockW*blockReq.BlockH), + 0, + ) } // Generate primary rays. @@ -231,7 +263,7 @@ func (dr *deviceResources) ShadeHits(bounce, minBouncesForRR, randSeed, numEmiss dr.buffers.Rays[1-rayBufferIndex], dr.buffers.RayCounters[1-rayBufferIndex], // - dr.buffers.Accumulator, + dr.buffers.TraceAccumulator, ) if err != nil { return 0, err @@ -255,7 +287,7 @@ func (dr *deviceResources) ShadePrimaryRayMisses(diffuseMatNodeIndex, rayBufferI diffuseMatNodeIndex, dr.buffers.TextureMetadata, dr.buffers.Textures, - dr.buffers.Accumulator, + dr.buffers.TraceAccumulator, ) if err != nil { return 0, err @@ -279,7 +311,7 @@ func (dr *deviceResources) ShadeIndirectRayMisses(diffuseMatNodeIndex, rayBuffer diffuseMatNodeIndex, dr.buffers.TextureMetadata, dr.buffers.Textures, - dr.buffers.Accumulator, + dr.buffers.TraceAccumulator, ) if err != nil { return 0, err @@ -299,7 +331,7 @@ func (dr *deviceResources) AccumulateEmissiveSamples(rayBufferIndex uint32, numP dr.buffers.Paths, dr.buffers.HitFlags, dr.buffers.EmissiveSamples, - dr.buffers.Accumulator, + dr.buffers.TraceAccumulator, ) if err != nil { return 0, err @@ -314,7 +346,7 @@ func (dr *deviceResources) TonemapSimpleReinhard(blockReq *tracer.BlockRequest) numPixels := int(blockReq.FrameW * blockReq.BlockH) sampleWeight := float32(1.0 / float32(blockReq.AccumulatedSamples+blockReq.SamplesPerPixel)) err := kernel.SetArgs( - dr.buffers.Accumulator, + dr.buffers.FrameAccumulator, dr.buffers.Paths, dr.buffers.FrameBuffer, sampleWeight, @@ -480,7 +512,7 @@ func (dr *deviceResources) DebugAccumulator(blockReq *tracer.BlockRequest) (time err = kernel.SetArgs( sampleWeight, dr.buffers.Paths, - dr.buffers.Accumulator, + dr.buffers.TraceAccumulator, dr.buffers.DebugOutput, ) if err != nil { diff --git a/tracer/opencl/tracer.go b/tracer/opencl/tracer.go index 0bcbbc2..4222ed6 100644 --- a/tracer/opencl/tracer.go +++ b/tracer/opencl/tracer.go @@ -233,6 +233,8 @@ func (tr *Tracer) Trace(blockReq *tracer.BlockRequest) (time.Duration, error) { } } + tr.stats.BlockW = blockReq.BlockW + tr.stats.BlockH = blockReq.BlockH tr.stats.RenderTime = time.Since(start) return tr.stats.RenderTime, nil } @@ -267,11 +269,5 @@ func (tr *Tracer) MergeOutput(other tracer.Tracer, blockReq *tracer.BlockRequest return 0, fmt.Errorf("merge failed: unsupported tracer instance") } - start := time.Now() - - // Each accumulator entry is 16 bytes long (float3 stored as float4) - dstOffset := int((blockReq.BlockY * blockReq.FrameW * 16) + (blockReq.BlockX * 16)) - bytes := int((blockReq.BlockW * blockReq.BlockH * 16)) - - return time.Since(start), tr.resources.buffers.Accumulator.CopyDataFrom(src.resources.buffers.Accumulator, dstOffset, dstOffset, bytes) + return tr.resources.AggregateAccumulator(src.resources.buffers.TraceAccumulator, blockReq) }