Skip to content

Commit

Permalink
Correctly merge tracer output when using dynamic schedulers
Browse files Browse the repository at this point in the history
Due to the way that schedulers work, one tracer may be responsible for a
particular frame row in one frame and a different tracer may be assigned
to it on the following frame. This causes a problem as we need to
properly aggregate the tracer outputs to ensure that our post-processing
pipeline always operates on the same number of aggregated samples
(for example, averaging them would otherwise generate inconsistent
output).

To avoid such problems, the primary tracer maintains two accumulators: a
trace accumulator, which is reset on each trace request, and a frame
accumulator that accumulates (via an add operation) the assigned rows
from each tracer. Post-processing pipelines operate on the primary
tracer's frame accumulator.
  • Loading branch information
Achilleas Anagnostopoulos committed Sep 16, 2016
1 parent d9fd3ff commit f2075ec
Show file tree
Hide file tree
Showing 8 changed files with 106 additions and 37 deletions.
6 changes: 4 additions & 2 deletions cmd/render.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,14 +132,16 @@ func RenderInteractive(ctx *cli.Context) error {
return err
}

// Update projection matrix
// Due to the way that gl.TexSubImage2D works we need to
// generate a mirrored image of the frame buffer.
sc.Camera.InvertY = true
sc.Camera.SetupProjection(float32(opts.FrameW) / float32(opts.FrameH))

// Setup tracing pipeline
pipeline := opencl.DefaultPipeline(opencl.NoDebug)

// Create renderer
r, err := renderer.NewInteractive(sc, tracer.NaiveScheduler(), pipeline, opts)
r, err := renderer.NewInteractive(sc, tracer.PerfectScheduler(), pipeline, opts)
if err != nil {
return err
}
Expand Down
12 changes: 10 additions & 2 deletions renderer/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,14 @@ func (r *defaultRenderer) renderFrame(accumulatedSamples uint32) error {
blockReq.BlockY += blockH
}

var tot uint32 = 0
for _, bh := range r.blockAssignments {
tot += bh
}
if tot != r.options.FrameH {
fmt.Printf("S(assigned blocks) = %d != %d\n", tot, r.options.FrameH)
}

// Wait for all tracers to finish
pending := len(r.tracers)
for pending != 0 {
Expand Down Expand Up @@ -181,8 +189,8 @@ func (r *defaultRenderer) jobWorker(trIndex int) {
}

_, err := r.tracers[trIndex].Trace(&blockReq)
if err == nil && trIndex != r.primary {
// Merge accumulator output with primary tracer
if err == nil {
// Merge trace accumulator output for this pass with primary tracer's frame accumulator
_, err = r.tracers[r.primary].MergeOutput(r.tracers[trIndex], &blockReq)
}
r.jobCompleteChan <- err
Expand Down
15 changes: 12 additions & 3 deletions tracer/opencl/CL/kernels/accumulator.cl
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,19 @@

// Clear accumulation buffer
__kernel void clearAccumulator(
__global float3 *accumulator,
const uint frameW
__global float3 *accumulator
){
accumulator[(get_global_id(1) * frameW) + get_global_id(0)] = (float3)(0.0f, 0.0f, 0.0f);
accumulator[get_global_id(0)] = (float3)(0.0f, 0.0f, 0.0f);
}


// Aggregate trace accumulator to the primary tracer's frame accumulator.
//
// Each work-item adds the srcAccumulator element at its global id to the
// dstAccumulator element at the same index, so both buffers are addressed
// with identical offsets. The kernel performs no bounds checking; the host
// is responsible for choosing a global offset and size that stay inside
// both buffers.
__kernel void aggregateAccumulator(
		__global float3 *srcAccumulator,
		__global float3 *dstAccumulator
		){
	// get_global_id() returns size_t; keep the index in that type instead of
	// narrowing it to a signed int before using it as a buffer index.
	const size_t globalId = get_global_id(0);
	dstAccumulator[globalId] += srcAccumulator[globalId];
}

#endif
30 changes: 22 additions & 8 deletions tracer/opencl/buffers.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,16 @@ type bufferSet struct {
HitFlags *device.Buffer
Intersections *device.Buffer

Accumulator *device.Buffer
// A buffer that stores trace samples for a single trace request. It is
// cleared before starting a new trace.
TraceAccumulator *device.Buffer

// A buffer that aggregates the trace accumulator content across
// multiple frames. All post-processing pipeline stages operate on
// this buffer. The buffer is cleared when the pipeline Reset stage
// is executed.
FrameAccumulator *device.Buffer

EmissiveSamples *device.Buffer
DebugOutput *device.Buffer

Expand Down Expand Up @@ -82,12 +91,13 @@ func newBufferSet(dev *device.Device) *bufferSet {
dev.Buffer("rays1"),
dev.Buffer("rays2"),
},
Paths: dev.Buffer("paths"),
HitFlags: dev.Buffer("hitFlags"),
Intersections: dev.Buffer("intersections"),
EmissiveSamples: dev.Buffer("emissiveSamples"),
Accumulator: dev.Buffer("accumulator"),
DebugOutput: dev.Buffer("debugOutput"),
Paths: dev.Buffer("paths"),
HitFlags: dev.Buffer("hitFlags"),
Intersections: dev.Buffer("intersections"),
EmissiveSamples: dev.Buffer("emissiveSamples"),
TraceAccumulator: dev.Buffer("traceAccumulator"),
FrameAccumulator: dev.Buffer("frameAccumulator"),
DebugOutput: dev.Buffer("debugOutput"),
RayCounters: [3]*device.Buffer{
dev.Buffer("numRays0"),
dev.Buffer("numRays1"),
Expand Down Expand Up @@ -144,7 +154,11 @@ func (bs *bufferSet) Resize(frameW, frameH uint32) error {
if err != nil {
return err
}
err = bs.Accumulator.Allocate(int(pixels*sizeofAccumulatorSample), cl.MEM_READ_WRITE)
err = bs.TraceAccumulator.Allocate(int(pixels*sizeofAccumulatorSample), cl.MEM_READ_WRITE)
if err != nil {
return err
}
err = bs.FrameAccumulator.Allocate(int(pixels*sizeofAccumulatorSample), cl.MEM_READ_WRITE)
if err != nil {
return err
}
Expand Down
7 changes: 5 additions & 2 deletions tracer/opencl/kernel_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ const (
accumulateEmissiveSamples
// hdr kernels
tonemapSimpleReinhard
// utils
// accumulator
clearAccumulator
// Debugging
aggregateAccumulator
// debugging
debugClearBuffer
debugRayIntersectionDepth
debugRayIntersectionNormals
Expand Down Expand Up @@ -55,6 +56,8 @@ func (kt kernelType) String() string {
return "tonemapSimpleReinhard"
case clearAccumulator:
return "clearAccumulator"
case aggregateAccumulator:
return "aggregateAccumulator"
case debugClearBuffer:
return "debugClearBuffer"
case debugRayIntersectionDepth:
Expand Down
9 changes: 7 additions & 2 deletions tracer/opencl/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ func DefaultPipeline(debugFlags DebugFlag) *Pipeline {
return pipeline
}

// Clear the accumulator buffer.
// Clear the frame accumulator buffer.
func ClearAccumulator() PipelineStage {
return func(tr *Tracer, blockReq *tracer.BlockRequest) (time.Duration, error) {
return tr.resources.ClearAccumulator(blockReq)
return tr.resources.ClearFrameAccumulator(blockReq)
}
}

Expand Down Expand Up @@ -101,6 +101,11 @@ func MonteCarloIntegrator(debugFlags DebugFlag) PipelineStage {

var activeRayBuf uint32 = 0

_, err = tr.resources.ClearTraceAccumulator(blockReq)
if err != nil {
return time.Since(start), err
}

// Intersect primary rays outside of the loop
// Use packet query intersector for GPUs as opencl forces CPU
// to use a local workgroup size equal to 1
Expand Down
54 changes: 43 additions & 11 deletions tracer/opencl/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,50 @@ func (dr *deviceResources) Close() {
}
}

// Clear a rectangular region of the output accumulator.
func (dr *deviceResources) ClearAccumulator(blockReq *tracer.BlockRequest) (time.Duration, error) {
// Clear the frame accumulator.
func (dr *deviceResources) ClearFrameAccumulator(blockReq *tracer.BlockRequest) (time.Duration, error) {
kernel := dr.kernels[clearAccumulator]
err := kernel.SetArgs(
dr.buffers.Accumulator,
blockReq.BlockW,
dr.buffers.FrameAccumulator,
)
if err != nil {
return 0, err
}

return kernel.Exec2D(0, int(blockReq.BlockY), int(blockReq.BlockW), int(blockReq.BlockH), 0, 0)
return kernel.Exec1D(0, int(blockReq.FrameW*blockReq.FrameH), 0)
}

// ClearTraceAccumulator runs the clearAccumulator kernel against this
// device's trace accumulator buffer.
//
// The kernel is launched with a size of FrameW*FrameH taken from blockReq,
// i.e. the range is derived from the full frame dimensions rather than from
// the block dimensions. It returns whatever Exec1D reports; if binding the
// kernel argument fails it returns a zero duration and that error.
func (dr *deviceResources) ClearTraceAccumulator(blockReq *tracer.BlockRequest) (time.Duration, error) {
	clearKernel := dr.kernels[clearAccumulator]
	if err := clearKernel.SetArgs(dr.buffers.TraceAccumulator); err != nil {
		return 0, err
	}

	// NOTE(review): Exec1D arguments look like (offset, size, local size) — confirm.
	frameElems := int(blockReq.FrameW * blockReq.FrameH)
	return clearKernel.Exec1D(0, frameElems, 0)
}

// AggregateAccumulator runs the aggregateAccumulator kernel with
// srcAccumulator as the source and this device's frame accumulator as the
// destination.
//
// The launch range is derived from blockReq: it starts at FrameW*BlockY and
// spans BlockW*BlockH elements. It returns whatever Exec1D reports; if
// binding the kernel arguments fails it returns a zero duration and that
// error.
func (dr *deviceResources) AggregateAccumulator(srcAccumulator *device.Buffer, blockReq *tracer.BlockRequest) (time.Duration, error) {
	aggKernel := dr.kernels[aggregateAccumulator]
	if err := aggKernel.SetArgs(srcAccumulator, dr.buffers.FrameAccumulator); err != nil {
		return 0, err
	}

	// Restrict the add to the block described by blockReq.
	// NOTE(review): BlockX is not part of the offset, so this presumably
	// relies on blocks spanning whole frame rows — confirm with the scheduler.
	firstElem := int(blockReq.FrameW * blockReq.BlockY)
	elemCount := int(blockReq.BlockW * blockReq.BlockH)
	return aggKernel.Exec1D(firstElem, elemCount, 0)
}

// Generate primary rays.
Expand Down Expand Up @@ -231,7 +263,7 @@ func (dr *deviceResources) ShadeHits(bounce, minBouncesForRR, randSeed, numEmiss
dr.buffers.Rays[1-rayBufferIndex],
dr.buffers.RayCounters[1-rayBufferIndex],
//
dr.buffers.Accumulator,
dr.buffers.TraceAccumulator,
)
if err != nil {
return 0, err
Expand All @@ -255,7 +287,7 @@ func (dr *deviceResources) ShadePrimaryRayMisses(diffuseMatNodeIndex, rayBufferI
diffuseMatNodeIndex,
dr.buffers.TextureMetadata,
dr.buffers.Textures,
dr.buffers.Accumulator,
dr.buffers.TraceAccumulator,
)
if err != nil {
return 0, err
Expand All @@ -279,7 +311,7 @@ func (dr *deviceResources) ShadeIndirectRayMisses(diffuseMatNodeIndex, rayBuffer
diffuseMatNodeIndex,
dr.buffers.TextureMetadata,
dr.buffers.Textures,
dr.buffers.Accumulator,
dr.buffers.TraceAccumulator,
)
if err != nil {
return 0, err
Expand All @@ -299,7 +331,7 @@ func (dr *deviceResources) AccumulateEmissiveSamples(rayBufferIndex uint32, numP
dr.buffers.Paths,
dr.buffers.HitFlags,
dr.buffers.EmissiveSamples,
dr.buffers.Accumulator,
dr.buffers.TraceAccumulator,
)
if err != nil {
return 0, err
Expand All @@ -314,7 +346,7 @@ func (dr *deviceResources) TonemapSimpleReinhard(blockReq *tracer.BlockRequest)
numPixels := int(blockReq.FrameW * blockReq.BlockH)
sampleWeight := float32(1.0 / float32(blockReq.AccumulatedSamples+blockReq.SamplesPerPixel))
err := kernel.SetArgs(
dr.buffers.Accumulator,
dr.buffers.FrameAccumulator,
dr.buffers.Paths,
dr.buffers.FrameBuffer,
sampleWeight,
Expand Down Expand Up @@ -480,7 +512,7 @@ func (dr *deviceResources) DebugAccumulator(blockReq *tracer.BlockRequest) (time
err = kernel.SetArgs(
sampleWeight,
dr.buffers.Paths,
dr.buffers.Accumulator,
dr.buffers.TraceAccumulator,
dr.buffers.DebugOutput,
)
if err != nil {
Expand Down
10 changes: 3 additions & 7 deletions tracer/opencl/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,8 @@ func (tr *Tracer) Trace(blockReq *tracer.BlockRequest) (time.Duration, error) {
}
}

tr.stats.BlockW = blockReq.BlockW
tr.stats.BlockH = blockReq.BlockH
tr.stats.RenderTime = time.Since(start)
return tr.stats.RenderTime, nil
}
Expand Down Expand Up @@ -267,11 +269,5 @@ func (tr *Tracer) MergeOutput(other tracer.Tracer, blockReq *tracer.BlockRequest
return 0, fmt.Errorf("merge failed: unsupported tracer instance")
}

start := time.Now()

// Each accumulator entry is 16 bytes long (float3 stored as float4)
dstOffset := int((blockReq.BlockY * blockReq.FrameW * 16) + (blockReq.BlockX * 16))
bytes := int((blockReq.BlockW * blockReq.BlockH * 16))

return time.Since(start), tr.resources.buffers.Accumulator.CopyDataFrom(src.resources.buffers.Accumulator, dstOffset, dstOffset, bytes)
return tr.resources.AggregateAccumulator(src.resources.buffers.TraceAccumulator, blockReq)
}

0 comments on commit f2075ec

Please sign in to comment.