diff --git a/cmd/render.go b/cmd/render.go
index 54fa11d..4f052ef 100644
--- a/cmd/render.go
+++ b/cmd/render.go
@@ -1,5 +1,18 @@
 package cmd
 
+import (
+	"bytes"
+	"errors"
+	"fmt"
+
+	"github.com/achilleasa/go-pathtrace/asset/scene/reader"
+	"github.com/achilleasa/go-pathtrace/renderer"
+	"github.com/achilleasa/go-pathtrace/tracer"
+	"github.com/achilleasa/go-pathtrace/tracer/opencl"
+	"github.com/olekukonko/tablewriter"
+	"github.com/urfave/cli"
+)
+
 const (
 	// Coefficients for converting delta cursor movements to yaw/pitch camera angles.
 	mouseSensitivityX float32 = 0.005
@@ -9,6 +22,104 @@ const (
 	cameraMoveSpeed float32 = 0.05
 )
 
+// RenderFrame is the action behind the "frame" command: it loads the scene
+// named by the single positional argument, renders one frame with the settings
+// taken from the command flags and logs per-device statistics for that frame.
+//
+// It returns an error when a numeric flag is out of range, the scene argument
+// count is wrong, the scene cannot be read, or renderer setup or rendering fails.
+func RenderFrame(ctx *cli.Context) error {
+	setupLogging(ctx)
+
+	// The numeric flags are converted to uint32 below. Reject values that would
+	// wrap around (negative) or give an empty frame and an undefined aspect ratio (zero).
+	for _, name := range []string{"width", "height", "spp"} {
+		if v := ctx.Int(name); v <= 0 {
+			return fmt.Errorf("invalid value %d for flag %q: must be greater than zero", v, name)
+		}
+	}
+	for _, name := range []string{"num-bounces", "rr-bounces"} {
+		if v := ctx.Int(name); v < 0 {
+			return fmt.Errorf("invalid value %d for flag %q: must not be negative", v, name)
+		}
+	}
+
+	opts := renderer.Options{
+		FrameW:          uint32(ctx.Int("width")),
+		FrameH:          uint32(ctx.Int("height")),
+		SamplesPerPixel: uint32(ctx.Int("spp")),
+		Exposure:        float32(ctx.Float64("exposure")),
+		NumBounces:      uint32(ctx.Int("num-bounces")),
+		MinBouncesForRR: uint32(ctx.Int("rr-bounces")),
+		//
+		BlackListedDevices: ctx.StringSlice("blacklist"),
+		ForcePrimaryDevice: ctx.String("force-primary"),
+	}
+
+	// A threshold of 0, or one at/above the bounce limit, means "no RR": push it past the limit.
+	if opts.MinBouncesForRR == 0 || opts.MinBouncesForRR >= opts.NumBounces {
+		logger.Notice("disabling RR for path elimination")
+		opts.MinBouncesForRR = opts.NumBounces + 1
+	}
+
+	// Load scene
+	if ctx.NArg() != 1 {
+		return errors.New("expected exactly one scene file argument")
+	}
+
+	sc, err := reader.ReadScene(ctx.Args().First())
+	if err != nil {
+		return err
+	}
+
+	// Update projection matrix.
+	// NOTE(review): sc.Camera is dereferenced here, before renderer.NewDefault gets a chance to reject a nil camera — confirm ReadScene always sets it.
+	sc.Camera.SetupProjection(float32(opts.FrameW) / float32(opts.FrameH))
+
+	// Setup tracing pipeline
+	pipeline := opencl.DefaultPipeline(opencl.NoDebug)
+	pipeline.PostProcess = append(pipeline.PostProcess, opencl.SaveFrameBuffer(ctx.String("out")))
+
+	// Create renderer
+	r, err := renderer.NewDefault(sc, tracer.NaiveScheduler(), pipeline, opts)
+	if err != nil {
+		return err
+	}
+	defer r.Close()
+
+	if _, err = r.Render(0); err != nil {
+		return err
+	}
+
+	// Display stats
+	displayFrameStats(r.Stats())
+
+	return nil
+}
+
+// displayFrameStats formats the per-tracer statistics of a frame as a text
+// table and writes it to the package logger at notice level.
+func displayFrameStats(stats renderer.FrameStats) {
+	var buf bytes.Buffer
+	table := tablewriter.NewWriter(&buf)
+	table.SetAutoFormatHeaders(false)
+	table.SetAutoWrapText(false)
+	table.SetHeader([]string{"Device", "Primary", "Block height", "% of frame", "Render time"})
+	for _, stat := range stats.Tracers {
+		table.Append([]string{
+			stat.Id,
+			fmt.Sprintf("%t", stat.IsPrimary),
+			fmt.Sprintf("%d", stat.BlockH),
+			fmt.Sprintf("%.1f %%", stat.FramePercent),
+			stat.RenderTime.String(),
+		})
+	}
+	table.SetFooter([]string{"", "", "", "TOTAL", stats.RenderTime.String()})
+
+	table.Render()
+	logger.Noticef("frame statistics\n%s", buf.String())
+}
+
 /*
 // Return the available opencl devices after applying the blacklist filters.
 func filteredDeviceList(ctx *cli.Context) []opencl.Device {
diff --git a/main.go b/main.go
index d514a3f..925d935 100644
--- a/main.go
+++ b/main.go
@@ -8,6 +8,16 @@ import (
 	"github.com/urfave/cli"
 )
 
+var (
+	sceneCompileHelp = `
+Parse a scene definition from a wavefront obj file, build a BVH tree to optimize
+ray intersection tests and package scene assets in a GPU-friendly format.
+
+The optimized scene data is then written to a zip archive which can be supplied
+as an argument to the render commands.
+`
+)
+
 func main() {
 	cli.VersionFlag = cli.BoolFlag{
 		Name: "version",
@@ -30,16 +40,22 @@ func main() {
 	}
 	app.Commands = []cli.Command{
 		{
-			Name: "compile",
-			Usage: "compile text scene representation into a binary compressed format",
-			Description: `
-Parse a scene definition from a wavefront obj file, build a BVH tree to optimize
-ray intersection tests and package scene elements in a GPU-friendly format.
- -The optimized scene data is then written to a zip archive which can be supplied -as an argument to the render command.`, - ArgsUsage: "scene_file1.obj scene_file2.obj ...", - Action: cmd.CompileScene, + Name: "scene", + Subcommands: []cli.Command{ + { + Name: "compile", + Usage: "compile text scene representation into a binary compressed format", + Description: sceneCompileHelp, + ArgsUsage: "scene_file1.obj scene_file2.obj ...", + Action: cmd.CompileScene, + }, + { + Name: "info", + Usage: "print the size of the various compiled scene assets", + ArgsUsage: "scene_file.zip", + Action: cmd.ShowSceneInfo, + }, + }, }, { Name: "list-devices", @@ -55,15 +71,16 @@ as an argument to the render command.`, Name: "frame", Usage: "render single frame", Description: `Render a single frame.`, + ArgsUsage: "scene_file.zip or scene_file.obj", Flags: []cli.Flag{ cli.IntFlag{ Name: "width", - Value: 512, + Value: 1024, Usage: "frame width", }, cli.IntFlag{ Name: "height", - Value: 512, + Value: 1024, Usage: "frame height", }, cli.IntFlag{ @@ -71,9 +88,19 @@ as an argument to the render command.`, Value: 16, Usage: "samples per pixel", }, + cli.IntFlag{ + Name: "num-bounces, nb", + Value: 5, + Usage: "number of indirect ray bounces", + }, + cli.IntFlag{ + Name: "rr-bounces, nr", + Value: 3, + Usage: "number of indirect ray bounces before applying RR (disabled if 0 or >= than num-bounces)", + }, cli.Float64Flag{ Name: "exposure", - Value: 1.0, + Value: 1.2, Usage: "camera exposure for tone-mapping", }, cli.StringSliceFlag{ @@ -81,13 +108,18 @@ as an argument to the render command.`, Value: &cli.StringSlice{}, Usage: "blacklist opencl device whose names contain this value", }, + cli.StringFlag{ + Name: "force-primary", + Value: "", + Usage: "force a particular device name as the primary device", + }, cli.StringFlag{ Name: "out, o", Value: "frame.png", Usage: "image filename for the rendered frame", }, }, - // Action: cmd.RenderFrame, + Action: cmd.RenderFrame, }, { Name: 
"interactive",
diff --git a/renderer/options.go b/renderer/options.go
new file mode 100644
index 0000000..10ada9e
--- /dev/null
+++ b/renderer/options.go
@@ -0,0 +1,23 @@
+package renderer
+
+type Options struct {
+	// Frame dimensions.
+	FrameW uint32
+	FrameH uint32
+
+	// Number of indirect bounces.
+	NumBounces uint32
+
+	// Min bounces before applying russian roulette (RR) for path elimination; the CLI sets it above NumBounces to disable RR.
+	MinBouncesForRR uint32
+
+	// Number of samples per pixel.
+	SamplesPerPixel uint32
+
+	// Exposure for tonemapping.
+	Exposure float32
+
+	// Device selection: name substrings of devices to exclude, and a name substring of the device to use as primary.
+	BlackListedDevices []string
+	ForcePrimaryDevice string
+}
diff --git a/renderer/renderer.go b/renderer/renderer.go
index 880518f..1b561bf 100644
--- a/renderer/renderer.go
+++ b/renderer/renderer.go
@@ -1,194 +1,286 @@
 package renderer
 
-type SamplesPerPixel uint32
-
-const (
-	// Renderer should automatically select the appropriate SPP value.
-	AutoSamplesPerPixel SamplesPerPixel = iota
+import (
+	"fmt"
+	"math/rand"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/achilleasa/go-pathtrace/asset/scene"
+	"github.com/achilleasa/go-pathtrace/log"
+	"github.com/achilleasa/go-pathtrace/tracer"
+	"github.com/achilleasa/go-pathtrace/tracer/opencl"
+	"github.com/achilleasa/go-pathtrace/tracer/opencl/device"
 )
 
-/*
-type Renderer struct {
-	// A lock for synchronizing access to the framebuffer.
-	sync.Mutex
-
-	// Renderer frame dims.
-	frameW uint32
-	frameH uint32
+type Renderer interface {
+	// Render renders a frame and returns the time spent rendering it.
+	Render(accumulatedSamples uint32) (time.Duration, error)
 
-	// A block scheduler instance
-	scheduler tracer.BlockScheduler
+	// Close shuts down the renderer and any attached tracer.
+	Close()
 
-	// The scene to be rendered.
-	scene *scene.Scene
+	// Stats returns the render statistics.
+	Stats() FrameStats
+}
 
-	// A buffered channel for receiving block completions.
-	tracerDoneChan chan uint32
+type defaultRenderer struct {
+	logger log.Logger
 
-	// A channel for receiving tracer errors.
-	tracerErrChan chan error
+	options Options
 
-	// This buffer contains the final composited frame. It is generated by
-	// the contents of the accumulation buffer after applying tone-mapping
-	// and gamma correction. It uses an image.RGBA as its backing store making it
-	// easy to display on screen on export to any format supported by the image package.
-	frameBuffer *image.RGBA
+	// Worker sync primitives: workerInitGroup tracks worker start-up, workerCloseGroup tracks worker shutdown.
+	workerInitGroup  sync.WaitGroup
+	workerCloseGroup sync.WaitGroup
 
-	// An accumulation buffer where each frame's output is blended with
-	// the previous frames' output. This enables progressive scene rendering.
-	accumBuffer []float32
+	// The list of registered tracers, one job channel per tracer, and the channel workers report job completion on.
+	tracers         []tracer.Tracer
+	jobChans        []chan tracer.BlockRequest
+	jobCompleteChan chan error
 
-	// Number of sequential frames rendered from the current camera location.
-	// This value is used to calculate a blend weight for adding the
-	// framebuffer contents into the accumulation buffer.
-	frameCount uint32
+	// Index into tracers of the selected primary tracer.
+	primary int
 
-	// The time it took to render last frame (in nanoseconds)
-	lastFrameTime int64
+	// The scheduler for distributing blocks to the list of tracers.
+	scheduler tracer.BlockScheduler
 
-	// The list of attached tracers
-	tracers []tracer.Tracer
+	// Renderer statistics, updated by Render.
+	stats FrameStats
+}
 
-	// SPP estimate based on last frame stats.
-	sppEstimate uint32
+// NewDefault creates a new default renderer using the specified block scheduler and tracing pipeline.
+func NewDefault(sc *scene.Scene, scheduler tracer.BlockScheduler, pipeline *opencl.Pipeline, opts Options) (Renderer, error) {
+	if sc == nil {
+		return nil, ErrSceneNotDefined
+	} else if sc.Camera == nil {
+		return nil, ErrCameraNotDefined
+	}
 
-	// The exposure parameter controls tone-mapping
-	Exposure float32
-}
+	r := &defaultRenderer{
+		logger:    log.New("renderer"),
+		scheduler: scheduler,
+		options:   opts,
+	}
 
-// Create a new renderer
-func NewRenderer(frameW, frameH uint32, sc *scene.Scene) *Renderer {
-	return &Renderer{
-		frameW:         frameW,
-		frameH:         frameH,
-		scheduler:      tracer.NewPerfectScheduler(),
-		tracerDoneChan: make(chan uint32, frameH),
-		tracerErrChan:  make(chan error, 0),
-		frameBuffer:    image.NewRGBA(image.Rect(0, 0, int(frameW), int(frameH))),
-		accumBuffer:    make([]float32, frameW*frameH*4),
-		frameCount:     0,
-		tracers:        make([]tracer.Tracer, 0),
-		scene:          sc,
-		sppEstimate:    1,
-		Exposure:       1,
+	err := r.initTracers(pipeline)
+	if err != nil {
+		return nil, err
 	}
+	r.jobChans = make([]chan tracer.BlockRequest, len(r.tracers))
+	// One slot per worker: a worker can always deliver its result and go back to its job channel,
+	// so Close cannot hang on a worker whose result was never received after Render bailed out early.
+	r.jobCompleteChan = make(chan error, len(r.tracers))
+
+	// Start workers
+	r.workerInitGroup.Add(len(r.tracers))
+	r.workerCloseGroup.Add(len(r.tracers))
+	for trIndex := 0; trIndex < len(r.tracers); trIndex++ {
+		// Queue state changes. NOTE(review): anything UpdateState returns is discarded here — confirm it cannot fail.
+		r.tracers[trIndex].UpdateState(tracer.Synchronous, tracer.FrameDimensions, [2]uint32{opts.FrameW, opts.FrameH})
+		r.tracers[trIndex].UpdateState(tracer.Synchronous, tracer.SceneData, sc)
+		r.tracers[trIndex].UpdateState(tracer.Synchronous, tracer.CameraData, sc.Camera)
+
+		// Start worker; its job channel is unbuffered so a send returns once the worker picked the job up
+		r.jobChans[trIndex] = make(chan tracer.BlockRequest)
+		go r.jobWorker(trIndex)
+	}
+
+	// wait for all workers to start
+	r.workerInitGroup.Wait()
+
+	return r, nil
 }
 
-// Shutdown and cleanup renderer and all connected tracers. This function will
-// block if a frame is currently being rendered.
-func (r *Renderer) Close() {
-	r.Lock()
-	defer r.Unlock()
+// Stats returns the statistics recorded for the last rendered frame.
+func (r *defaultRenderer) Stats() FrameStats {
+	return r.stats
+}
 
-	for _, tr := range r.tracers {
-		tr.Close()
+// Close stops the workers (each closes its tracer on exit) and waits for them; calling it twice panics on the closed channels.
+func (r *defaultRenderer) Close() {
+	for _, ch := range r.jobChans {
+		close(ch)
 	}
-	r.tracers = make([]tracer.Tracer, 0)
+
+	r.workerCloseGroup.Wait()
 }
 
-// Add a tracer to the renderer's tracer pool.
-func (r *Renderer) AddTracer(tr tracer.Tracer) error {
-	err := tr.Setup(r.frameW, r.frameH, r.accumBuffer, r.frameBuffer.Pix)
-	if err != nil {
-		return err
+// Render renders the next frame; it returns the elapsed time and, on failure, the first error reported by a tracer.
+func (r *defaultRenderer) Render(accumulatedSamples uint32) (time.Duration, error) {
+	var blockReq = tracer.BlockRequest{
+		FrameW:             r.options.FrameW,
+		FrameH:             r.options.FrameH,
+		BlockW:             r.options.FrameW,
+		SamplesPerPixel:    r.options.SamplesPerPixel,
+		Exposure:           r.options.Exposure,
+		NumBounces:         r.options.NumBounces,
+		MinBouncesForRR:    r.options.MinBouncesForRR,
+		AccumulatedSamples: accumulatedSamples,
+		Seed:               rand.Uint32(),
 	}
 
-	// sync scene with tracer
-	err = r.syncScene(tr)
-	if err != nil {
-		return err
+	start := time.Now()
+
+	// Schedule blocks and process them in parallel
+	blockAssignments := r.scheduler.Schedule(r.tracers, blockReq.FrameH)
+	for trIndex, blockH := range blockAssignments {
+		blockReq.BlockH = blockH
+		r.jobChans[trIndex] <- blockReq
+
+		r.stats.Tracers[trIndex].BlockH = blockH
+		r.stats.Tracers[trIndex].FramePercent = 100.0 * float32(blockH) / float32(blockReq.FrameH)
+
+		blockReq.BlockY += blockH
 	}
 
-	// Lock renderer so we can add the new tracer to the pool
-	r.Lock()
-	defer r.Unlock()
+	// Wait for every dispatched job; keep draining after a failure so no worker stays blocked on its send
+	var firstErr error
+	for pending := len(blockAssignments); pending != 0; pending-- {
+		err, ok := <-r.jobCompleteChan
+		if !ok {
+			err = ErrInterrupted
+		}
 
-	r.tracers = append(r.tracers, tr)
-	return nil
-}
+		if err != nil && firstErr == nil {
+			firstErr = err
+		}
+	}
+	if firstErr != nil {
+		return time.Since(start), firstErr
+	}
+
+	// Run post-process filters on the primary tracer. NOTE(review): anything SyncFramebuffer returns is discarded — confirm it cannot fail.
+	blockReq.BlockY, blockReq.BlockH = 0, blockReq.FrameH
+	r.tracers[r.primary].SyncFramebuffer(&blockReq)
 
-// Synchronize scene changes with tracer and manually trigger a flush.
-func (r *Renderer) syncScene(tr tracer.Tracer) error {
-	tr.AppendChange(tracer.UpdateCamera, r.scene.Camera)
-	tr.AppendChange(tracer.SetMaterials, r.scene.Materials)
-	tr.AppendChange(tracer.SetBvhNodes, r.scene.BvhNodes)
-	tr.AppendChange(tracer.SetPrimitivies, r.scene.Primitives)
-	tr.AppendChange(tracer.SetEmissiveLightIndices, r.scene.EmissivePrimitiveIndices)
+	r.stats.RenderTime = time.Since(start)
 
-	return tr.ApplyPendingChanges()
+	// Collect the per-tracer render times
+	for trIndex, tr := range r.tracers {
+		r.stats.Tracers[trIndex].RenderTime = tr.Stats().RenderTime
+	}
+
+	return r.stats.RenderTime, nil
 }
 
-// Render frame. This method splits the screen into blocks and distributes them
-// to all available tracers. Once the blocks have been successfully rendered
-// it composes the results into an RGBA image. Callers must never modify the
-// returned RGBA image.
-func (r *Renderer) Render(spp SamplesPerPixel) (*image.RGBA, error) {
-	r.Lock()
-	defer r.Unlock()
+// jobWorker processes the block requests sent to tracer trIndex until its job channel is closed, then closes the tracer.
+func (r *defaultRenderer) jobWorker(trIndex int) {
+	r.workerInitGroup.Done()
+	defer func() {
+		r.tracers[trIndex].Close()
+		r.workerCloseGroup.Done()
+	}()
 
-	startTime := time.Now()
+	for {
+		select {
+		case blockReq, ok := <-r.jobChans[trIndex]:
+			if !ok {
+				return
+			}
 
-	if r.scene == nil {
-		return nil, ErrSceneNotDefined
+			_, err := r.tracers[trIndex].Trace(&blockReq)
+			if err == nil && trIndex != r.primary {
+				// Merge accumulator output with primary tracer. NOTE(review): this runs on the worker goroutine while the primary may still be tracing — confirm that is safe.
+				_, err = r.tracers[r.primary].MergeOutput(r.tracers[trIndex], &blockReq)
+			}
+			r.jobCompleteChan <- err
+		}
 	}
-	if r.scene.Camera == nil {
-		return nil, ErrCameraNotDefined
+}
+
+// initTracers selects and initializes opencl devices, excluding the ones which match the blacklist entries, and picks the primary tracer.
+func (r *defaultRenderer) initTracers(pipeline *opencl.Pipeline) error {
+	if len(r.options.BlackListedDevices) != 0 {
+		r.logger.Infof("blacklisted devices: %s", strings.Join(r.options.BlackListedDevices, ", "))
 	}
-	if len(r.tracers) == 0 {
-		return nil, ErrNoTracers
+
+	platforms, err := device.GetPlatformInfo()
+	if err != nil {
+		return err
 	}
 
-	// Update block assignments
-	blockAssignment := r.scheduler.Schedule(r.tracers, r.frameH, r.lastFrameTime)
+	selectedDevices := make([]*device.Device, 0)
+	for _, platformInfo := range platforms {
+		for _, dev := range platformInfo.Devices {
+			keep := true
+			for _, text := range r.options.BlackListedDevices {
+				if text != "" && strings.Contains(dev.Name, text) {
+					keep = false
+					break
+				}
+			}
 
-	// Setup common block request values
-	var blockReq tracer.BlockRequest
-	blockReq.FrameCount = r.frameCount + 1
-	blockReq.DoneChan = r.tracerDoneChan
-	blockReq.ErrChan = r.tracerErrChan
-	if spp == AutoSamplesPerPixel {
-		blockReq.SamplesPerPixel = r.sppEstimate
-	} else {
-		blockReq.SamplesPerPixel = uint32(spp)
+			if keep {
+				selectedDevices = append(selectedDevices, dev)
+			}
+		}
 	}
-	blockReq.Exposure = r.Exposure
-	blockReq.Seed = rand.Uint32()
 
-	// Enqueue work units
-	var pendingRows uint32 = 0
-	for idx, tr := range r.tracers {
-		blockReq.BlockY = pendingRows
-		blockReq.BlockH = blockAssignment[idx]
-		tr.Enqueue(blockReq)
+	// Create shared context for selected devices
+	sharedCtx, err := device.NewSharedContext(selectedDevices)
+	if err != nil {
+		return err
+	}
 
-		pendingRows += blockReq.BlockH
+	// Initialize all tracers using the shared context
+	r.tracers = make([]tracer.Tracer, 0)
+	r.stats.Tracers = make([]TracerStat, 0)
+	r.primary = -1
+
+	for _, dev := range selectedDevices {
+		// Create and initialize tracer. NOTE(review): a tracer whose Init fails is dropped without Close — confirm nothing leaks.
+		tr, err := opencl.NewTracer(
+			fmt.Sprintf("%s (%d)", dev.Name, len(r.tracers)),
+			dev,
+			sharedCtx,
+			pipeline,
+		)
+		if err == nil {
+			err = tr.Init()
+		}
+
+		if err != nil {
+			r.logger.Warningf("could not init device %q: %v", dev.Name, err)
+			continue
+		}
+
+		// If no error occurred add to list
+		r.logger.Noticef("using device %q", tr.Id())
+		r.tracers = append(r.tracers, tr)
+
+		if r.options.ForcePrimaryDevice != "" && strings.Contains(dev.Name, r.options.ForcePrimaryDevice) {
+			r.primary = len(r.tracers) - 1
+		}
+
+		// Init statistics (kept index-aligned with r.tracers)
+		r.stats.Tracers = append(r.stats.Tracers, TracerStat{
+			Id: tr.Id(),
+		})
 	}
 
-	// Wait for all rows to be completed
-	for {
-		select {
-		case completedRows := <-r.tracerDoneChan:
-			pendingRows -= completedRows
-			if pendingRows == 0 {
-				r.frameCount++
-				r.lastFrameTime = time.Since(startTime).Nanoseconds()
-				return r.frameBuffer, nil
+	if len(r.tracers) == 0 {
+		return ErrNoTracers
+	}
+
+	// If no primary tracer selected, pick the non-CPU device with max estimated speed
+	if r.primary == -1 {
+		var bestSpeed uint32
+		for trIndex, tr := range r.tracers {
+			if ((tr.Flags() & tracer.CpuDevice) == 0) && tr.Speed() > bestSpeed {
+				bestSpeed = tr.Speed()
+				r.primary = trIndex
 			}
-		case err := <-r.tracerErrChan:
-			return nil, err
 		}
 	}
-}
 
-// Sync all tracers with the updated camera position.
-func (r *Renderer) UpdateCamera() {
-	r.Lock()
-	defer r.Unlock()
+	// If we still haven't found a primary device just select the first available
+	if r.primary == -1 {
+		r.primary = 0
+	}
 
-	// This is equivalent to clearing the accumulation buffer
-	r.frameCount = 0
+	r.stats.Tracers[r.primary].IsPrimary = true
+	r.logger.Noticef("selected %q as primary device", r.tracers[r.primary].Id())
 
-	for _, tr := range r.tracers {
-		tr.AppendChange(tracer.UpdateCamera, r.scene.Camera)
-	}
-}*/
+	return nil
+}
diff --git a/renderer/stats.go b/renderer/stats.go
new file mode 100644
index 0000000..8a2e234
--- /dev/null
+++ b/renderer/stats.go
@@ -0,0 +1,26 @@
+package renderer
+
+import "time"
+
+type TracerStat struct {
+	// The tracer id.
+	Id string
+
+	// True if this is the primary tracer (the one the renderer runs the post-process step on).
+	IsPrimary bool
+
+	// Height of the block assigned to this tracer and the percentage of the frame height it covers.
+	BlockH       uint32
+	FramePercent float32
+
+	// Render time reported by the tracer for its assigned block.
+	RenderTime time.Duration
+}
+
+type FrameStats struct {
+	// Individual tracer stats, in the same order as the renderer's tracer list.
+	Tracers []TracerStat
+
+	// Total render time for the entire frame, measured by the renderer after the post-process step.
+	RenderTime time.Duration
+}