From 163bb6fdc1445f6832a3a4fb7b1be5b842efb2df Mon Sep 17 00:00:00 2001 From: Mike Smith Date: Sun, 2 Jun 2024 16:03:52 +0800 Subject: [PATCH] remove util/u64.h and use luisa::ulong instead (#29) --- src/integrators/mega_vpt.cpp | 107 +++++++++---------- src/integrators/mega_vpt_naive.cpp | 83 +++++++-------- src/integrators/pssmlt.cpp | 85 ++++++++------- src/samplers/sobol.cpp | 73 +++++++------ src/samplers/zsobol.cpp | 83 ++++++++------- src/util/CMakeLists.txt | 1 - src/util/rng.cpp | 20 ++-- src/util/rng.h | 12 +-- src/util/u64.cpp | 15 --- src/util/u64.h | 162 ----------------------------- 10 files changed, 234 insertions(+), 407 deletions(-) delete mode 100644 src/util/u64.cpp delete mode 100644 src/util/u64.h diff --git a/src/integrators/mega_vpt.cpp b/src/integrators/mega_vpt.cpp index ad941dfa..f04a2187 100644 --- a/src/integrators/mega_vpt.cpp +++ b/src/integrators/mega_vpt.cpp @@ -54,7 +54,7 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: [[nodiscard]] UInt event(const SampledWavelengths &swl, luisa::shared_ptr it, Expr time, Expr wo, Expr wi) const noexcept { Float3 wo_local, wi_local; - $if(it->shape().has_surface()) { + $if (it->shape().has_surface()) { PolymorphicCall call; pipeline().surfaces().dispatch(it->shape().surface_tag(), [&](auto surface) noexcept { surface->closure(call, *it, swl, wo, 1.f, time); @@ -109,13 +109,13 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: auto ray = camera_ray; // TODO: bug in initialization of medium tracker where the angle between shared edge is small auto depth_track = def(0u); - $while(true) { + $while (true) { auto it = pipeline().geometry()->intersect(ray); - $if(!it->valid()) { $break; }; + $if (!it->valid()) { $break; }; device_log("depth={}", depth_track + 1u); - $if(it->shape().has_medium()) { + $if (it->shape().has_medium()) { auto surface_tag = it->shape().surface_tag(); auto medium_tag = it->shape().medium_tag(); @@ -130,13 +130,13 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: pipeline().surfaces().dispatch(surface_tag, [&](auto surface) { device_log("surface event={}", surface_event); // update medium tracker - $switch(surface_event) { - $case(Surface::event_enter) { + $switch (surface_event) { + $case (Surface::event_enter) { medium_tracker.enter(medium_priority, medium_info); device_log("enter: priority={}, medium_tag={}", medium_priority, medium_tag); }; - $case(Surface::event_exit) { - $if(medium_tracker.exist(medium_priority, medium_info)) { + $case (Surface::event_exit) { + $if (medium_tracker.exist(medium_priority, medium_info)) { medium_tracker.exit(medium_priority, medium_info); device_log("exit exist: priority={}, medium_tag={}", medium_priority, medium_tag); } @@ -165,11 +165,11 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: auto eta_scale = def(1.f); auto depth = def(0u); auto max_depth = node()->max_depth(); - $while(depth < max_depth) { + $while (depth < max_depth) { auto eta = def(1.f); auto u_rr = def(0.f); Bool scattered = def(false); - $if(depth + 1u >= rr_depth) { u_rr = sampler()->generate_1d(); }; + $if (depth + 1u >= rr_depth) { u_rr = sampler()->generate_1d(); }; // trace auto it = pipeline().geometry()->intersect(ray); @@ -177,18 +177,19 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: device_log("depth={}", depth + 1u); device_log("before: medium tracker size={}, priority={}, tag={}", - medium_tracker.size(), medium_tracker.current().priority, medium_tracker.current().medium_tag); + medium_tracker.size(), medium_tracker.current().priority, medium_tracker.current().medium_tag); device_log("it->p(): ({}, {}, {})", it->p().x, it->p().y, it->p().z); // sample the participating medium - $if(!medium_tracker.vacuum()) { + $if (!medium_tracker.vacuum()) { // Sample the participating medium auto t_max = ite(it->valid(), length(it->p() - ray->origin()), Interaction::default_t_max); // Initialize RNG for sampling the majorant transmittance - auto hash0 = U64(as(sampler()->generate_2d())); - auto hash1 = U64(as(sampler()->generate_2d())); - PCG32 rng(hash0, hash1); + auto u1 = as(sampler()->generate_2d()); + auto u2 = as(sampler()->generate_2d()); + PCG32 rng((cast(u1.x) << 32ull) | cast(u1.y), + (cast(u2.x) << 32ull) | cast(u2.y)); // Sample medium using delta tracking Float u = sampler()->generate_1d(); @@ -208,13 +209,13 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: Bool ans = def(true); // Handle medium scattering event for ray - $if(beta.all([](auto b) noexcept { return b <= 0.f; })) { + $if (beta.all([](auto b) noexcept { return b <= 0.f; })) { terminated = true; ans = false; } $else { // Add emission from medium scattering event - $if((depth < max_depth) & !closure_p->le().is_zero()) { + $if ((depth < max_depth) & !closure_p->le().is_zero()) { // Compute beta' at new path vertex Float pdf = sigma_maj[0u] * T_maj[0u]; SampledSpectrum betap = beta * T_maj / pdf; @@ -236,18 +237,18 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: Float um = rng.uniform_float(); UInt medium_event = Medium::sample_event(pAbsorb, pScatter, pNull, um); // don't use switch-case here, because of local variable definition - $if(medium_event == Medium::event_absorb) { + $if (medium_event == Medium::event_absorb) { device_log("Absorb"); // Handle absorption along ray path terminated = true; ans = false; } - $elif(medium_event == Medium::event_scatter) { + $elif (medium_event == Medium::event_scatter) { device_log("Scatter"); // Handle scattering along ray path // Stop path sampling if maximum depth has been reached depth += 1u; - $if(depth >= max_depth) { + $if (depth >= max_depth) { terminated = true; ans = false; } @@ -258,7 +259,7 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: r_u *= T_maj * closure_p->sigma_s() / pdf; Bool Ld_medium_zero = def(false); - $if(!beta.is_zero() & !r_u.is_zero()) { + $if (!beta.is_zero() & !r_u.is_zero()) { // Sample direct lighting at volume scattering event // generate uniform samples auto u_light_selection = sampler()->generate_1d(); @@ -270,7 +271,7 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: light_it, u_light_selection, u_light_surface, swl, time); // direct lighting - $if(light_sample.eval.pdf > 0.0f) { + $if (light_sample.eval.pdf > 0.0f) { auto wo = closure->ray()->direction(); auto wi = light_sample.shadow_ray->direction(); @@ -279,9 +280,9 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: // PCG32 rng(U64(make_uint2(xxhash32(light_ray.origin()), xxhash32(light_ray.direction())))); - $while(any(light_ray->direction() != 0.f)) { + $while (any(light_ray->direction() != 0.f)) { auto si = pipeline().geometry()->intersect(light_ray); - $if(si->valid() & si->shape().has_surface()) { + $if (si->valid() & si->shape().has_surface()) { Ld_medium_zero = true; $break; }; @@ -314,17 +315,17 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: r_u *= T_maj / T_maj[0u]; // Generate next ray segment or return final transmittance - $if(!T_ray.is_zero()) { + $if (!T_ray.is_zero()) { Ld_medium_zero = true; $break; }; - $if(!si->valid()) { + $if (!si->valid()) { $break; }; light_ray = si->spawn_ray_to(light_sample.shadow_ray->origin()); }; - $if(!Ld_medium_zero) { + $if (!Ld_medium_zero) { auto phase_function = closure->phase_function(); auto f_hat = phase_function->p(wo, wi); auto scatter_pdf = phase_function->pdf(wo, wi); @@ -339,7 +340,7 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: // Sample new direction at real scattering event Float2 u = sampler()->generate_2d(); auto ps = closure->phase_function()->sample_p(-ray->direction(), u); - $if(!ps.valid | (ps.pdf == 0.f)) { + $if (!ps.valid | (ps.pdf == 0.f)) { terminated = true; } $else { @@ -357,13 +358,13 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: ans = false; }; } - $elif(medium_event == Medium::event_null) { + $elif (medium_event == Medium::event_null) { device_log("Null"); // Handle null scattering along ray path SampledSpectrum sigma_n = max(sigma_maj - closure_p->sigma_a() - closure_p->sigma_s(), 0.f); Float pdf = T_maj[0u] * sigma_n[0u]; beta *= T_maj * sigma_n / pdf; - $if(pdf == 0.f) { + $if (pdf == 0.f) { beta = 0.f; }; r_u *= T_maj * sigma_n / pdf; @@ -375,11 +376,11 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: }); // Handle terminated, scattered, and unscattered medium rays - $if(terminated | beta.all(le_zero) | r_u.all(le_zero)) { + $if (terminated | beta.all(le_zero) | r_u.all(le_zero)) { // Terminate path sampling if ray has been terminated $break; }; - $if(scattered) { + $if (scattered) { $continue; }; @@ -395,10 +396,10 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: // sample the surface // miss, environment light - $if(!it->valid()) { + $if (!it->valid()) { if (pipeline().environment()) { auto eval = light_sampler()->evaluate_miss(ray->direction(), swl, time); - $if(depth == 0u) { + $if (depth == 0u) { Li += beta * eval.L / r_u.average(); } $else { @@ -411,9 +412,9 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: // hit light if (!pipeline().lights().empty()) { - $if(it->shape().has_light()) { + $if (it->shape().has_light()) { auto eval = light_sampler()->evaluate_hit(*it, ray->origin(), swl, time); - $if(depth == 0u) { + $if (depth == 0u) { Li += beta * eval.L / r_u.average(); } $else { @@ -424,7 +425,7 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: } // hit ordinary surface - $if(!it->shape().has_surface()) { + $if (!it->shape().has_surface()) { // TODO: if shape has no surface, we cannot get the right normal direction // so we cannot deal with medium tracker correctly (enter/exit) ray = it->spawn_ray(ray->direction()); @@ -451,7 +452,7 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: auto closure = medium->closure(ray, swl, time); eta_next = closure->eta(); }); - $if(has_medium) { + $if (has_medium) { pipeline().media().dispatch(medium_tag, [&](auto medium) { medium_priority = medium->priority(); auto closure = medium->closure(ray, swl, time); @@ -473,18 +474,18 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: call.execute([&](auto closure) noexcept { // apply opacity map UInt surface_event; - $if(medium_tag != medium_tracker.current().medium_tag) { + $if (medium_tag != medium_tracker.current().medium_tag) { surface_event = surface_event_skip; ray = it->spawn_ray(ray->direction()); pdf_bsdf = 1e16f; } $else { if (auto dispersive = closure->is_dispersive()) { - $if(*dispersive) { swl.terminate_secondary(); }; + $if (*dispersive) { swl.terminate_secondary(); }; } // direct lighting // TODO: add medium to direct lighting - $if(light_sample.eval.pdf > 0.0f & !occluded) { + $if (light_sample.eval.pdf > 0.0f & !occluded) { auto wi = light_sample.shadow_ray->direction(); auto eval = closure->evaluate(wo, wi); auto w = balance_heuristic(light_sample.eval.pdf, eval.pdf) / @@ -501,24 +502,24 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: beta *= w * surface_sample.eval.f; r_l = r_u * w; // apply eta scale & update medium tracker - $if(has_medium) { - $switch(surface_event) { - $case(Surface::event_enter) { + $if (has_medium) { + $switch (surface_event) { + $case (Surface::event_enter) { eta_scale = sqr(eta_next / eta); }; - $case(Surface::event_exit) { + $case (Surface::event_exit) { eta_scale = sqr(eta / eta_next); }; }; }; }; - $if(has_medium) { - $switch(surface_event) { - $case(Surface::event_enter) { + $if (has_medium) { + $switch (surface_event) { + $case (Surface::event_enter) { medium_tracker.enter(medium_priority, medium_info); }; - $case(Surface::event_exit) { + $case (Surface::event_exit) { medium_tracker.exit(medium_priority, medium_info); }; }; @@ -527,12 +528,12 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: }; beta = zero_if_any_nan(beta); - $if(beta.all(le_zero)) { $break; }; + $if (beta.all(le_zero)) { $break; }; // rr auto rr_threshold = node()->rr_threshold(); auto q = max(beta.max() * eta_scale, .05f); - $if(depth + 1u >= rr_depth) { - $if(q < rr_threshold & u_rr >= q) { $break; }; + $if (depth + 1u >= rr_depth) { + $if (q < rr_threshold & u_rr >= q) { $break; }; beta *= ite(q < rr_threshold, 1.0f / q, 1.f); }; depth += 1u; @@ -541,7 +542,7 @@ class MegakernelVolumePathTracingInstance final : public ProgressiveIntegrator:: "scattered={}, beta=({}, {}, {}), pdf_bsdf={}, Li: ({}, {}, {})", scattered, beta[0u], beta[1u], beta[2u], pdf_bsdf, Li[0u], Li[1u], Li[2u]); device_log("after: medium tracker size={}, priority={}, tag={}", - medium_tracker.size(), medium_tracker.current().priority, medium_tracker.current().medium_tag); + medium_tracker.size(), medium_tracker.current().priority, medium_tracker.current().medium_tag); }; return spectrum->srgb(swl, Li); } diff --git a/src/integrators/mega_vpt_naive.cpp b/src/integrators/mega_vpt_naive.cpp index c8c17d45..7978d1e2 100644 --- a/src/integrators/mega_vpt_naive.cpp +++ b/src/integrators/mega_vpt_naive.cpp @@ -69,7 +69,7 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra [[nodiscard]] UInt _event(const SampledWavelengths &swl, luisa::shared_ptr it, Expr time, Expr wo, Expr wi) const noexcept { Float3 wo_local, wi_local; - $if(it->shape().has_surface()) { + $if (it->shape().has_surface()) { PolymorphicCall call; pipeline().surfaces().dispatch(it->shape().surface_tag(), [&](auto surface) noexcept { surface->closure(call, *it, swl, wo, 1.f, time); @@ -105,11 +105,11 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra transmittance.pdf = 0.f; // trace shadow ray - $while(any(transmittance.f > 0.f)) { + $while (any(transmittance.f > 0.f)) { auto it = pipeline().geometry()->intersect(ray); // end tracing - $if(!it->valid()) { $break; }; + $if (!it->valid()) { $break; }; auto t2surface = length(it->p() - ray->origin()); auto has_medium = it->shape().has_medium(); @@ -120,7 +120,7 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra auto surface_event = _event(swl, it, time, wo, wi); // transmittance through medium - $if(!medium_tracker.vacuum()) { + $if (!medium_tracker.vacuum()) { pipeline().media().dispatch(medium_tracker.current().medium_tag, [&](auto medium) { medium_priority = medium->priority(); auto closure = medium->closure(ray, swl, time); @@ -131,12 +131,12 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra }; // update medium tracker - $if(has_medium) { + $if (has_medium) { pipeline().media().dispatch(medium_tag, [&](auto medium) { medium_priority = medium->priority(); }); auto medium_info = make_medium_info(medium_priority, medium_tag); - $if(surface_event == Surface::event_exit) { + $if (surface_event == Surface::event_exit) { medium_tracker.exit(medium_priority, medium_info); } $else { @@ -145,7 +145,7 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra }; // hit solid/transmissive surface - $if(it->shape().has_surface()) { + $if (it->shape().has_surface()) { auto surface_tag = it->shape().surface_tag(); PolymorphicCall call; pipeline().surfaces().dispatch(surface_tag, [&](auto surface) noexcept { @@ -182,7 +182,8 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra MediumTracker medium_tracker; // Initialize RNG for sampling the majorant transmittance - PCG32 rng(U64(as(sampler()->generate_2d()))); + auto u = as(sampler()->generate_2d()); + PCG32 rng{(cast(u.x) << 32ull) | cast(u.y)}; // initialize medium tracker auto env_medium_tag = pipeline().environment_medium_tag(); @@ -194,13 +195,13 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra #ifdef VPT_NAIVE_ENABLE_MEDIUM_STACK_INIT // TODO: bug in initialization of medium tracker where the angle between shared edge is small auto depth_track = def(0u); - $while(true) { + $while (true) { auto it = pipeline().geometry()->intersect(ray); - $if(!it->valid()) { $break; }; + $if (!it->valid()) { $break; }; device_log("depth={}", depth_track); - $if(it->shape().has_medium()) { + $if (it->shape().has_medium()) { auto surface_tag = it->shape().surface_tag(); auto medium_tag = it->shape().medium_tag(); @@ -215,13 +216,13 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra pipeline().surfaces().dispatch(surface_tag, [&](auto surface) { device_log("surface event={}", surface_event); // update medium tracker - $switch(surface_event) { - $case(Surface::event_enter) { + $switch (surface_event) { + $case (Surface::event_enter) { medium_tracker.enter(medium_priority, medium_info); device_log("enter: priority={}, medium_tag={}", medium_priority, medium_tag); }; - $case(Surface::event_exit) { - $if(medium_tracker.exist(medium_priority, medium_info)) { + $case (Surface::event_exit) { + $if (medium_tracker.exist(medium_priority, medium_info)) { medium_tracker.exit(medium_priority, medium_info); device_log("exit exist: priority={}, medium_tag={}", medium_priority, medium_tag); } @@ -251,10 +252,10 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra auto pdf_bsdf = def(1e16f); auto eta_scale = def(1.f); auto max_depth = node()->max_depth(); - $for(depth, max_depth) { + $for (depth, max_depth) { auto eta = def(1.f); auto u_rr = def(0.f); - $if(depth + 1u >= rr_depth) { u_rr = sampler()->generate_1d(); }; + $if (depth + 1u >= rr_depth) { u_rr = sampler()->generate_1d(); }; // trace auto it = pipeline().geometry()->intersect(ray); @@ -263,7 +264,7 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra device_log("depth={}", depth); device_log("before: medium tracker size={}, priority={}, tag={}", - medium_tracker.size(), medium_tracker.current().priority, medium_tracker.current().medium_tag); + medium_tracker.size(), medium_tracker.current().priority, medium_tracker.current().medium_tag); device_log( "ray=({}, {}, {}) + t * ({}, {}, {})", ray->origin().x, ray->origin().y, ray->origin().z, @@ -272,7 +273,7 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra auto medium_sample = Medium::Sample::zero(swl.dimension()); // sample the participating medium - $if(!medium_tracker.vacuum()) { + $if (!medium_tracker.vacuum()) { #ifdef VPT_NAIVE_ENABLE_DIRECT_LIGHTING // direct light // generate uniform samples @@ -286,7 +287,7 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra // trace shadow ray auto transmittance_evaluation = _transmittance(frame_index, pixel_id, time, swl, rng, medium_tracker, light_sample.shadow_ray); - $if(transmittance_evaluation.pdf > 0.f) { + $if (transmittance_evaluation.pdf > 0.f) { auto w = 1.f / (pdf_bsdf + transmittance_evaluation.pdf + light_sample.eval.pdf); Li += w * beta * transmittance_evaluation.f * light_sample.eval.L; }; @@ -310,9 +311,9 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra }; // sample the surface - $if((medium_sample.medium_event == Medium::event_invalid) | (medium_sample.medium_event == Medium::event_hit_surface)) { + $if ((medium_sample.medium_event == Medium::event_invalid) | (medium_sample.medium_event == Medium::event_hit_surface)) { // miss, environment light - $if(!it->valid()) { + $if (!it->valid()) { if (pipeline().environment()) { auto eval = light_sampler()->evaluate_miss(ray->direction(), swl, time); Li += beta * eval.L * balance_heuristic(pdf_bsdf, eval.pdf); @@ -322,7 +323,7 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra // hit light if (!pipeline().lights().empty()) { - $if(it->shape().has_light()) { + $if (it->shape().has_light()) { auto eval = light_sampler()->evaluate_hit(*it, ray->origin(), swl, time); Li += beta * eval.L * balance_heuristic(pdf_bsdf, eval.pdf); device_log( @@ -341,7 +342,7 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra } // hit ordinary surface - $if(!it->shape().has_surface()) { $break; }; + $if (!it->shape().has_surface()) { $break; }; // generate uniform samples auto u_light_selection = sampler()->generate_1d(); @@ -363,7 +364,7 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra auto medium_tag = it->shape().medium_tag(); auto medium_priority = def(Medium::VACUUM_PRIORITY); auto eta_next = def(1.f); - $if(has_medium) { + $if (has_medium) { pipeline().media().dispatch(medium_tag, [&](auto medium) { auto closure = medium->closure(ray, swl, time); medium_priority = medium->priority(); @@ -384,21 +385,21 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra }); call.execute([&](auto closure) noexcept { UInt surface_event; - $if(!medium_tracker.true_hit(medium_info.medium_tag)) { + $if (!medium_tracker.true_hit(medium_info.medium_tag)) { surface_event = surface_event_skip; ray = it->spawn_ray(ray->direction()); pdf_bsdf = 1e16f; } $else { if (auto dispersive = closure->is_dispersive()) { - $if(*dispersive) { swl.terminate_secondary(); }; + $if (*dispersive) { swl.terminate_secondary(); }; } // direct lighting #ifdef VPT_NAIVE_ENABLE_DIRECT_LIGHTING - $if(light_sample.eval.pdf > 0.0f) { + $if (light_sample.eval.pdf > 0.0f) { #else - $if(light_sample.eval.pdf > 0.0f & !occluded) { + $if (light_sample.eval.pdf > 0.0f & !occluded) { #endif auto wi = light_sample.shadow_ray->direction(); auto eval = closure->evaluate(wo, wi); @@ -433,24 +434,24 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra beta *= w * surface_sample.eval.f; // apply eta scale & update medium tracker - $if(has_medium) { - $switch(surface_event) { - $case(Surface::event_enter) { + $if (has_medium) { + $switch (surface_event) { + $case (Surface::event_enter) { eta_scale = sqr(eta_next / eta); }; - $case(Surface::event_exit) { + $case (Surface::event_exit) { eta_scale = sqr(eta / eta_next); }; }; }; }; - $if(has_medium) { - $switch(surface_event) { - $case(Surface::event_enter) { + $if (has_medium) { + $switch (surface_event) { + $case (Surface::event_enter) { medium_tracker.enter(medium_priority, medium_info); }; - $case(Surface::event_exit) { + $case (Surface::event_exit) { medium_tracker.exit(medium_priority, medium_info); }; }; @@ -467,18 +468,18 @@ class MegakernelVolumePathTracingNaiveInstance final : public ProgressiveIntegra medium_sample.medium_event, beta[0u], beta[1u], beta[2u], pdf_bsdf, Li[0u], Li[1u], Li[2u]); beta = zero_if_any_nan(beta); - $if(all(beta <= 0.f)) { $break; }; + $if (all(beta <= 0.f)) { $break; }; // rr auto rr_threshold = node()->rr_threshold(); auto q = max(beta.max() * eta_scale, .05f); - $if(depth + 1u >= rr_depth) { - $if(q < rr_threshold & u_rr >= q) { $break; }; + $if (depth + 1u >= rr_depth) { + $if (q < rr_threshold & u_rr >= q) { $break; }; beta *= ite(q < rr_threshold, 1.0f / q, 1.f); }; device_log("beta=({}, {}, {})", beta[0u], beta[1u], beta[2u]); device_log("after: medium tracker size={}, priority={}, tag={}", - medium_tracker.size(), medium_tracker.current().priority, medium_tracker.current().medium_tag); + medium_tracker.size(), medium_tracker.current().priority, medium_tracker.current().medium_tag); }; return spectrum->srgb(swl, Li); } diff --git a/src/integrators/pssmlt.cpp b/src/integrators/pssmlt.cpp index 4f87666a..3ee56d6e 100644 --- a/src/integrators/pssmlt.cpp +++ b/src/integrators/pssmlt.cpp @@ -2,7 +2,6 @@ // Created by Mike Smith on 2022/11/9. // -#include #include #include #include @@ -15,8 +14,8 @@ namespace luisa::render { struct alignas(8) PrimarySample { float value; float value_backup; - uint2 last_modification; - uint2 modification_backup; + ulong last_modification; + ulong modification_backup; }; }// namespace luisa::render @@ -46,9 +45,9 @@ class PSSMLTSampler { public: struct State { UInt rng_state; - U64 current_iteration; + ULong current_iteration; Bool large_step; - U64 last_large_step_iteration; + ULong last_large_step_iteration; UInt chain_index; UInt sample_index; UInt initialized_dimensions; @@ -64,9 +63,9 @@ class PSSMLTSampler { uint _chains{}; uint _pss_dim{}; Buffer _rng_buffer; - Buffer _current_iteration_buffer; + Buffer _current_iteration_buffer; Buffer _large_step_and_initialized_dimensions_buffer; - Buffer _last_large_step_iteration_buffer; + Buffer _last_large_step_iteration_buffer; Buffer _primary_sample_buffer; public: @@ -79,9 +78,9 @@ class PSSMLTSampler { command_buffer << synchronize(); if (auto n = next_pow2(chains); n > _rng_buffer.size()) { _rng_buffer = _device.create_buffer(n); - _current_iteration_buffer = _device.create_buffer(n); + _current_iteration_buffer = _device.create_buffer(n); _large_step_and_initialized_dimensions_buffer = _device.create_buffer(n); - _last_large_step_iteration_buffer = _device.create_buffer(n); + _last_large_step_iteration_buffer = _device.create_buffer(n); } if (auto n = next_pow2(chains * pss_dim); n > _primary_sample_buffer.size()) { _primary_sample_buffer = _device.create_buffer(n); @@ -110,7 +109,7 @@ class PSSMLTSampler { auto p = def(0.f); x = clamp(x, -.99999f, .99999f); w = -log((1.f - x) * (1.f + x)); - $if(w < 5.f) { + $if (w < 5.f) { w = w - 2.5f; p = 2.81022636e-08f; p = fma(p, w, 3.43273939e-07f); @@ -141,28 +140,28 @@ class PSSMLTSampler { [[nodiscard]] auto _sample(Expr index) noexcept { auto Xi = def(); - $if(_state->initialized_dimensions <= index) {// Initialize the sample + $if (_state->initialized_dimensions <= index) {// Initialize the sample Xi.value = 0.f; Xi.value_backup = 0.f; - Xi.last_modification = make_uint2(); - Xi.modification_backup = make_uint2(); + Xi.last_modification = 0ull; + Xi.modification_backup = 0ull; _state->initialized_dimensions += 1u; } $else {// Load the sample Xi = _read_primary_sample(index); }; // Reset Xi if a large step took place in the meantime - $if(U64{Xi.last_modification} < _state->last_large_step_iteration) { + $if (Xi.last_modification < _state->last_large_step_iteration) { Xi.value = lcg(_state->rng_state); - Xi.last_modification = _state->last_large_step_iteration.bits(); + Xi.last_modification = _state->last_large_step_iteration; }; // Apply remaining sequence of mutations to _sample_ Xi->backup(); - $if(_state->large_step) { + $if (_state->large_step) { Xi.value = lcg(_state->rng_state); } $else { - auto nSmall = (_state->current_iteration - U64{Xi.last_modification}).lo(); + auto nSmall = compute::cast(_state->current_iteration - Xi.last_modification); // Apply _nSmall_ small step mutations // Sample the standard normal distribution N(0, 1) auto normalSample = sqrt_two * _erf_inv(2.f * lcg(_state->rng_state) - 1.f); @@ -170,7 +169,7 @@ class PSSMLTSampler { auto effSigma = _sigma * sqrt(cast(nSmall)); Xi.value = fract(Xi.value + normalSample * effSigma); }; - Xi.last_modification = _state->current_iteration.bits(); + Xi.last_modification = _state->current_iteration; // Store the sample _write_primary_sample(index, Xi); return Xi.value; @@ -183,9 +182,9 @@ class PSSMLTSampler { void create(Expr chain_index, Expr rng_sequence) noexcept { _state = luisa::make_unique(State{ .rng_state = xxhash32(rng_sequence), - .current_iteration = U64{0u}, + .current_iteration = 0ull, .large_step = true, - .last_large_step_iteration = U64{0u}, + .last_large_step_iteration = 0ull, .chain_index = chain_index, .sample_index = 0u, .initialized_dimensions = 0u}); @@ -198,9 +197,9 @@ class PSSMLTSampler { auto last_large_step_iteration = _last_large_step_iteration_buffer->read(chain_index); _state = luisa::make_unique(State{ .rng_state = rng_state, - .current_iteration = U64{current_iteration}, + .current_iteration = current_iteration, .large_step = (large_step_and_dimensions & 1u) != 0u, - .last_large_step_iteration = U64{last_large_step_iteration}, + .last_large_step_iteration = last_large_step_iteration, .chain_index = chain_index, .sample_index = 0u, .initialized_dimensions = large_step_and_dimensions >> 1u}); @@ -208,10 +207,10 @@ class PSSMLTSampler { void save() noexcept { _rng_buffer->write(_state->chain_index, _state->rng_state); - _current_iteration_buffer->write(_state->chain_index, _state->current_iteration.bits()); + _current_iteration_buffer->write(_state->chain_index, _state->current_iteration); _large_step_and_initialized_dimensions_buffer->write( _state->chain_index, ite(_state->large_step, 1u, 0u) | (_state->initialized_dimensions << 1u)); - _last_large_step_iteration_buffer->write(_state->chain_index, _state->last_large_step_iteration.bits()); + _last_large_step_iteration_buffer->write(_state->chain_index, _state->last_large_step_iteration); } void accept() noexcept { @@ -222,14 +221,14 @@ class PSSMLTSampler { } void reject() noexcept { - $for(i, _state->initialized_dimensions) { + $for (i, _state->initialized_dimensions) { auto sample = _read_primary_sample(i); - $if(U64{sample.last_modification} == _state->current_iteration) { + $if (sample.last_modification == _state->current_iteration) { sample->restore(); _write_primary_sample(i, sample); }; }; - _state->current_iteration = _state->current_iteration - 1u; + _state->current_iteration = _state->current_iteration - 1ull; } [[nodiscard]] auto large_step() const noexcept { @@ -249,7 +248,7 @@ class PSSMLTSampler { } void start_iteration() noexcept { - _state->current_iteration = _state->current_iteration + 1u; + _state->current_iteration = _state->current_iteration + 1ull; _state->large_step = lcg(_state->rng_state) < _large_step_probability; } }; @@ -352,14 +351,14 @@ class PSSMLTInstance final : public ProgressiveIntegrator::Instance { auto ray = camera_ray; auto pdf_bsdf = def(1e16f); - $for(depth, node()->max_depth()) { + $for (depth, node()->max_depth()) { // trace auto wo = -ray->direction(); auto it = pipeline().geometry()->intersect(ray); // miss - $if(!it->valid()) { + $if (!it->valid()) { if (pipeline().environment()) { auto eval = light_sampler()->evaluate_miss(ray->direction(), swl, time); Li += beta * eval.L * balance_heuristic(pdf_bsdf, eval.pdf); @@ -370,14 +369,14 @@ class PSSMLTInstance final : public ProgressiveIntegrator::Instance { // hit light if (!pipeline().lights().empty()) { - $if(it->shape().has_light()) { + $if (it->shape().has_light()) { auto eval = light_sampler()->evaluate_hit(*it, ray->origin(), swl, time); Li += beta * eval.L * balance_heuristic(pdf_bsdf, eval.pdf); is_visible_light |= depth == 0u; }; } - $if(!it->shape().has_surface()) { $break; }; + $if (!it->shape().has_surface()) { $break; }; // sample one light auto u_light_selection = sampler.generate_1d(); @@ -399,10 +398,10 @@ class PSSMLTInstance final : public ProgressiveIntegrator::Instance { }); call.execute([&](auto closure) noexcept { if (auto dispersive = closure->is_dispersive()) { - $if(*dispersive) { swl.terminate_secondary(); }; + $if (*dispersive) { swl.terminate_secondary(); }; } // direct lighting - $if(light_sample.eval.pdf > 0.0f & !occluded) { + $if (light_sample.eval.pdf > 0.0f & !occluded) { auto wi = light_sample.shadow_ray->direction(); auto eval = closure->evaluate(wo, wi); auto w = balance_heuristic(light_sample.eval.pdf, eval.pdf) / @@ -417,19 +416,19 @@ class PSSMLTInstance final : public ProgressiveIntegrator::Instance { beta *= w * surface_sample.eval.f; // apply eta scale auto eta = closure->eta().value_or(1.f); - $switch(surface_sample.event) { - $case(Surface::event_enter) { eta_scale = sqr(eta); }; - $case(Surface::event_exit) { eta_scale = sqr(1.f / eta); }; + $switch (surface_sample.event) { + $case (Surface::event_enter) { eta_scale = sqr(eta); }; + $case (Surface::event_exit) { eta_scale = sqr(1.f / eta); }; }; }); beta = zero_if_any_nan(beta); - $if(beta.all([](auto b) noexcept { return b <= 0.f; })) { $break; }; + $if (beta.all([](auto b) noexcept { return b <= 0.f; })) { $break; }; auto rr_depth = node()->rr_depth(); auto rr_threshold = node()->rr_threshold(); auto q = max(beta.max() * eta_scale, .05f); - $if(depth + 1u >= rr_depth) { + $if (depth + 1u >= rr_depth) { auto u = sampler.generate_1d(); - $if(q < rr_threshold & u >= q) { $break; }; + $if (q < rr_threshold & u >= q) { $break; }; beta *= ite(q < rr_threshold, 1.0f / q, 1.f); }; }; @@ -502,7 +501,7 @@ class PSSMLTInstance final : public ProgressiveIntegrator::Instance { global_accept_counter = {pipeline().device(), 1u}; clear_statistics = pipeline().device().compile<1u>([&] { auto i = dispatch_x(); - $if(i == 0u) { global_accept_counter.clear(i); }; + $if (i == 0u) { global_accept_counter.clear(i); }; accept_counter.clear(i); mutation_counter.clear(i); }); @@ -549,7 +548,7 @@ class PSSMLTInstance final : public ProgressiveIntegrator::Instance { auto accum = [&accumulate_buffer, resolution](Expr p, Expr L) noexcept { auto offset = (p.y * resolution.x + p.x) * 3u; - $if(!any(isnan(L))) { + $if (!any(isnan(L))) { for (auto i = 0u; i < 3u; i++) { accumulate_buffer->atomic(offset + i).fetch_add(L[i]); } @@ -568,7 +567,7 @@ class PSSMLTInstance final : public ProgressiveIntegrator::Instance { mutation_counter.record(pixel_index_new); // Accept or reject the proposal - $if(lcg(seed) < accept) { + $if (lcg(seed) < accept) { position_buffer->write(chain_id, p_new); radiance_and_contribution_buffer->write( chain_id, make_float4(shutter_weight * L_new, y_new)); diff --git a/src/samplers/sobol.cpp b/src/samplers/sobol.cpp index b29648af..4bba9309 100644 --- a/src/samplers/sobol.cpp +++ b/src/samplers/sobol.cpp @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -27,13 +26,14 @@ using namespace luisa::compute; class SobolSamplerInstance final : public Sampler::Instance { private: + uint2 _resolution; uint _scale{}; luisa::optional _pixel; luisa::optional _dimension; - luisa::optional _sobol_index; + luisa::optional _sobol_index; Buffer _sobol_matrices; - Buffer _vdc_sobol_matrices; - Buffer _vdc_sobol_matrices_inv; + Buffer _vdc_sobol_matrices; + Buffer _vdc_sobol_matrices_inv; Buffer _state_buffer; private: @@ -48,51 +48,50 @@ class SobolSamplerInstance final : public Sampler::Instance { } template - [[nodiscard]] auto _sobol_sample(U64 a, Expr dimension, Expr hash) const noexcept { - static Callable impl = [](UInt2 a_in, UInt dimension, BufferVar sobol_matrices, UInt hash) noexcept { + [[nodiscard]] auto _sobol_sample(ULong a, Expr dimension, Expr hash) const noexcept { + static Callable impl = [](ULong a, UInt dimension, BufferVar sobol_matrices, UInt hash) noexcept { auto v = def(0u); auto i = def(dimension * SobolMatrixSize); - auto a = U64{a_in}; - $while(a != 0u) { - v = ite((a & 1u) != 0u, v ^ sobol_matrices.read(i), v); - a = a >> 1u; + $while (a != 0ull) { + v = ite((a & 1ull) != 0ull, v ^ sobol_matrices.read(i), v); + a = a >> 1ull; i = i + 1u; }; if constexpr (scramble) { v = _fast_owen_scramble(hash, v); } return v * 0x1p-32f; }; - return impl(a.bits(), dimension, _sobol_matrices.view(), hash); + return impl(a, dimension, _sobol_matrices.view(), hash); } [[nodiscard]] auto _sobol_interval_to_index(uint m, UInt frame, Expr p) const noexcept { - if (m == 0u) { return U64{frame}; } - static Callable impl = [](UInt m, UInt frame, UInt2 p, BufferVar vdc, BufferVar vdc_inv) noexcept { + if (m == 0u) { return cast(frame); } + static Callable impl = [](UInt m, UInt frame, UInt2 p, BufferVar vdc, BufferVar vdc_inv) noexcept { auto c = def(0u); auto m2 = m << 1u; - auto index = U64{frame} << m2; - auto delta = U64{0u}; - $while(frame != 0u) { - $if((frame & 1u) != 0u) { - auto v = U64{vdc.read(c)}; + auto index = cast(frame) << cast(m2); + auto delta = def(0ull); + $while (frame != 0u) { + $if ((frame & 1u) != 0u) { + auto v = vdc.read(c); delta = delta ^ v; }; frame >>= 1u; c += 1u; }; // flipped b - auto b = delta ^ ((U64{p.x} << m) | p.y); + auto b = delta ^ ((cast(p.x) << m) | cast(p.y)); auto d = def(0u); - $while(b != 0u) { - $if((b & 1u) != 0u) { - auto v = U64{vdc_inv.read(d)}; + $while (b != 0ull) { + $if ((b & 1ull) != 0ull) { + auto v = vdc_inv.read(d); index = index ^ v; }; - b = b >> 1u; + b = b >> 1ull; d += 1u; }; - return index.bits(); + return index; }; - return U64{impl(m, frame, p, _vdc_sobol_matrices.view(), _vdc_sobol_matrices_inv.view())}; + return impl(m, frame, p, _vdc_sobol_matrices.view(), _vdc_sobol_matrices_inv.view()); } public: @@ -101,8 +100,8 @@ class SobolSamplerInstance final : public Sampler::Instance { const SobolSampler *s) noexcept : Sampler::Instance{pipeline, s} { _sobol_matrices = pipeline.device().create_buffer(SobolMatrixSize * NSobolDimensions); - _vdc_sobol_matrices = pipeline.device().create_buffer(SobolMatrixSize); - _vdc_sobol_matrices_inv = pipeline.device().create_buffer(SobolMatrixSize); + _vdc_sobol_matrices = pipeline.device().create_buffer(SobolMatrixSize); + _vdc_sobol_matrices_inv = pipeline.device().create_buffer(SobolMatrixSize); command_buffer << _sobol_matrices.copy_from(SobolMatrices32); } void reset(CommandBuffer &command_buffer, uint2 resolution, uint state_count, uint spp) noexcept override { @@ -115,14 +114,15 @@ class SobolSamplerInstance final : public Sampler::Instance { _state_buffer = pipeline().device().create_buffer( next_pow2(state_count)); } + _resolution = resolution; _scale = next_pow2(std::max(resolution.x, resolution.y)); LUISA_ASSERT(_scale <= 0xffffu, "Sobol sampler scale is too large."); auto m = std::bit_width(_scale) - 1u; - std::array vdc_sobol_matrices; - std::array vdc_sobol_matrices_inv; + std::array vdc_sobol_matrices{}; + std::array vdc_sobol_matrices_inv{}; for (auto i = 0u; i < SobolMatrixSize; i++) { - vdc_sobol_matrices[i] = u64_to_uint2(VdCSobolMatrices[m - 1u][i]); - vdc_sobol_matrices_inv[i] = u64_to_uint2(VdCSobolMatricesInv[m - 1u][i]); + vdc_sobol_matrices[i] = VdCSobolMatrices[m - 1u][i]; + vdc_sobol_matrices_inv[i] = VdCSobolMatricesInv[m - 1u][i]; } command_buffer << _vdc_sobol_matrices.copy_from(vdc_sobol_matrices.data()) << _vdc_sobol_matrices_inv.copy_from(vdc_sobol_matrices_inv.data()) @@ -135,14 +135,19 @@ class SobolSamplerInstance final : public Sampler::Instance { _pixel.emplace(pixel); } void save_state(Expr state_id) noexcept override { - auto state = make_uint4(_sobol_index->bits(), *_dimension, (_pixel->y << 16u) | _pixel->x); + auto sobol_index_hi = cast(_sobol_index.value() >> 32ull); + auto sobol_index_lo = cast(_sobol_index.value()); + auto pixel_index = _pixel.value().y * _resolution.x + _pixel.value().x; + auto state = make_uint4(sobol_index_hi, sobol_index_lo, *_dimension, pixel_index); _state_buffer->write(state_id, state); } void load_state(Expr state_id) noexcept override { auto state = _state_buffer->read(state_id); - _sobol_index.emplace(state.xy()); + auto sobol_index_hi = cast(state.x) << 32ull; + auto sobol_index_lo = cast(state.y); + _sobol_index.emplace(sobol_index_hi | sobol_index_lo); _dimension.emplace(state.z); - _pixel.emplace(make_uint2(state.w >> 16u, state.w & 0xffffu)); + _pixel.emplace(make_uint2(state.w / _resolution.x, state.w % _resolution.x)); } [[nodiscard]] Float generate_1d() noexcept override { *_dimension = ite(*_dimension >= NSobolDimensions, 2u, *_dimension); diff --git a/src/samplers/zsobol.cpp b/src/samplers/zsobol.cpp index b833e181..53d56ecf 100644 --- a/src/samplers/zsobol.cpp +++ b/src/samplers/zsobol.cpp @@ -3,7 +3,6 @@ // #include -#include #include #include #include @@ -29,15 +28,15 @@ class ZSobolSamplerInstance final : public Sampler::Instance { private: uint _log2_spp{}; luisa::optional _dimension{}; - luisa::optional _morton_index{}; + luisa::optional _morton_index{}; luisa::unique_ptr> _sample_hash; Buffer _sobol_matrices; Buffer _state_buffer; - luisa::unique_ptr> _get_sample_index_impl; + luisa::unique_ptr> _get_sample_index_impl; private: [[nodiscard]] auto _get_sample_index() const noexcept { - return U64{(*_get_sample_index_impl)(_morton_index->bits(), *_dimension)}; + return (*_get_sample_index_impl)(*_morton_index, *_dimension); } [[nodiscard]] static auto _fast_owen_scramble(UInt seed, UInt v) noexcept { @@ -50,20 +49,19 @@ class ZSobolSamplerInstance final : public Sampler::Instance { return reverse(v); } - [[nodiscard]] auto _sobol_sample(U64 a, uint dimension, Expr hash) const noexcept { - static Callable impl = [](UInt2 a_bits, UInt dimension, UInt hash, BufferVar sobol_matrices) noexcept { - auto a = U64{a_bits}; + [[nodiscard]] auto _sobol_sample(ULong a, uint dimension, Expr hash) const noexcept { + static Callable impl = [](ULong a, UInt dimension, UInt hash, BufferVar sobol_matrices) noexcept { auto v = def(0u); auto i = def(dimension * SobolMatrixSize); - $while(a != 0u) { - v = ite((a & 1u) != 0u, v ^ sobol_matrices.read(i), v); + $while (a != 0ull) { + v = ite((a & 1ull) != 0ull, v ^ sobol_matrices.read(i), v); a = a >> 1u; i += 1u; }; v = _fast_owen_scramble(hash, v); return min(v * 0x1p-32f, one_minus_epsilon); }; - return impl(a.bits(), dimension, hash, _sobol_matrices.view()); + return impl(a, dimension, hash, _sobol_matrices.view()); } public: @@ -96,8 +94,8 @@ class ZSobolSamplerInstance final : public Sampler::Instance { return std::bit_width(next_pow2(x)) - 1u; }; _log2_spp = log2(spp); - _get_sample_index_impl = luisa::make_unique>( - [this, resolution](UInt2 morton, UInt dimension) noexcept { + _get_sample_index_impl = luisa::make_unique>( + [this, resolution](ULong morton_index, UInt dimension) noexcept { static Constant permutations{std::array{ make_uint4(0, 1, 2, 3), make_uint4(0, 1, 3, 2), make_uint4(0, 2, 1, 3), make_uint4(0, 2, 3, 1), make_uint4(0, 3, 2, 1), make_uint4(0, 3, 1, 2), make_uint4(1, 0, 2, 3), make_uint4(1, 0, 3, 2), @@ -105,63 +103,64 @@ class ZSobolSamplerInstance final : public Sampler::Instance { make_uint4(2, 1, 0, 3), make_uint4(2, 1, 3, 0), make_uint4(2, 0, 1, 3), make_uint4(2, 0, 3, 1), make_uint4(2, 3, 0, 1), make_uint4(2, 3, 1, 0), make_uint4(3, 1, 2, 0), make_uint4(3, 1, 0, 2), make_uint4(3, 2, 1, 0), make_uint4(3, 2, 0, 1), make_uint4(3, 0, 2, 1), make_uint4(3, 0, 1, 2)}}; - static constexpr auto mix_bits = [](U64 v) noexcept { + static constexpr auto mix_bits = [](ULong v) noexcept { v = v ^ (v >> 31u); - v = v * U64{0x7fb5d329728ea185ull}; + v = v * 0x7fb5d329728ea185ull; v = v ^ (v >> 27u); - v = v * U64{0x81dadef4bc2dd44dull}; - v = v ^ (v.hi() >> 1u); + v = v * 0x81dadef4bc2dd44dull; + v = v ^ (v >> 33ull); return v; }; - U64 sample_index; + ULong sample_index; auto log4_spp = (_log2_spp + 1u) / 2u; auto pow2_samples = static_cast(_log2_spp & 1u); auto last_digit = pow2_samples ? 1 : 0; - auto morton_index = U64{morton}; auto num_base4_digits = log2(std::max(resolution.x, resolution.y)) + log4_spp; for (auto i = static_cast(num_base4_digits) - 1; i >= last_digit; i--) { - auto digit_shift = 2u * i - (pow2_samples ? 1u : 0u); - auto digit = (morton_index >> digit_shift) & 3u; - auto higher_digits = morton_index >> (digit_shift + 2u); - auto p = (mix_bits(higher_digits ^ (dimension * 0x55555555u)) >> 24u) % 24u; - U64 perm_digit{permutations.read(p)[digit]}; - sample_index = sample_index | (perm_digit << digit_shift); + auto digit_shift = cast(2u * i - (pow2_samples ? 1u : 0u)); + auto digit = cast((morton_index >> digit_shift) & 3ull); + auto higher_digits = morton_index >> (digit_shift + 2ull); + auto p = cast((mix_bits(higher_digits ^ cast(dimension * 0x55555555u)) >> 24ull) % 24ull); + auto perm_digit = permutations.read(p)[digit]; + sample_index = sample_index | (cast(perm_digit) << cast(digit_shift)); } if (pow2_samples) { - auto digit = (morton_index & 1u) ^ - (mix_bits((morton_index >> 1u) ^ (dimension * 0x55555555u)) & 1u); - sample_index = sample_index | digit; + auto digit = cast(morton_index & 1ull); + sample_index |= cast(digit ^ (cast(mix_bits((morton_index >> 1ull) ^ cast(0x55555555u * dimension))) & 1u)); } - return sample_index.bits(); + return sample_index; }); } void start(Expr pixel, Expr sample_index) noexcept override { static Callable encode_morton = [](UInt x, UInt y) noexcept { - static constexpr auto left_shift2 = [](auto x_in) noexcept { - U64 x{x_in}; - x = (x ^ (x << 16u)) & U64{0x0000ffff0000ffffull}; - x = (x ^ (x << 8u)) & U64{0x00ff00ff00ff00ffull}; - x = (x ^ (x << 4u)) & U64{0x0f0f0f0f0f0f0f0full}; - x = (x ^ (x << 2u)) & U64{0x3333333333333333ull}; - x = (x ^ (x << 1u)) & U64{0x5555555555555555ull}; + static constexpr auto left_shift2 = [](auto x) noexcept { + x = (x ^ (x << 16ull)) & 0x0000ffff0000ffffull; + x = (x ^ (x << 8ull)) & 0x00ff00ff00ff00ffull; + x = (x ^ (x << 4ull)) & 0x0f0f0f0f0f0f0f0full; + x = (x ^ (x << 2ull)) & 0x3333333333333333ull; + x = (x ^ (x << 1ull)) & 0x5555555555555555ull; return x; }; - return ((left_shift2(y) << 1u) | left_shift2(x)).bits(); + return (left_shift2(cast(y)) << 1ull) | left_shift2(cast(x)); }; - _dimension = luisa::nullopt; - _morton_index = luisa::nullopt; - _dimension = def(0u); - _morton_index = (U64{encode_morton(pixel.x, pixel.y)} << _log2_spp) | sample_index; + _dimension.emplace(def(0u)); + _morton_index.emplace((encode_morton(pixel.x, pixel.y) << static_cast(_log2_spp)) | + cast(sample_index)); } void save_state(Expr state_id) noexcept override { - auto state = make_uint3(_morton_index->bits(), *_dimension); + auto state = make_uint3( + cast(*_morton_index >> 32ull), + cast(*_morton_index), + *_dimension); _state_buffer->write(state_id, state); } void load_state(Expr state_id) noexcept override { _dimension = luisa::nullopt; _morton_index = luisa::nullopt; auto state = _state_buffer->read(state_id); - _morton_index = U64{state.xy()}; + auto morton_high = cast(state.x) << 32ull; + auto morton_low = cast(state.y); + _morton_index = morton_high | morton_low; _dimension = state.z; } [[nodiscard]] Float generate_1d() noexcept override { diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index b214ba4e..ca246600 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -5,7 +5,6 @@ add_library(luisa-render-util SHARED xform.cpp xform.h spec.cpp spec.h colorspace.h - u64.cpp u64.h rng.cpp rng.h ies.cpp ies.h scattering.cpp scattering.h diff --git a/src/util/rng.cpp b/src/util/rng.cpp index f680bb2d..3cff5f31 100644 --- a/src/util/rng.cpp +++ b/src/util/rng.cpp @@ -141,17 +141,17 @@ Float lcg(UInt &state) noexcept { UInt PCG32::uniform_uint() noexcept { auto oldstate = _state; - _state = oldstate * U64{mult} + _inc; - auto xorshifted = (((oldstate >> 18u) ^ oldstate) >> 27u).lo(); - auto rot = (oldstate >> 59u).lo(); + _state = oldstate * mult + _inc; + auto xorshifted = compute::cast(((oldstate >> 18u) ^ oldstate) >> 27u); + auto rot = compute::cast(oldstate >> 59u); return (xorshifted >> rot) | (xorshifted << ((~rot + 1u) & 31u)); } -void PCG32::set_sequence(U64 init_seq) noexcept { - _state = U64{0u}; - _inc = (init_seq << 1u) | 1u; +void PCG32::set_sequence(ULong init_seq) noexcept { + _state = def(0ull); + _inc = (init_seq << 1ull) | 1ull; static_cast(uniform_uint()); - _state = _state + U64{default_state}; + _state = _state + default_state; static_cast(uniform_uint()); } @@ -162,15 +162,15 @@ Float PCG32::uniform_float() noexcept { PCG32::PCG32() noexcept : _state{default_state}, _inc{default_stream} {} -PCG32::PCG32(U64 state, U64 inc) noexcept +PCG32::PCG32(ULong state, ULong inc) noexcept : _state{std::move(state)}, _inc{std::move(inc)} {} -PCG32::PCG32(U64 seq_index) noexcept { +PCG32::PCG32(ULong seq_index) noexcept { set_sequence(std::move(seq_index)); } PCG32::PCG32(Expr seq_index) noexcept { - set_sequence(U64{seq_index}); + set_sequence(compute::cast(seq_index)); } }// namespace luisa::render diff --git a/src/util/rng.h b/src/util/rng.h index db488dcc..d1324ccb 100644 --- a/src/util/rng.h +++ b/src/util/rng.h @@ -5,7 +5,6 @@ #pragma once #include -#include namespace luisa::render { @@ -15,6 +14,7 @@ using compute::UInt2; using compute::UInt3; using compute::UInt4; using compute::Float; +using compute::ULong; [[nodiscard]] UInt xxhash32(Expr p) noexcept; [[nodiscard]] UInt xxhash32(Expr p) noexcept; @@ -38,15 +38,15 @@ class PCG32 { static constexpr auto mult = 0x5851f42d4c957f2dull; private: - U64 _state; - U64 _inc; + ULong _state; + ULong _inc; public: PCG32() noexcept; - PCG32(U64 state, U64 inc) noexcept; - explicit PCG32(U64 seq_index) noexcept; + PCG32(ULong state, ULong inc) noexcept; + explicit PCG32(ULong seq_index) noexcept; explicit PCG32(Expr seq_index) noexcept; - void set_sequence(U64 init_seq) noexcept; + void set_sequence(ULong init_seq) noexcept; [[nodiscard]] UInt uniform_uint() noexcept; [[nodiscard]] Float uniform_float() noexcept; [[nodiscard]] auto state() const noexcept { return _state; } diff --git a/src/util/u64.cpp b/src/util/u64.cpp deleted file mode 100644 index acb4bc23..00000000 --- a/src/util/u64.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// -// Created by Mike Smith on 2023/5/18. -// - -#include -#include - -namespace luisa::render { - -compute::UInt U64::operator%(uint rhs) const noexcept { - LUISA_ASSERT(rhs <= 0xffffu, "U64::operator% rhs must be <= 0xffff"); - return ((hi() % rhs) * static_cast(0x1'0000'0000ull % rhs) + lo() % rhs) % rhs; -} - -}// namespace luisa::render diff --git a/src/util/u64.h b/src/util/u64.h deleted file mode 100644 index 76e997ee..00000000 --- a/src/util/u64.h +++ /dev/null @@ -1,162 +0,0 @@ -// -// Created by Mike Smith on 2022/2/9. -// - -#pragma once - -#include - -namespace luisa::render { - -using compute::cast; -using compute::Expr; -using compute::make_uint2; -using compute::UInt2; - -[[nodiscard]] constexpr auto u64_to_uint2(uint64_t x) noexcept { - return luisa::make_uint2( - static_cast(x >> 32u) /* hi */, - static_cast(x) /* lo */); -} - -[[nodiscard]] constexpr auto uint2_to_u64(uint2 v) noexcept { - return (static_cast(v.x) << 32u) | v.y; -} - -class U64 { - -private: - UInt2 _bits; - -private: - [[nodiscard]] static auto _mul_u32(Expr lhs, Expr rhs) noexcept { - auto lhs_hi = lhs >> 16u; - auto lhs_lo = lhs & 0xffffu; - auto rhs_hi = rhs >> 16u; - auto rhs_lo = rhs & 0xffffu; - auto hi_lo = lhs_hi * rhs_lo; - auto lo_lo = lhs_lo * rhs_lo; - auto lo_hi = lhs_lo * rhs_hi; - auto hi_hi = lhs_hi * rhs_hi; - auto m_16_32 = (lo_lo >> 16u) + (hi_lo & 0xffffu) + (lo_hi & 0xffffu); - auto m_32_64 = (m_16_32 >> 16u) + (hi_lo >> 16u) + (lo_hi >> 16u) + hi_hi; - return U64{m_32_64, (m_16_32 << 16u) | (lo_lo & 0xffffu)}; - } - -public: - explicit U64(uint64_t u = 0ull) noexcept : _bits{u64_to_uint2(u)} {} - explicit U64(Expr u) noexcept : _bits{u} {} - explicit U64(Expr u) noexcept : _bits{make_uint2(0u, u)} {} - U64(Expr hi, Expr lo) - noexcept : _bits{make_uint2(hi, lo)} {} - U64(U64 &&) - noexcept = default; - U64(const U64 &) - noexcept = default; - U64 &operator=(U64 &&) noexcept = default; - U64 &operator=(const U64 &) noexcept = default; - [[nodiscard]] auto hi() const noexcept { return _bits.x; } - [[nodiscard]] auto lo() const noexcept { return _bits.y; } - [[nodiscard]] auto bits() const noexcept { return _bits; } - [[nodiscard]] auto operator~() const noexcept { return U64{~_bits}; } - [[nodiscard]] auto operator&(Expr rhs) const noexcept { return lo() & rhs; } - [[nodiscard]] auto operator&(const U64 &rhs) const noexcept { return U64{_bits & rhs._bits}; } - [[nodiscard]] auto operator|(Expr rhs) const noexcept { return U64{hi(), lo() | rhs}; } - [[nodiscard]] auto operator|(const U64 &rhs) const noexcept { return U64{_bits | rhs._bits}; } - [[nodiscard]] auto operator^(Expr rhs) const noexcept { return U64{hi(), lo() ^ rhs}; } - [[nodiscard]] auto operator^(const U64 &rhs) const noexcept { return U64{_bits ^ rhs._bits}; } - // TODO: optimize this - [[nodiscard]] auto operator>>(Expr rhs) const noexcept { - using compute::if_; - auto ret = *this; - if_(rhs != 0u, [&] { - if_(rhs >= 32u, [&] { - ret = U64{0u, hi() >> (rhs - 32u)}; - }).else_([&] { - ret = U64{hi() >> rhs, (hi() << (32u - rhs)) | (lo() >> rhs)}; - }); - }); - return ret; - } - [[nodiscard]] auto operator<<(Expr rhs) const noexcept { - using compute::if_; - auto ret = *this; - if_(rhs != 0u, [&] { - if_(rhs >= 32u, [&] { - ret = U64{lo() << (rhs - 32u), 0u}; - }).else_([&] { - ret = U64{(hi() << rhs) | (lo() >> (32u - rhs)), lo() << rhs}; - }); - }); - return ret; - } - [[nodiscard]] auto operator==(const U64 &rhs) const noexcept { return all(_bits == rhs._bits); } - [[nodiscard]] auto operator==(Expr rhs) const noexcept { return hi() == 0u & lo() == rhs; } - [[nodiscard]] friend auto operator==(Expr lhs, const U64 &rhs) noexcept { return rhs == lhs; } - [[nodiscard]] auto operator!=(const U64 &rhs) const noexcept { return !(*this == rhs); } - [[nodiscard]] auto operator!=(Expr rhs) const noexcept { return !(*this == rhs); } - [[nodiscard]] friend auto operator!=(Expr lhs, const U64 &rhs) noexcept { return rhs != lhs; } - [[nodiscard]] auto operator<(const U64 &rhs) const noexcept { return hi() < rhs.hi() | (hi() == rhs.hi() & lo() < rhs.lo()); } - [[nodiscard]] auto operator<(Expr rhs) const noexcept { return hi() == 0u & lo() < rhs; } - [[nodiscard]] friend auto operator<(Expr lhs, const U64 &rhs) noexcept { return rhs.hi() > 0u | lhs < rhs.lo(); } - [[nodiscard]] auto operator>(const U64 &rhs) const noexcept { return rhs < *this; } - [[nodiscard]] auto operator>(Expr rhs) const noexcept { return rhs < *this; } - [[nodiscard]] friend auto operator>(Expr lhs, const U64 &rhs) noexcept { return rhs < lhs; } - [[nodiscard]] auto operator<=(const U64 &rhs) const noexcept { return !(rhs < *this); } - [[nodiscard]] auto operator<=(Expr rhs) const noexcept { return !(rhs < *this); } - [[nodiscard]] friend auto operator<=(Expr lhs, const U64 &rhs) noexcept { return !(rhs < lhs); } - [[nodiscard]] auto operator>=(const U64 &rhs) const noexcept { return !(*this < rhs); } - [[nodiscard]] auto operator>=(Expr rhs) const noexcept { return !(*this < rhs); } - [[nodiscard]] friend auto operator>=(Expr lhs, const U64 &rhs) noexcept { return !(lhs < rhs); } - - [[nodiscard]] auto operator+(const U64 &rhs) const noexcept -> U64 { - auto carry = cast(~0u - lo() < rhs.lo()); - return U64{hi() + rhs.hi() + carry, lo() + rhs.lo()}; - } - [[nodiscard]] auto operator+(Expr rhs) const noexcept -> U64 { - auto carry = cast(~0u - lo() < rhs); - return U64{hi() + carry, lo() + rhs}; - } - [[nodiscard]] auto operator-(const U64 &rhs) const noexcept { - return *this + ~rhs + 1u; - } - [[nodiscard]] auto operator-(Expr rhs) const noexcept { - return *this - U64{rhs}; - } - [[nodiscard]] auto operator*(const U64 &rhs) const noexcept { - auto lo_lo = _mul_u32(lo(), rhs.lo()); - auto lo_hi = _mul_u32(lo(), rhs.hi()); - auto hi_lo = _mul_u32(hi(), rhs.lo()); - return U64{lo_lo.hi() + lo_hi.lo() + hi_lo.lo(), lo_lo.lo()}; - } - [[nodiscard]] auto operator*(Expr rhs) const noexcept { - auto lo_lo = _mul_u32(lo(), rhs); - auto hi_lo = _mul_u32(hi(), rhs); - return U64{lo_lo.hi() + hi_lo.lo(), lo_lo.lo()}; - } - [[nodiscard]] compute::UInt operator%(uint rhs) const noexcept; - - // conversions - [[nodiscard]] auto to_uint() const noexcept { return lo(); } - [[nodiscard]] auto to_float() const noexcept { - return fma(cast(hi() >> 16u), 0x1p48f, - fma(cast(hi() & 0xffffu), 0x1p32f, - cast(lo()))); - } -}; - -[[nodiscard]] inline auto ite(Expr p, Expr t, const U64 &f) noexcept { - return U64{ite(p, make_uint2(0u, t), f.bits())}; -} - -[[nodiscard]] inline auto ite(Expr p, const U64 &t, Expr f) noexcept { - return U64{ite(p, t.bits(), make_uint2(0u, f))}; -} - -[[nodiscard]] inline auto ite(Expr p, const U64 &t, const U64 &f) noexcept { - return U64{ite(p, t.bits(), f.bits())}; -} - -}// namespace luisa::render - -LUISA_DISABLE_DSL_ADDRESS_OF_OPERATOR(luisa::render::U64)