celeritas-project · sethrj · Nov 6, 2024 · Nov 3, 2024 · Nov 3, 2024 · Nov 3, 2024
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -408,7 +408,7 @@ if(CELERITAS_USE_VecGeom)
       AND NOT (VecGeom_CUDA_ARCHITECTURES STREQUAL CMAKE_CUDA_ARCHITECTURES))
     message(WARNING "CUDA architecture types between "
       "VecGeom (${VecGeom_CUDA_ARCHITECTURES}) and "
-      "Celeritas (${CMAKE_CUDA_ARCHITECTURES}) should probably match:"
+      "Celeritas (${CMAKE_CUDA_ARCHITECTURES}) should probably match: "
       "runtime errors may result"
     )
   endif()

diff --git a/src/celeritas_cmake_strings.h b/src/celeritas_cmake_strings.h
@@ -8,6 +8,7 @@
  * \brief Detailed CMake configuration information.
  * \deprecated This file should be replaced by "corecel/Config.hh".
  */
+// DEPRECATED: remove in Celeritas v1.0
 //---------------------------------------------------------------------------//
 #ifndef celeritas_cmake_strings_h
 #define celeritas_cmake_strings_h

diff --git a/src/celeritas_sys_config.h b/src/celeritas_sys_config.h
@@ -8,6 +8,7 @@
  * \brief System-specific configuration options for Celeritas.
  * \deprecated This file should be replaced by "corecel/Config.hh".
  */
+// DEPRECATED: remove in Celeritas v1.0
 //---------------------------------------------------------------------------//
 #ifndef celeritas_sys_config_h
 #define celeritas_sys_config_h

diff --git a/src/corecel/CMakeLists.txt b/src/corecel/CMakeLists.txt
@@ -188,11 +188,13 @@ if(CELERITAS_USE_CUDA OR CELERITAS_USE_HIP)
   )
 endif()
 if(CELERITAS_USE_CUDA)
-  # we currently depend on CUDA install including the nvtx header
+  # Note: we currently assume the CUDA install includes the nvtx header
   list(APPEND SOURCES
     sys/ScopedLimitSaver.cuda.cc
     sys/ScopedProfiling.cuda.cc
   )
+  # NVTX requires `-ldl`
+  list(APPEND PRIVATE_DEPS ${CMAKE_DL_LIBS})
 endif()
 
 if(CELERITAS_USE_HIP)

diff --git a/src/corecel/device_runtime_api.h b/src/corecel/device_runtime_api.h
@@ -8,6 +8,7 @@
  * \brief Include CUDA or HIP runtime APIs for compiling with host/cc compiler.
  * \deprecated This file should be replaced by "corecel/DeviceRuntimeApi.hh".
  */
+// DEPRECATED: remove in Celeritas v1.0
 //---------------------------------------------------------------------------//
 #ifndef CELERITAS_DEVICE_RUNTIME_API_H
 #define CELERITAS_DEVICE_RUNTIME_API_H
@@ -21,6 +22,6 @@
         "corecel/device_runtime_api.h is deprecated and should be replaced by \"corecel/DeviceRuntimeApi.hh\""
 #endif
 
-#include "corecel/DeviceRuntimeApi.hh
+#include "corecel/DeviceRuntimeApi.hh"
 
-#endif /* CELERITAS_DEVICE_RUNTIME_API_H */
+#endif /* CELERITAS_DEVICE_RUNTIME_API_H */
diff --git a/src/corecel/sys/KernelLauncher.device.hh b/src/corecel/sys/KernelLauncher.device.hh
@@ -106,7 +106,9 @@ void KernelLauncher<F>::operator()(Range<ThreadId> threads,
     if (!threads.empty())
     {
         using StreamT = CELER_DEVICE_PREFIX(Stream_t);
-        StreamT stream = celeritas::device().stream(stream_id).get();
+        StreamT stream = stream_id
+                             ? celeritas::device().stream(stream_id).get()
+                             : nullptr;
         auto config = calc_launch_params_(threads.size());
         detail::launch_action_impl<F>
             <<<config.blocks_per_grid, config.threads_per_block, 0, stream>>>(
@@ -130,7 +132,6 @@ void KernelLauncher<F>::operator()(size_type num_threads,
                                    StreamId stream_id,
                                    F const& execute_thread) const
 {
-    CELER_EXPECT(stream_id);
     (*this)(range(ThreadId{num_threads}), stream_id, execute_thread);
 }
 

diff --git a/src/geocel/CMakeLists.txt b/src/geocel/CMakeLists.txt
@@ -80,19 +80,14 @@ if(CELERITAS_USE_VecGeom)
   if(VecGeom_GDML_FOUND)
     list(APPEND PRIVATE_DEPS VecGeom::vgdml)
   endif()
-  if(VecGeom_CUDA_FOUND AND VecGeom_SURF_FOUND)
-    # Special routines needed for surface
-    list(APPEND SOURCES
-      vg/VecgeomParams.surface.cu
-    )
-  endif()
   if(CELERITAS_USE_CUDA)
     # This needs to be public because its might be needed
     # to resolve the symbols generate by the `nvcc -dlink` of
     # one of the executable.
     list(APPEND PUBLIC_DEPS VecGeom::vecgeom)
     list(APPEND SOURCES
       vg/RaytraceImager.cu
+      vg/detail/VecgeomSetup.cu
     )
   elseif(VecGeom_CUDA_FOUND)
     # VecGeom is built with CUDA but Celeritas is not

diff --git a/src/geocel/vg/VecgeomParams.cc b/src/geocel/vg/VecgeomParams.cc
@@ -17,9 +17,9 @@
 #include <VecGeom/volumes/PlacedVolume.h>
 
 #include "corecel/Config.hh"
-#if CELERITAS_USE_CUDA
+#include "corecel/DeviceRuntimeApi.hh"
+#ifdef VECGEOM_ENABLE_CUDA
 #    include <VecGeom/management/CudaManager.h>
-#    include <cuda_runtime_api.h>
 #endif
 #ifdef VECGEOM_USE_SURF
 #    include <VecGeom/surfaces/BrepHelper.h>
@@ -28,8 +28,6 @@
 #    include <VecGeom/gdml/Frontend.h>
 #endif
 
-#include "corecel/DeviceRuntimeApi.hh"
-
 #include "corecel/Assert.hh"
 #include "corecel/Macros.hh"
 #include "corecel/cont/Range.hh"
@@ -49,14 +47,13 @@
 #include "VecgeomData.hh"  // IWYU pragma: associated
 
 #include "detail/VecgeomCompatibility.hh"
-
-#ifdef VECGEOM_USE_SURF
-#    include "VecgeomParams.surface.hh"
-#endif
+#include "detail/VecgeomSetup.hh"
 
 static_assert(std::is_same_v<celeritas::real_type, vecgeom::Precision>,
               "Celeritas and VecGeom real types do not match");
 
+using vecgeom::cxx::BVHManager;
+
 namespace celeritas
 {
 namespace
@@ -82,12 +79,6 @@ namespace
         } while (0)
 #endif
 
-#if defined(VECGEOM_ENABLE_CUDA) && defined(VECGEOM_USE_SURF)
-#    define VG_CUDASURF_CALL(CODE) CODE
-#else
-#    define VG_CUDASURF_CALL(CODE) CELER_UNREACHABLE
-#endif
-
 //---------------------------------------------------------------------------//
 // HELPER FUNCTIONS
 //---------------------------------------------------------------------------//
@@ -197,7 +188,7 @@ VecgeomParams::~VecgeomParams()
         if (VecgeomParams::use_surface_tracking())
         {
             CELER_LOG(debug) << "Clearing VecGeom surface GPU data";
-            VG_CUDASURF_CALL(teardown_surface_tracking_device());
+            VG_SURF_CALL(detail::teardown_surface_tracking_device());
         }
         else
         {
@@ -367,8 +358,8 @@ void VecgeomParams::build_surface_tracking()
         ScopedTimeAndRedirect time_and_output_(
             "BrepCudaManager::TransferSurfData");
 
-        VG_CUDASURF_CALL(
-            setup_surface_tracking_device(brep_helper.GetSurfData()));
+        VG_SURF_CALL(
+            detail::setup_surface_tracking_device(brep_helper.GetSurfData()));
         CELER_DEVICE_CHECK_ERROR();
     }
 }
@@ -394,7 +385,7 @@ void VecgeomParams::build_volume_tracking()
     }
 
     // Init the bounding volume hierarchy structure
-    vecgeom::cxx::BVHManager::Init();
+    BVHManager::Init();
 
     if (celeritas::device())
     {
@@ -430,22 +421,25 @@ void VecgeomParams::build_volume_tracking()
             set_cuda_heap_size(heap_size);
         }
 
-#if CELERITAS_USE_CUDA
+#ifdef VECGEOM_ENABLE_CUDA
         auto& cuda_manager = vecgeom::cxx::CudaManager::Instance();
         cuda_manager.set_verbose(vecgeom_verbosity());
+#endif
         {
             CELER_LOG(debug) << "Converting to CUDA geometry";
             ScopedTimeAndRedirect time_and_output_(
                 "vecgeom::CudaManager.LoadGeometry");
 
-            cuda_manager.LoadGeometry();
+            VG_CUDA_CALL(cuda_manager.LoadGeometry());
             CELER_DEVICE_CALL_PREFIX(DeviceSynchronize());
         }
         {
             CELER_LOG(debug) << "Transferring geometry to GPU";
             ScopedTimeAndRedirect time_and_output_(
                 "vecgeom::CudaManager.Synchronize");
-            auto world_top_devptr = cuda_manager.Synchronize();
+            void const* world_top_devptr{nullptr};
+            VG_CUDA_CALL(
+                world_top_devptr = cuda_manager.Synchronize().GetPtr());
             CELER_DEVICE_CHECK_ERROR();
             CELER_VALIDATE(world_top_devptr != nullptr,
                            << "VecGeom failed to copy geometry to GPU");
@@ -454,12 +448,65 @@ void VecgeomParams::build_volume_tracking()
             CELER_LOG(debug) << "Initializing BVH on GPU";
             ScopedTimeAndRedirect time_and_output_(
                 "vecgeom::BVHManager::DeviceInit");
-            vecgeom::cxx::BVHManager::DeviceInit();
+#if defined(VECGEOM_BVHMANAGER_DEVICE)
+            auto* bvh_ptr = BVHManager::DeviceInit();
+#elif defined(VECGEOM_ENABLE_CUDA)
+            BVHManager::DeviceInit();
+#endif
+#ifdef VECGEOM_BVHMANAGER_DEVICE
+            auto* bvh_symbol_ptr = BVHManager::GetDeviceBVH();
+            CELER_VALIDATE(bvh_ptr && bvh_ptr == bvh_symbol_ptr,
+                           << "inconsistent BVH device pointer: allocated "
+                           << bvh_ptr << " but copy-from-symbol returned "
+                           << bvh_symbol_ptr);
+#endif
             CELER_DEVICE_CHECK_ERROR();
         }
+
+        // Check BVH pointers
+        auto ptrs = detail::bvh_pointers_device();
+
+        vecgeom::cuda::BVH const* bvh_symbol_ptr{nullptr};
+#ifdef VECGEOM_BVHMANAGER_DEVICE
+        bvh_symbol_ptr = BVHManager::GetDeviceBVH();
+#endif
+        if (ptrs.kernel == nullptr || ptrs.kernel != ptrs.symbol
+            || (bvh_symbol_ptr && (ptrs.kernel != bvh_symbol_ptr)))
+        {
+            // It's very bad if the kernel-viewed BVH pointer is null or
+            // inconsistent with the VecGeom-provided BVH pointer (only
+            // available in very recent VecGeom). It's bad (but not really
+            // necessary?) if cudaMemcpyFromSymbol fails when accessed from
+            // Celeritas
+            auto msg = world_logger()(
+                CELER_CODE_PROVENANCE,
+                (ptrs.kernel == nullptr
+                 || (bvh_symbol_ptr && (ptrs.kernel != bvh_symbol_ptr)))
+                    ? LogLevel::error
+                    : LogLevel::debug);
+            auto msg_pointer = [&msg](auto* p) {
+                if (p)
+                {
+                    msg << p;
+                }
+                else
+                {
+                    msg << "nullptr";
+                }
+            };
+            msg << "VecGeom CUDA may not be correctly linked or initialized ("
+                   "BVH device pointers are null or inconsistent: ";
+            msg_pointer(ptrs.kernel);
+            msg << " from Celeritas device kernel, ";
+            msg_pointer(ptrs.symbol);
+            msg << " from Celeritas runtime symbol, ";
+#ifdef VECGEOM_BVHMANAGER_DEVICE
+            msg_pointer(bvh_symbol_ptr);
 #else
-        CELER_NOT_CONFIGURED("CUDA");
+            msg << "unavailable";
 #endif
+            msg << " from VecGeom runtime symbol)";
+        }
     }
 }
 
@@ -477,7 +524,7 @@ void VecgeomParams::build_data()
 
     if (celeritas::device())
     {
-#if CELERITAS_USE_CUDA
+#ifdef VECGEOM_ENABLE_CUDA
         auto& cuda_manager = vecgeom::cxx::CudaManager::Instance();
         device_ref_.world_volume = cuda_manager.world_gpu();
 #endif

diff --git a/src/geocel/vg/VecgeomParams.surface.cu b/src/geocel/vg/VecgeomParams.surface.cu
diff --git a/src/geocel/vg/VecgeomParams.surface.hh b/src/geocel/vg/VecgeomParams.surface.hh
diff --git a/src/geocel/vg/detail/BVHNavigator.hh b/src/geocel/vg/detail/BVHNavigator.hh
@@ -58,15 +58,22 @@ class BVHNavigator
         Vector3D currentpoint(point);
         Vector3D daughterlocalpoint;
 
-        for (auto v = vol; v->GetDaughters().size() > 0;)
+        while (vol->GetDaughters().size() > 0)
         {
-            auto bvh = vecgeom::BVHManager::GetBVH(v->GetLogicalVolume()->id());
+            auto* bvh
+                = vecgeom::BVHManager::GetBVH(vol->GetLogicalVolume()->id());
+            CELER_ASSERT(bvh);
 
-            if (!bvh->LevelLocate(exclude, currentpoint, v, daughterlocalpoint))
+            // Note: vol is updated by this call
+            if (!bvh->LevelLocate(
+                    exclude, currentpoint, vol, daughterlocalpoint))
+            {
+                // Not inside any daughter
                 break;
+            }
 
             currentpoint = daughterlocalpoint;
-            path.Push(v);
+            path.Push(vol);
             // Only exclude the placed volume once since we could enter it
             // again via a different volume history.
             exclude = nullptr;