diff --git a/src/gpu/nodeKernels.cu b/src/gpu/nodeKernels.cu
index e51b60ba..015afb61 100644
--- a/src/gpu/nodeKernels.cu
+++ b/src/gpu/nodeKernels.cu
@@ -223,6 +223,47 @@ __global__ void kFilterGroundPoints(size_t pointCount, const Vec3f sensor_up_vec
     outNonGround[tid] = normalUpAngle > ground_angle_threshold;
 }
 
+/**
+ * For the beam with index tid, selects the nearest ("first") and the farthest ("last") sample among those
+ * flagged in beamSamples.isHit, and copies their xyz and distance into first / last at that beam index.
+ * Samples of beam b are read from indices [b * samplesPerBeam, (b + 1) * samplesPerBeam) of beamSamples.
+ * If no sample of the beam is a hit, sample 0 of the beam is copied to both outputs.
+ * NOTE(review): first.isHit / last.isHit are not written here - confirm they are set elsewhere.
+ */
+__global__ void kProcessBeamSamplesFirstLast(size_t beamCount, int samplesPerBeam, MultiReturnPointers beamSamples,
+                                             MultiReturnPointers first, MultiReturnPointers last)
+{
+    LIMIT(beamCount);
+
+    const auto beamIdx = tid;
+    // Widen before multiplying so the flat sample offset cannot overflow a 32-bit index.
+    const size_t beamOffset = static_cast<size_t>(beamIdx) * static_cast<size_t>(samplesPerBeam);
+    int firstIdx = -1; // -1: no hit seen yet
+    int lastIdx = -1;
+    for (int sampleIdx = 0; sampleIdx < samplesPerBeam; ++sampleIdx) {
+        if (beamSamples.isHit[beamOffset + sampleIdx] == 0) {
+            continue;
+        }
+        const auto sampleDistance = beamSamples.distance[beamOffset + sampleIdx];
+        // Only hits may serve as the comparison reference; the distance stored for a miss must never win.
+        if (firstIdx < 0 || sampleDistance < beamSamples.distance[beamOffset + firstIdx]) {
+            firstIdx = sampleIdx;
+        }
+        if (lastIdx < 0 || sampleDistance > beamSamples.distance[beamOffset + lastIdx]) {
+            lastIdx = sampleIdx;
+        }
+    }
+    if (firstIdx < 0) {
+        // No hit in this beam: fall back to sample 0 so that the outputs are still written.
+        firstIdx = 0;
+        lastIdx = 0;
+    }
+    first.xyz[beamIdx] = beamSamples.xyz[beamOffset + firstIdx];
+    first.distance[beamIdx] = beamSamples.distance[beamOffset + firstIdx];
+    last.xyz[beamIdx] = beamSamples.xyz[beamOffset + lastIdx];
+    last.distance[beamIdx] = beamSamples.distance[beamOffset + lastIdx];
+}
+
 void gpuFindCompaction(cudaStream_t stream, size_t pointCount, const int32_t* shouldCompact,
                        CompactionIndexType* hitCountInclusive, size_t* outHitCount)
 {
@@ -294,3 +335,10 @@ void gpuRadarComputeEnergy(cudaStream_t stream, size_t count, float rayAzimuthSt
     run(kRadarComputeEnergy, stream, count, rayAzimuthStepRad, rayElevationStepRad, freq, lookAtOriginTransform, rayPose, hitDist,
         hitNorm, hitPos, outBUBRFactor);
 }
+
+// Enqueues kProcessBeamSamplesFirstLast on the given stream through run(), forwarding all arguments unchanged.
+void gpuProcessBeamSamplesFirstLast(cudaStream_t stream, size_t beamCount, int samplesPerBeam, MultiReturnPointers beamSamples,
+                                    MultiReturnPointers first, MultiReturnPointers last)
+{
+    run(kProcessBeamSamplesFirstLast, stream, beamCount, samplesPerBeam, beamSamples, first, last);
+}
diff --git a/src/gpu/nodeKernels.hpp b/src/gpu/nodeKernels.hpp
index f05cfd60..b8002a1a 100644
--- a/src/gpu/nodeKernels.hpp
+++ b/src/gpu/nodeKernels.hpp
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 
 /*
  * The following functions are asynchronous!
@@ -50,4 +51,6 @@ void gpuFilterGroundPoints(cudaStream_t stream, size_t pointCount, const Vec3f s
 void gpuRadarComputeEnergy(cudaStream_t stream, size_t count, float rayAzimuthStepRad, float rayElevationStepRad,
                            float freq, Mat3x4f lookAtOriginTransform, const Field::type* rayPose,
                            const Field::type* hitDist, const Field::type* hitNorm,
-                           const Field::type* hitPos, Vector<3, thrust::complex>* outBUBRFactor);
\ No newline at end of file
+                           const Field::type* hitPos, Vector<3, thrust::complex>* outBUBRFactor);
+void gpuProcessBeamSamplesFirstLast(cudaStream_t stream, size_t beamCount, int samplesPerBeam, MultiReturnPointers beamSamples,
+                                    MultiReturnPointers first, MultiReturnPointers last);
diff --git a/src/graph/RaytraceNode.cpp b/src/graph/RaytraceNode.cpp
index 4dd2efa7..75f16702 100644
--- a/src/graph/RaytraceNode.cpp
+++ b/src/graph/RaytraceNode.cpp
@@ -133,6 +133,9 @@ void RaytraceNode::enqueueExecImpl()
     std::size_t pipelineArgsSize = requestCtxDev->getSizeOf() * requestCtxDev->getCount();
     CHECK_OPTIX(optixLaunch(Optix::getOrCreate().pipeline, getStreamHandle(), pipelineArgsPtr, pipelineArgsSize, &sceneSBT,
                             launchDims.x, launchDims.y, launchDims.y));
+
+    gpuProcessBeamSamplesFirstLast(getStreamHandle(), raysNode->getRayCount(), MULTI_RETURN_BEAM_SAMPLES,
+                                   mrSamples.getPointers(), mrFirst.getPointers(), mrLast.getPointers());
 }
 
 void RaytraceNode::setFields(const std::set& fields)