diff --git a/roofit/batchcompute/src/RooBatchCompute.cu b/roofit/batchcompute/src/RooBatchCompute.cu index 34bfe5885fdd3..55f3d4d457b1c 100644 --- a/roofit/batchcompute/src/RooBatchCompute.cu +++ b/roofit/batchcompute/src/RooBatchCompute.cu @@ -47,14 +47,19 @@ void fillBatches(Batches &batches, RestrictArr output, size_t nEvents, std::size batches._output = output; } -void fillArrays(Batch *arrays, const VarVector &vars, double *buffer, double *bufferDevice) +void fillArrays(Batch *arrays, const VarVector &vars, double *buffer, double *bufferDevice, std::size_t nEvents) { for (int i = 0; i < vars.size(); i++) { const std::span &span = vars[i]; - if (span.size() == 1) { + if (!span.empty() && span.size() < nEvents) { + // In the scalar case, the value is not on the GPU yet, so we have to + // copy the value to the GPU buffer. buffer[i] = span[0]; arrays[i].set(bufferDevice + i, false); } else { + // In the vector input cases, they are already on the GPU, so we can + // fill be buffer with some dummy value and set the input span + // directly. buffer[i] = 0.0; arrays[i].set(span.data(), true); } @@ -132,7 +137,7 @@ public: auto extraArgsDevice = reinterpret_cast(scalarBufferDevice + vars.size()); fillBatches(*batches, output, nEvents, vars.size(), extraArgs.size()); - fillArrays(arrays, vars, scalarBuffer, scalarBufferDevice); + fillArrays(arrays, vars, scalarBuffer, scalarBufferDevice, nEvents); batches->_arrays = arraysDevice; if (!extraArgs.empty()) { @@ -255,6 +260,8 @@ __global__ void nllSumKernel(const double *__restrict__ probas, const double *__ double RooBatchComputeClass::reduceSum(RooBatchCompute::Config const &cfg, InputArr input, size_t n) { + if (n == 0) + return 0.0; const int gridSize = getGridSize(n); cudaStream_t stream = *cfg.cudaStream(); CudaInterface::DeviceArray devOut(2 * gridSize); @@ -270,6 +277,9 @@ ReduceNLLOutput RooBatchComputeClass::reduceNLL(RooBatchCompute::Config const &c std::span weights, std::span offsetProbas) { ReduceNLLOutput out; + if (probas.empty()) { + return out; + } const int gridSize = getGridSize(probas.size()); CudaInterface::DeviceArray devOut(2 * gridSize); cudaStream_t stream = *cfg.cudaStream(); diff --git a/roofit/batchcompute/src/RooBatchCompute.cxx b/roofit/batchcompute/src/RooBatchCompute.cxx index ae9970b0f1a63..b6a22cb13ded4 100644 --- a/roofit/batchcompute/src/RooBatchCompute.cxx +++ b/roofit/batchcompute/src/RooBatchCompute.cxx @@ -50,21 +50,19 @@ void fillBatches(Batches &batches, RestrictArr output, size_t nEvents, std::size batches._output = output; } -void fillArrays(std::vector &arrays, const VarVector &vars, double *buffer) +void fillArrays(std::vector &arrays, const VarVector &vars, double *buffer, std::size_t nEvents) { arrays.resize(vars.size()); for (size_t i = 0; i < vars.size(); i++) { const std::span &span = vars[i]; - if (span.empty()) { - std::stringstream ss; - ss << "The span number " << i << " passed to Batches::Batches() is empty!"; - throw std::runtime_error(ss.str()); - } else if (span.size() > 1) { - arrays[i].set(span.data(), true); - } else { + if (!span.empty() && span.size() < nEvents) { + // In the scalar case, copy the value to each element of vector input + // buffer. std::fill_n(&buffer[i * bufferSize], bufferSize, span.data()[0]); arrays[i].set(&buffer[i * bufferSize], false); + } else { + arrays[i].set(span.data(), true); } } } @@ -131,7 +129,7 @@ class RooBatchComputeClass : public RooBatchComputeInterface { Batches batches; std::vector arrays; fillBatches(batches, output, nEventsPerThread, vars.size(), extraArgs); - fillArrays(arrays, vars, buffer.data()); + fillArrays(arrays, vars, buffer.data(), nEvents); batches._arrays = arrays.data(); batches.advance(batches.getNEvents() * idx); @@ -163,7 +161,7 @@ class RooBatchComputeClass : public RooBatchComputeInterface { Batches batches; std::vector arrays; fillBatches(batches, output, nEvents, vars.size(), extraArgs); - fillArrays(arrays, vars, buffer.data()); + fillArrays(arrays, vars, buffer.data(), nEvents); batches._arrays = arrays.data(); std::size_t events = batches.getNEvents(); diff --git a/roofit/roofitcore/inc/RooFit/Detail/BatchModeDataHelpers.h b/roofit/roofitcore/inc/RooFit/Detail/BatchModeDataHelpers.h index 34367d318cc82..b41350aaeafa1 100644 --- a/roofit/roofitcore/inc/RooFit/Detail/BatchModeDataHelpers.h +++ b/roofit/roofitcore/inc/RooFit/Detail/BatchModeDataHelpers.h @@ -41,8 +41,7 @@ getDataSpans(RooAbsData const &data, std::string const &rangeName, RooSimultaneo bool takeGlobalObservablesFromData, std::stack> &buffers); std::map -determineOutputSizes(RooAbsArg const &topNode, - std::function const &inputSizeFunc); +determineOutputSizes(RooAbsArg const &topNode, std::function const &inputSizeFunc); } // namespace BatchModeDataHelpers } // namespace Detail diff --git a/roofit/roofitcore/src/BatchModeDataHelpers.cxx b/roofit/roofitcore/src/BatchModeDataHelpers.cxx index da1af654f230e..19c99a95761a1 100644 --- a/roofit/roofitcore/src/BatchModeDataHelpers.cxx +++ b/roofit/roofitcore/src/BatchModeDataHelpers.cxx @@ -54,12 +54,6 @@ getSingleDataSpans(RooAbsData const &data, std::string_view rangeName, std::stri std::size_t nEvents = static_cast(data.numEntries()); - // We also want to support empty datasets: in this case the - // RooFitDriver::Dataset is not filled with anything. - if (nEvents == 0) { - return dataSpans; - } - auto weight = data.getWeightBatch(0, nEvents, /*sumW2=*/false); auto weightSumW2 = data.getWeightBatch(0, nEvents, /*sumW2=*/true); @@ -274,7 +268,7 @@ RooFit::Detail::BatchModeDataHelpers::getDataSpans(RooAbsData const &data, std:: /// \param[in] topNode The top node of the computation graph. /// \param[in] inputSizeFunc A function to get the input sizes. std::map RooFit::Detail::BatchModeDataHelpers::determineOutputSizes( - RooAbsArg const &topNode, std::function const &inputSizeFunc) + RooAbsArg const &topNode, std::function const &inputSizeFunc) { std::map output; @@ -282,8 +276,10 @@ std::map RooFit::Detail::BatchModeDataHelp RooHelpers::getSortedComputationGraph(topNode, serverSet); for (RooAbsArg *arg : serverSet) { - std::size_t inputSize = inputSizeFunc(arg); - if (inputSize > 0) { + int inputSize = inputSizeFunc(arg); + // The size == -1 encodes that the input doesn't come from an array + // input. + if (inputSize != -1) { output[arg] = inputSize; } } @@ -296,7 +292,14 @@ std::map RooFit::Detail::BatchModeDataHelp if (!arg->isReducerNode()) { for (RooAbsArg *server : arg->servers()) { if (server->isValueServer(*arg)) { - size = std::max(output.at(server), size); + std::size_t inputSize = output.at(server); + if (inputSize != 1) { + // If the input if from an external array, the output will + // adopt its size and we can stop the checking of other + // servers. + size = inputSize; + break; + } } } } diff --git a/roofit/roofitcore/src/RooDataSet.cxx b/roofit/roofitcore/src/RooDataSet.cxx index 03507c431c444..434a9c0e98141 100644 --- a/roofit/roofitcore/src/RooDataSet.cxx +++ b/roofit/roofitcore/src/RooDataSet.cxx @@ -919,7 +919,7 @@ std::span RooDataSet::getWeightBatch(std::size_t first, std::size_ std::size_t nEntries = this->numEntries(); // for the casting to std::size_t - if(first >= nEntries || (first + len) > nEntries) { + if(first + len > nEntries) { throw std::runtime_error("RooDataSet::getWeightBatch(): requested range not valid for dataset."); } diff --git a/roofit/roofitcore/src/RooFit/Evaluator.cxx b/roofit/roofitcore/src/RooFit/Evaluator.cxx index 13a7eab674d63..d7734c6edfbfa 100644 --- a/roofit/roofitcore/src/RooFit/Evaluator.cxx +++ b/roofit/roofitcore/src/RooFit/Evaluator.cxx @@ -262,30 +262,27 @@ void Evaluator::setInput(std::string const &name, std::span inputA info.outputSize = inputArray.size(); if (_useGPU) { #ifdef ROOFIT_CUDA - if (info.outputSize == 1) { - // Scalar observables from the data don't need to be copied to the GPU + if (info.outputSize <= 1) { + // Empty or scalar observables from the data don't need to be + // copied to the GPU. _dataMapCPU.set(info.absArg, inputArray); _dataMapCUDA.set(info.absArg, inputArray); } else { - if (_useGPU) { - // For simplicity, we put the data on both host and device for - // now. This could be optimized by inspecting the clients of the - // variable. - if (isOnDevice) { - _dataMapCUDA.set(info.absArg, inputArray); - auto gpuSpan = _dataMapCUDA.at(info.absArg); - info.buffer = _bufferManager->makeCpuBuffer(gpuSpan.size()); - CudaInterface::copyDeviceToHost(gpuSpan.data(), info.buffer->cpuWritePtr(), gpuSpan.size()); - _dataMapCPU.set(info.absArg, {info.buffer->cpuReadPtr(), gpuSpan.size()}); - } else { - _dataMapCPU.set(info.absArg, inputArray); - auto cpuSpan = _dataMapCPU.at(info.absArg); - info.buffer = _bufferManager->makeGpuBuffer(cpuSpan.size()); - CudaInterface::copyHostToDevice(cpuSpan.data(), info.buffer->gpuWritePtr(), cpuSpan.size()); - _dataMapCUDA.set(info.absArg, {info.buffer->gpuReadPtr(), cpuSpan.size()}); - } + // For simplicity, we put the data on both host and device for + // now. This could be optimized by inspecting the clients of the + // variable. + if (isOnDevice) { + _dataMapCUDA.set(info.absArg, inputArray); + auto gpuSpan = _dataMapCUDA.at(info.absArg); + info.buffer = _bufferManager->makeCpuBuffer(gpuSpan.size()); + CudaInterface::copyDeviceToHost(gpuSpan.data(), info.buffer->cpuWritePtr(), gpuSpan.size()); + _dataMapCPU.set(info.absArg, {info.buffer->cpuReadPtr(), gpuSpan.size()}); } else { _dataMapCPU.set(info.absArg, inputArray); + auto cpuSpan = _dataMapCPU.at(info.absArg); + info.buffer = _bufferManager->makeGpuBuffer(cpuSpan.size()); + CudaInterface::copyHostToDevice(cpuSpan.data(), info.buffer->gpuWritePtr(), cpuSpan.size()); + _dataMapCUDA.set(info.absArg, {info.buffer->gpuReadPtr(), cpuSpan.size()}); } } #endif @@ -313,9 +310,9 @@ void Evaluator::updateOutputSizes() } auto outputSizeMap = - RooFit::Detail::BatchModeDataHelpers::determineOutputSizes(_topNode, [&](RooFit::Detail::DataKey key) { + RooFit::Detail::BatchModeDataHelpers::determineOutputSizes(_topNode, [&](RooFit::Detail::DataKey key) -> int { auto found = sizeMap.find(key); - return found != sizeMap.end() ? found->second : 0; + return found != sizeMap.end() ? found->second : -1; }); for (auto &info : _nodes) { diff --git a/roofit/roofitcore/src/RooFuncWrapper.cxx b/roofit/roofitcore/src/RooFuncWrapper.cxx index 29e89375d815d..00365ba285c24 100644 --- a/roofit/roofitcore/src/RooFuncWrapper.cxx +++ b/roofit/roofitcore/src/RooFuncWrapper.cxx @@ -100,10 +100,10 @@ void RooFuncWrapper::loadParamsAndData(RooAbsArg const *head, RooArgSet const &p _gradientVarBuffer.resize(_params.size()); if (head) { - _nodeOutputSizes = - RooFit::Detail::BatchModeDataHelpers::determineOutputSizes(*head, [&spans](RooFit::Detail::DataKey key) { + _nodeOutputSizes = RooFit::Detail::BatchModeDataHelpers::determineOutputSizes( + *head, [&spans](RooFit::Detail::DataKey key) -> int { auto found = spans.find(key); - return found != spans.end() ? found->second.size() : 0; + return found != spans.end() ? found->second.size() : -1; }); } } diff --git a/roofit/roofitcore/test/testTestStatistics.cxx b/roofit/roofitcore/test/testTestStatistics.cxx index 5bf655a8af804..2ef4082c3a862 100644 --- a/roofit/roofitcore/test/testTestStatistics.cxx +++ b/roofit/roofitcore/test/testTestStatistics.cxx @@ -24,6 +24,8 @@ #include #include +#include + #include "gtest_wrapper.h" #include @@ -269,6 +271,57 @@ TEST_P(TestStatisticTest, IntegrateBins_RooDataHist) << "Expect chi2/ndf at least 10% better."; } +// Verify that fitting an empty RooDataSet or a RooDataHist with only empty +// bins does not do anything to the parameters. The point of this test is to +// validate that the new CPU backend behaves the same as the legacy evaluation +// backend for empty data objects. +TEST_P(TestStatisticTest, EmptyData) +{ + RooWorkspace ws; + ws.factory("Gaussian::model(x[0, 10], mean[6, 0, 10], sigma[2.0, 0.01, 10.0])"); + + RooRealVar &x = *ws.var("x"); + RooRealVar &mean = *ws.var("mean"); + RooRealVar &sigma = *ws.var("sigma"); + RooAbsPdf &model = *ws.pdf("model"); + + const double meanOrigVal = mean.getVal(); + const double sigmaOrigVal = sigma.getVal(); + + std::unique_ptr data{model.generate(x, 0)}; + std::unique_ptr dataHist{data->binnedClone()}; + + { + // We expect errors in the Hessian calculation because the likelihood is + // constant. + ROOT::TestSupport::CheckDiagsRAII checkDiag; + checkDiag.requiredDiag(kWarning, "ROOT::Math::Fitter::CalculateHessErrors", "Error when calculating Hessian"); + + model.fitTo(*data, _evalBackend, RooFit::PrintLevel(-1)); + } + + EXPECT_EQ(mean.getVal(), meanOrigVal) << "Fitting an empty RooDataSet changed \"mean\" value!"; + EXPECT_EQ(sigma.getVal(), sigmaOrigVal) << "Fitting an empty RooDataSet changed \"sigma\" value!"; + + // Reset the parameters for the check with the RooDataHist + mean.setVal(meanOrigVal); + sigma.setVal(sigmaOrigVal); + mean.setError(0.0); + sigma.setError(0.0); + + { + // We expect errors in the Hessian calculation because the likelihood is + // constant (same as above for the RooDataSet). + ROOT::TestSupport::CheckDiagsRAII checkDiag; + checkDiag.requiredDiag(kWarning, "ROOT::Math::Fitter::CalculateHessErrors", "Error when calculating Hessian"); + + model.fitTo(*dataHist, _evalBackend, RooFit::PrintLevel(-1)); + } + + EXPECT_EQ(mean.getVal(), meanOrigVal) << "Fitting an empty RooDataSet changed \"mean\" value!"; + EXPECT_EQ(sigma.getVal(), sigmaOrigVal) << "Fitting an empty RooDataSet changed \"sigma\" value!"; +} + TEST(RooChi2Var, IntegrateBins) { RooHelpers::LocalChangeMsgLevel changeMsgLvl(RooFit::WARNING); @@ -587,11 +640,11 @@ INSTANTIATE_TEST_SUITE_P(RooNLLVar, TestStatisticTest, testing::Values(ROOFIT_EV INSTANTIATE_TEST_SUITE_P(RooNLLVar, OffsetBinTest, testing::Combine(testing::Values(ROOFIT_EVAL_BACKENDS), // EvalBackend - testing::Values(false, true), // unbinned or binned - testing::Values(false, true), // extended fit - testing::Values(false, true), // use sumW2 - testing::Values(false, true), // wrap in a RooSimultaneous - testing::Values(false) // binned likelihood code path + testing::Values(false, true), // unbinned or binned + testing::Values(false, true), // extended fit + testing::Values(false, true), // use sumW2 + testing::Values(false, true), // wrap in a RooSimultaneous + testing::Values(false) // binned likelihood code path ), [](testing::TestParamInfo const ¶mInfo) { std::stringstream ss; @@ -606,11 +659,11 @@ INSTANTIATE_TEST_SUITE_P(RooNLLVar, OffsetBinTest, INSTANTIATE_TEST_SUITE_P(RooNLLVarBinnedL, OffsetBinTest, testing::Combine(testing::Values(ROOFIT_EVAL_BACKENDS), // EvalBackend - testing::Values(true), // unbinned or binned - testing::Values(false), // extended fit - testing::Values(false), // use sumW2 - testing::Values(false, true), // wrap in a RooSimultaneous - testing::Values(true) // binned likelihood code path + testing::Values(true), // unbinned or binned + testing::Values(false), // extended fit + testing::Values(false), // use sumW2 + testing::Values(false, true), // wrap in a RooSimultaneous + testing::Values(true) // binned likelihood code path ), [](testing::TestParamInfo const ¶mInfo) { std::stringstream ss;