diff --git a/include/maths/CTimeSeriesChangeDetector.h b/include/maths/CTimeSeriesChangeDetector.h index 5726446d24..49236a59cb 100644 --- a/include/maths/CTimeSeriesChangeDetector.h +++ b/include/maths/CTimeSeriesChangeDetector.h @@ -99,7 +99,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector const TPriorPtr &residualModel, core_t::TTime minimumTimeToDetect = 6 * core::constants::HOUR, core_t::TTime maximumTimeToDetect = core::constants::DAY, - double minimumDeltaBicToDetect = 12.0); + double minimumDeltaBicToDetect = 14.0); //! Initialize by reading state from \p traverser. bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, @@ -191,6 +191,9 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable //! The BIC of applying the change. virtual double bic() const = 0; + //! The expected BIC of applying the change. + virtual double expectedBic() const = 0; + //! Get a description of the change. virtual TOptionalChangeDescription change() const = 0; @@ -223,10 +226,14 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable //! Get the log-likelihood. double logLikelihood() const; - //! Update the data log-likelihood with \p logLikelihood. void addLogLikelihood(double logLikelihood); + //! Get the expected log-likelihood. + double expectedLogLikelihood() const; + //! Update the expected data log-likelihood with \p logLikelihood. + void addExpectedLogLikelihood(double logLikelihood); + //! Get the time series trend model. const CTimeSeriesDecompositionInterface &trendModel() const; //! Get the time series trend model. @@ -243,6 +250,9 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable //! The likelihood of the data under this model. double m_LogLikelihood; + //! The expected log-likelihood of the data under this model. + double m_ExpectedLogLikelihood; + //! A model decomposing the time series trend. TDecompositionPtr m_TrendModel; @@ -267,6 +277,9 @@ class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeMode //! Returns the no change BIC. virtual double bic() const; + //! The expected BIC of applying the change. + virtual double expectedBic() const; + //! Returns a null object. virtual TOptionalChangeDescription change() const; @@ -301,6 +314,9 @@ class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeMo //! The BIC of applying the level shift. virtual double bic() const; + //! The expected BIC of applying the change. + virtual double expectedBic() const; + //! Get a description of the level shift. virtual TOptionalChangeDescription change() const; @@ -350,6 +366,9 @@ class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeMod //! The BIC of applying the time shift. virtual double bic() const; + //! The expected BIC of applying the change. + virtual double expectedBic() const; + //! Get a description of the time shift. virtual TOptionalChangeDescription change() const; diff --git a/lib/maths/CTimeSeriesChangeDetector.cc b/lib/maths/CTimeSeriesChangeDetector.cc index a8e60f7459..dc8a77fc4c 100644 --- a/lib/maths/CTimeSeriesChangeDetector.cc +++ b/lib/maths/CTimeSeriesChangeDetector.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,6 @@ using TDouble1Vec = core::CSmallVector; using TDouble4Vec = core::CSmallVector; using TDouble4Vec1Vec = core::CSmallVector; using TOptionalChangeDescription = CUnivariateTimeSeriesChangeDetector::TOptionalChangeDescription; - const std::string MINIMUM_TIME_TO_DETECT{"a"}; const std::string MAXIMUM_TIME_TO_DETECT{"b"}; const std::string MINIMUM_DELTA_BIC_TO_DETECT{"c"}; @@ -61,9 +61,12 @@ const std::string MIN_TIME_TAG{"g"}; const std::string MAX_TIME_TAG{"h"}; const std::string CHANGE_MODEL_TAG{"i"}; const std::string LOG_LIKELIHOOD_TAG{"j"}; -const std::string SHIFT_TAG{"k"}; -const std::string TREND_MODEL_TAG{"l"}; -const std::string RESIDUAL_MODEL_TAG{"m"}; +const std::string EXPECTED_LOG_LIKELIHOOD_TAG{"k"}; +const std::string SHIFT_TAG{"l"}; +const std::string TREND_MODEL_TAG{"m"}; +const std::string RESIDUAL_MODEL_TAG{"n"}; +const std::size_t EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS{4u}; +const double EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER{0.9}; } SChangeDescription::SChangeDescription(EDescription description, @@ -169,12 +172,25 @@ TOptionalChangeDescription CUnivariateTimeSeriesChangeDetector::change() double evidences[]{noChangeBic - candidates[0].first, noChangeBic - candidates[1].first}; - m_CurrentEvidenceOfChange = evidences[0]; - if ( evidences[0] > m_MinimumDeltaBicToDetect - && evidences[0] > evidences[1] + m_MinimumDeltaBicToDetect / 2.0) + double expectedEvidence{noChangeBic - (*candidates[0].second)->expectedBic()}; + + double x[]{evidences[0] / m_MinimumDeltaBicToDetect, + 2.0 * (evidences[0] - evidences[1]) / m_MinimumDeltaBicToDetect, + evidences[0] / EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER / expectedEvidence, + static_cast(m_TimeRange.range() - m_MinimumTimeToDetect) + / static_cast(m_MaximumTimeToDetect - m_MinimumTimeToDetect)}; + double p{ CTools::logisticFunction(x[0], 0.05, 1.0) + * CTools::logisticFunction(x[1], 0.1, 1.0) + * (x[2] < 0.0 ? 1.0 : CTools::logisticFunction(x[2], 0.2, 1.0)) + * (0.5 + CTools::logisticFunction(x[3], 0.2, 0.5))}; + LOG_TRACE("p = " << p); + + if (p > 0.0625/*= std::pow(0.5, 4.0)*/) { return (*candidates[0].second)->change(); } + + m_CurrentEvidenceOfChange = evidences[0]; } return TOptionalChangeDescription(); } @@ -236,9 +252,34 @@ namespace time_series_change_detector_detail CUnivariateChangeModel::CUnivariateChangeModel(const TDecompositionPtr &trendModel, const TPriorPtr &residualModel) : - m_LogLikelihood{0.0}, m_TrendModel{trendModel}, m_ResidualModel{residualModel} + m_LogLikelihood{0.0}, m_ExpectedLogLikelihood{0.0}, + m_TrendModel{trendModel}, m_ResidualModel{residualModel} {} +bool CUnivariateChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/, + core::CStateRestoreTraverser &traverser) +{ + do + { + const std::string name{traverser.name()}; + RESTORE_BUILT_IN(LOG_LIKELIHOOD_TAG, m_LogLikelihood); + RESTORE_BUILT_IN(EXPECTED_LOG_LIKELIHOOD_TAG, m_ExpectedLogLikelihood); + return true; + } + while (traverser.next()); + return true; +} + +void CUnivariateChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const +{ + inserter.insertValue(LOG_LIKELIHOOD_TAG, + m_LogLikelihood, + core::CIEEE754::E_SinglePrecision); + inserter.insertValue(EXPECTED_LOG_LIKELIHOOD_TAG, + m_ExpectedLogLikelihood, + core::CIEEE754::E_SinglePrecision); +} + void CUnivariateChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { // Note if the trend and residual models are shallow copied their @@ -258,6 +299,7 @@ std::size_t CUnivariateChangeModel::memoryUsage() const uint64_t CUnivariateChangeModel::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_LogLikelihood); + seed = CChecksum::calculate(seed, m_ExpectedLogLikelihood); seed = CChecksum::calculate(seed, m_TrendModel); return CChecksum::calculate(seed, m_ResidualModel); } @@ -280,6 +322,16 @@ void CUnivariateChangeModel::addLogLikelihood(double logLikelihood) m_LogLikelihood += logLikelihood; } +double CUnivariateChangeModel::expectedLogLikelihood() const +{ + return m_ExpectedLogLikelihood; +} + +void CUnivariateChangeModel::addExpectedLogLikelihood(double logLikelihood) +{ + m_ExpectedLogLikelihood += logLikelihood; +} + const CTimeSeriesDecompositionInterface &CUnivariateChangeModel::trendModel() const { return *m_TrendModel; @@ -310,24 +362,15 @@ CUnivariateNoChangeModel::CUnivariateNoChangeModel(const TDecompositionPtr &tren CUnivariateChangeModel{trendModel, residualModel} {} -bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/, +bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, core::CStateRestoreTraverser &traverser) { - do - { - const std::string name{traverser.name()}; - RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG, - double logLikelihood, - core::CStringUtils::stringToType(traverser.value(), logLikelihood), - this->addLogLikelihood(logLikelihood)) - } - while (traverser.next()); - return true; + return this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser); } void CUnivariateNoChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const { - inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood()); + this->CUnivariateChangeModel::acceptPersistInserter(inserter); } double CUnivariateNoChangeModel::bic() const @@ -335,6 +378,13 @@ double CUnivariateNoChangeModel::bic() const return -2.0 * this->logLikelihood(); } +double CUnivariateNoChangeModel::expectedBic() const +{ + // This is irrelevant since this is only used for deciding + // whether to accept a change. + return this->bic(); +} + TOptionalChangeDescription CUnivariateNoChangeModel::change() const { return TOptionalChangeDescription(); @@ -357,7 +407,7 @@ void CUnivariateNoChangeModel::addSamples(std::size_t count, samples.push_back(this->trendModel().detrend(sample.first, sample.second, 0.0)); } - double logLikelihood; + double logLikelihood{0.0}; if (this->residualModel().jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood) == maths_t::E_FpNoErrors) { @@ -386,13 +436,13 @@ CUnivariateLevelShiftModel::CUnivariateLevelShiftModel(const TDecompositionPtr & bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, core::CStateRestoreTraverser &traverser) { + if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) + { + return false; + } do { const std::string name{traverser.name()}; - RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG, - double logLikelihood, - core::CStringUtils::stringToType(traverser.value(), logLikelihood), - this->addLogLikelihood(logLikelihood)) RESTORE(SHIFT_TAG, m_Shift.fromDelimited(traverser.value())) RESTORE_BUILT_IN(RESIDUAL_MODEL_MODE_TAG, m_ResidualModelMode) RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) @@ -404,7 +454,7 @@ bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParam void CUnivariateLevelShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const { - inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood()); + this->CUnivariateChangeModel::acceptPersistInserter(inserter); inserter.insertValue(SHIFT_TAG, m_Shift.toDelimited()); inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), @@ -416,6 +466,11 @@ double CUnivariateLevelShiftModel::bic() const return -2.0 * this->logLikelihood() + std::log(m_SampleCount); } +double CUnivariateLevelShiftModel::expectedBic() const +{ + return -2.0 * this->expectedLogLikelihood() + std::log(m_SampleCount); +} + TOptionalChangeDescription CUnivariateLevelShiftModel::change() const { // The "magic" 0.9 is due to the fact that the trend is updated @@ -465,12 +520,24 @@ void CUnivariateLevelShiftModel::addSamples(std::size_t count, residualModel.addSamples(weightStyles, samples, weights); residualModel.propagateForwardsByTime(1.0); - double logLikelihood; + double logLikelihood{0.0}; if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood) == maths_t::E_FpNoErrors) { this->addLogLikelihood(logLikelihood); } + for (const auto &weight : weights) + { + double expectedLogLikelihood{0.0}; + TDouble4Vec1Vec weight_{weight}; + if (residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{ + residualModel, weightStyles, weight_}, + EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS, + expectedLogLikelihood, weightStyles, weight)) + { + this->addExpectedLogLikelihood(expectedLogLikelihood); + } + } } } @@ -496,13 +563,13 @@ CUnivariateTimeShiftModel::CUnivariateTimeShiftModel(const TDecompositionPtr &tr bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, core::CStateRestoreTraverser &traverser) { + if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) + { + return false; + } do { const std::string name{traverser.name()}; - RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG, - double logLikelihood, - core::CStringUtils::stringToType(traverser.value(), logLikelihood), - this->addLogLikelihood(logLikelihood)) RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser)) } while (traverser.next()); @@ -511,7 +578,7 @@ bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams void CUnivariateTimeShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const { - inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood()); + this->CUnivariateChangeModel::acceptPersistInserter(inserter); inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(this->residualModel()), _1)); } @@ -521,6 +588,11 @@ double CUnivariateTimeShiftModel::bic() const return -2.0 * this->logLikelihood(); } +double CUnivariateTimeShiftModel::expectedBic() const +{ + return -2.0 * this->expectedLogLikelihood(); +} + TOptionalChangeDescription CUnivariateTimeShiftModel::change() const { return SChangeDescription{SChangeDescription::E_TimeShift, @@ -549,12 +621,22 @@ void CUnivariateTimeShiftModel::addSamples(std::size_t count, residualModel.addSamples(weightStyles, samples, weights); residualModel.propagateForwardsByTime(1.0); - double logLikelihood; + double logLikelihood{0.0}; if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood) == maths_t::E_FpNoErrors) { this->addLogLikelihood(logLikelihood); } + for (const auto &weight : weights) + { + double expectedLogLikelihood{0.0}; + TDouble4Vec1Vec weight_{weight}; + residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{ + residualModel, weightStyles, weight_}, + EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS, + expectedLogLikelihood, weightStyles, weight); + this->addExpectedLogLikelihood(expectedLogLikelihood); + } } }