Skip to content

Commit

Permalink
[ML] Implements an absolute goodness-of-fit test to accept a change (#21
Browse files Browse the repository at this point in the history
)

This implements an absolute "goodness-of-fit" test for each change, by additionally testing a 
change versus its expected BIC given the residual distribution. It means we will only accept 
changes if they are a reasonably accurate description of the change currently occurring in the 
time series.
  • Loading branch information
tveasey authored Mar 26, 2018
1 parent cf48634 commit 1eb8f8a
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 35 deletions.
23 changes: 21 additions & 2 deletions include/maths/CTimeSeriesChangeDetector.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector
const TPriorPtr &residualModel,
core_t::TTime minimumTimeToDetect = 6 * core::constants::HOUR,
core_t::TTime maximumTimeToDetect = core::constants::DAY,
double minimumDeltaBicToDetect = 12.0);
double minimumDeltaBicToDetect = 14.0);

//! Initialize by reading state from \p traverser.
bool acceptRestoreTraverser(const SModelRestoreParams &params,
Expand Down Expand Up @@ -191,6 +191,9 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable
//! The BIC of applying the change.
virtual double bic() const = 0;

//! The expected BIC of applying the change.
virtual double expectedBic() const = 0;

//! Get a description of the change.
virtual TOptionalChangeDescription change() const = 0;

Expand Down Expand Up @@ -223,10 +226,14 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable

//! Get the log-likelihood.
double logLikelihood() const;

//! Update the data log-likelihood with \p logLikelihood.
void addLogLikelihood(double logLikelihood);

//! Get the expected log-likelihood.
double expectedLogLikelihood() const;
//! Update the expected data log-likelihood with \p logLikelihood.
void addExpectedLogLikelihood(double logLikelihood);

//! Get the time series trend model.
const CTimeSeriesDecompositionInterface &trendModel() const;
//! Get the time series trend model.
Expand All @@ -243,6 +250,9 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable
//! The likelihood of the data under this model.
double m_LogLikelihood;

//! The expected log-likelihood of the data under this model.
double m_ExpectedLogLikelihood;

//! A model decomposing the time series trend.
TDecompositionPtr m_TrendModel;

Expand All @@ -267,6 +277,9 @@ class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeMode
//! Returns the no change BIC.
virtual double bic() const;

//! The expected BIC of applying the change.
virtual double expectedBic() const;

//! Returns a null object.
virtual TOptionalChangeDescription change() const;

Expand Down Expand Up @@ -301,6 +314,9 @@ class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeMo
//! The BIC of applying the level shift.
virtual double bic() const;

//! The expected BIC of applying the change.
virtual double expectedBic() const;

//! Get a description of the level shift.
virtual TOptionalChangeDescription change() const;

Expand Down Expand Up @@ -350,6 +366,9 @@ class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeMod
//! The BIC of applying the time shift.
virtual double bic() const;

//! The expected BIC of applying the change.
virtual double expectedBic() const;

//! Get a description of the time shift.
virtual TOptionalChangeDescription change() const;

Expand Down
148 changes: 115 additions & 33 deletions lib/maths/CTimeSeriesChangeDetector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <maths/CBasicStatisticsPersist.h>
#include <maths/CChecksum.h>
#include <maths/CPrior.h>
#include <maths/CPriorDetail.h>
#include <maths/CPriorStateSerialiser.h>
#include <maths/CRestoreParams.h>
#include <maths/CTimeSeriesModel.h>
Expand All @@ -50,7 +51,6 @@ using TDouble1Vec = core::CSmallVector<double, 1>;
using TDouble4Vec = core::CSmallVector<double, 4>;
using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
using TOptionalChangeDescription = CUnivariateTimeSeriesChangeDetector::TOptionalChangeDescription;

const std::string MINIMUM_TIME_TO_DETECT{"a"};
const std::string MAXIMUM_TIME_TO_DETECT{"b"};
const std::string MINIMUM_DELTA_BIC_TO_DETECT{"c"};
Expand All @@ -61,9 +61,12 @@ const std::string MIN_TIME_TAG{"g"};
const std::string MAX_TIME_TAG{"h"};
const std::string CHANGE_MODEL_TAG{"i"};
const std::string LOG_LIKELIHOOD_TAG{"j"};
const std::string SHIFT_TAG{"k"};
const std::string TREND_MODEL_TAG{"l"};
const std::string RESIDUAL_MODEL_TAG{"m"};
const std::string EXPECTED_LOG_LIKELIHOOD_TAG{"k"};
const std::string SHIFT_TAG{"l"};
const std::string TREND_MODEL_TAG{"m"};
const std::string RESIDUAL_MODEL_TAG{"n"};
const std::size_t EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS{4u};
const double EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER{0.9};
}

SChangeDescription::SChangeDescription(EDescription description,
Expand Down Expand Up @@ -169,12 +172,25 @@ TOptionalChangeDescription CUnivariateTimeSeriesChangeDetector::change()

double evidences[]{noChangeBic - candidates[0].first,
noChangeBic - candidates[1].first};
m_CurrentEvidenceOfChange = evidences[0];
if ( evidences[0] > m_MinimumDeltaBicToDetect
&& evidences[0] > evidences[1] + m_MinimumDeltaBicToDetect / 2.0)
double expectedEvidence{noChangeBic - (*candidates[0].second)->expectedBic()};

double x[]{evidences[0] / m_MinimumDeltaBicToDetect,
2.0 * (evidences[0] - evidences[1]) / m_MinimumDeltaBicToDetect,
evidences[0] / EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER / expectedEvidence,
static_cast<double>(m_TimeRange.range() - m_MinimumTimeToDetect)
/ static_cast<double>(m_MaximumTimeToDetect - m_MinimumTimeToDetect)};
double p{ CTools::logisticFunction(x[0], 0.05, 1.0)
* CTools::logisticFunction(x[1], 0.1, 1.0)
* (x[2] < 0.0 ? 1.0 : CTools::logisticFunction(x[2], 0.2, 1.0))
* (0.5 + CTools::logisticFunction(x[3], 0.2, 0.5))};
LOG_TRACE("p = " << p);

if (p > 0.0625/*= std::pow(0.5, 4.0)*/)
{
return (*candidates[0].second)->change();
}

m_CurrentEvidenceOfChange = evidences[0];
}
return TOptionalChangeDescription();
}
Expand Down Expand Up @@ -236,9 +252,34 @@ namespace time_series_change_detector_detail

CUnivariateChangeModel::CUnivariateChangeModel(const TDecompositionPtr &trendModel,
const TPriorPtr &residualModel) :
m_LogLikelihood{0.0}, m_TrendModel{trendModel}, m_ResidualModel{residualModel}
m_LogLikelihood{0.0}, m_ExpectedLogLikelihood{0.0},
m_TrendModel{trendModel}, m_ResidualModel{residualModel}
{}

bool CUnivariateChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/,
core::CStateRestoreTraverser &traverser)
{
do
{
const std::string name{traverser.name()};
RESTORE_BUILT_IN(LOG_LIKELIHOOD_TAG, m_LogLikelihood);
RESTORE_BUILT_IN(EXPECTED_LOG_LIKELIHOOD_TAG, m_ExpectedLogLikelihood);
return true;
}
while (traverser.next());
return true;
}

void CUnivariateChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
{
inserter.insertValue(LOG_LIKELIHOOD_TAG,
m_LogLikelihood,
core::CIEEE754::E_SinglePrecision);
inserter.insertValue(EXPECTED_LOG_LIKELIHOOD_TAG,
m_ExpectedLogLikelihood,
core::CIEEE754::E_SinglePrecision);
}

void CUnivariateChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
{
// Note if the trend and residual models are shallow copied their
Expand All @@ -258,6 +299,7 @@ std::size_t CUnivariateChangeModel::memoryUsage() const
uint64_t CUnivariateChangeModel::checksum(uint64_t seed) const
{
seed = CChecksum::calculate(seed, m_LogLikelihood);
seed = CChecksum::calculate(seed, m_ExpectedLogLikelihood);
seed = CChecksum::calculate(seed, m_TrendModel);
return CChecksum::calculate(seed, m_ResidualModel);
}
Expand All @@ -280,6 +322,16 @@ void CUnivariateChangeModel::addLogLikelihood(double logLikelihood)
m_LogLikelihood += logLikelihood;
}

double CUnivariateChangeModel::expectedLogLikelihood() const
{
return m_ExpectedLogLikelihood;
}

void CUnivariateChangeModel::addExpectedLogLikelihood(double logLikelihood)
{
m_ExpectedLogLikelihood += logLikelihood;
}

const CTimeSeriesDecompositionInterface &CUnivariateChangeModel::trendModel() const
{
return *m_TrendModel;
Expand Down Expand Up @@ -310,31 +362,29 @@ CUnivariateNoChangeModel::CUnivariateNoChangeModel(const TDecompositionPtr &tren
CUnivariateChangeModel{trendModel, residualModel}
{}

bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/,
bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams &params,
core::CStateRestoreTraverser &traverser)
{
do
{
const std::string name{traverser.name()};
RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG,
double logLikelihood,
core::CStringUtils::stringToType(traverser.value(), logLikelihood),
this->addLogLikelihood(logLikelihood))
}
while (traverser.next());
return true;
return this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser);
}

void CUnivariateNoChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
{
inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood());
this->CUnivariateChangeModel::acceptPersistInserter(inserter);
}

double CUnivariateNoChangeModel::bic() const
{
return -2.0 * this->logLikelihood();
}

double CUnivariateNoChangeModel::expectedBic() const
{
// This is irrelevant since this is only used for deciding
// whether to accept a change.
return this->bic();
}

TOptionalChangeDescription CUnivariateNoChangeModel::change() const
{
return TOptionalChangeDescription();
Expand All @@ -357,7 +407,7 @@ void CUnivariateNoChangeModel::addSamples(std::size_t count,
samples.push_back(this->trendModel().detrend(sample.first, sample.second, 0.0));
}

double logLikelihood;
double logLikelihood{0.0};
if (this->residualModel().jointLogMarginalLikelihood(weightStyles, samples, weights,
logLikelihood) == maths_t::E_FpNoErrors)
{
Expand Down Expand Up @@ -386,13 +436,13 @@ CUnivariateLevelShiftModel::CUnivariateLevelShiftModel(const TDecompositionPtr &
bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParams &params,
core::CStateRestoreTraverser &traverser)
{
if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false)
{
return false;
}
do
{
const std::string name{traverser.name()};
RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG,
double logLikelihood,
core::CStringUtils::stringToType(traverser.value(), logLikelihood),
this->addLogLikelihood(logLikelihood))
RESTORE(SHIFT_TAG, m_Shift.fromDelimited(traverser.value()))
RESTORE_BUILT_IN(RESIDUAL_MODEL_MODE_TAG, m_ResidualModelMode)
RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount)
Expand All @@ -404,7 +454,7 @@ bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParam

void CUnivariateLevelShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
{
inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood());
this->CUnivariateChangeModel::acceptPersistInserter(inserter);
inserter.insertValue(SHIFT_TAG, m_Shift.toDelimited());
inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount);
inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind<void>(CPriorStateSerialiser(),
Expand All @@ -416,6 +466,11 @@ double CUnivariateLevelShiftModel::bic() const
return -2.0 * this->logLikelihood() + std::log(m_SampleCount);
}

double CUnivariateLevelShiftModel::expectedBic() const
{
return -2.0 * this->expectedLogLikelihood() + std::log(m_SampleCount);
}

TOptionalChangeDescription CUnivariateLevelShiftModel::change() const
{
// The "magic" 0.9 is due to the fact that the trend is updated
Expand Down Expand Up @@ -465,12 +520,24 @@ void CUnivariateLevelShiftModel::addSamples(std::size_t count,
residualModel.addSamples(weightStyles, samples, weights);
residualModel.propagateForwardsByTime(1.0);

double logLikelihood;
double logLikelihood{0.0};
if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights,
logLikelihood) == maths_t::E_FpNoErrors)
{
this->addLogLikelihood(logLikelihood);
}
for (const auto &weight : weights)
{
double expectedLogLikelihood{0.0};
TDouble4Vec1Vec weight_{weight};
if (residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{
residualModel, weightStyles, weight_},
EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS,
expectedLogLikelihood, weightStyles, weight))
{
this->addExpectedLogLikelihood(expectedLogLikelihood);
}
}
}
}

Expand All @@ -496,13 +563,13 @@ CUnivariateTimeShiftModel::CUnivariateTimeShiftModel(const TDecompositionPtr &tr
bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams &params,
core::CStateRestoreTraverser &traverser)
{
if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false)
{
return false;
}
do
{
const std::string name{traverser.name()};
RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG,
double logLikelihood,
core::CStringUtils::stringToType(traverser.value(), logLikelihood),
this->addLogLikelihood(logLikelihood))
RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser))
}
while (traverser.next());
Expand All @@ -511,7 +578,7 @@ bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams

void CUnivariateTimeShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
{
inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood());
this->CUnivariateChangeModel::acceptPersistInserter(inserter);
inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind<void>(CPriorStateSerialiser(),
boost::cref(this->residualModel()), _1));
}
Expand All @@ -521,6 +588,11 @@ double CUnivariateTimeShiftModel::bic() const
return -2.0 * this->logLikelihood();
}

double CUnivariateTimeShiftModel::expectedBic() const
{
return -2.0 * this->expectedLogLikelihood();
}

TOptionalChangeDescription CUnivariateTimeShiftModel::change() const
{
return SChangeDescription{SChangeDescription::E_TimeShift,
Expand Down Expand Up @@ -549,12 +621,22 @@ void CUnivariateTimeShiftModel::addSamples(std::size_t count,
residualModel.addSamples(weightStyles, samples, weights);
residualModel.propagateForwardsByTime(1.0);

double logLikelihood;
double logLikelihood{0.0};
if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights,
logLikelihood) == maths_t::E_FpNoErrors)
{
this->addLogLikelihood(logLikelihood);
}
for (const auto &weight : weights)
{
double expectedLogLikelihood{0.0};
TDouble4Vec1Vec weight_{weight};
residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{
residualModel, weightStyles, weight_},
EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS,
expectedLogLikelihood, weightStyles, weight);
this->addExpectedLogLikelihood(expectedLogLikelihood);
}
}
}

Expand Down

0 comments on commit 1eb8f8a

Please sign in to comment.