Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature(anomaly detection): Enable Noise Reduction Step #1622

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/seer/anomaly_detection/accessors.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import abc
import logging
import os
import random
import sys
from datetime import datetime, timedelta
from typing import List, Optional

import numpy as np
import sentry_sdk
import stumpy # type: ignore # mypy throws "missing library stubs"

# import stumpy # type: ignore # mypy throws "missing library stubs"
from pydantic import BaseModel
from sqlalchemy import delete

Expand All @@ -24,6 +27,10 @@
from seer.dependency_injection import inject, injected
from seer.exceptions import ClientError

# HACK: allow loading stumpy from a local source checkout instead of the
# installed package. The checkout location is machine-specific, so it is read
# from the SEER_STUMPY_SRC_PATH environment variable rather than being
# hard-coded to one developer's home directory. When the variable is unset the
# regularly installed stumpy is imported.
# TODO: remove this override once the required stumpy build is installable as
# a normal dependency.
stumpy_path_src = os.environ.get("SEER_STUMPY_SRC_PATH")
if stumpy_path_src:
    # Prepend so the checkout shadows any installed stumpy distribution.
    sys.path.insert(0, os.path.abspath(stumpy_path_src))
import stumpy  # type: ignore # mypy throws "missing library stubs"

logger = logging.getLogger(__name__)


Expand Down
9 changes: 8 additions & 1 deletion src/seer/anomaly_detection/anomaly_detection.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import logging
import os
import sys
from typing import List, Tuple

import numpy as np
import sentry_sdk
import stumpy # type: ignore # mypy throws "missing library stubs"

# import stumpy # type: ignore # mypy throws "missing library stubs"
from pydantic import BaseModel

from seer.anomaly_detection.accessors import AlertDataAccessor, DbAlertDataAccessor
Expand All @@ -29,6 +32,10 @@
from seer.exceptions import ClientError, ServerError
from seer.tags import AnomalyDetectionModes, AnomalyDetectionTags

# HACK: allow loading stumpy from a local source checkout instead of the
# installed package. The checkout location is machine-specific, so it is read
# from the SEER_STUMPY_SRC_PATH environment variable rather than being
# hard-coded to one developer's home directory. When the variable is unset the
# regularly installed stumpy is imported.
# TODO: remove this override once the required stumpy build is installable as
# a normal dependency.
stumpy_path_src = os.environ.get("SEER_STUMPY_SRC_PATH")
if stumpy_path_src:
    # Prepend so the checkout shadows any installed stumpy distribution.
    sys.path.insert(0, os.path.abspath(stumpy_path_src))
import stumpy  # type: ignore # mypy throws "missing library stubs"

anomaly_detection_module.enable()
logger = logging.getLogger(__name__)

Expand Down
6 changes: 6 additions & 0 deletions src/seer/anomaly_detection/anomaly_detection_di.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
LocationDetector,
ProphetLocationDetector,
)
from seer.anomaly_detection.detectors.noise_reducers import NoiseReducer, VarianceNoiseReducer
from seer.anomaly_detection.models import AlgoConfig
from seer.dependency_injection import Module

Expand Down Expand Up @@ -60,3 +61,8 @@ def mp_utils_provider() -> MPUtils:
@anomaly_detection_module.provider
def location_detector_provider() -> LocationDetector:
    """Build the LocationDetector registered with anomaly_detection_module.

    Returns a freshly constructed ProphetLocationDetector.
    """
    detector = ProphetLocationDetector()
    return detector


@anomaly_detection_module.provider
def noise_reducer_provider() -> NoiseReducer:
    """Build the NoiseReducer registered with anomaly_detection_module.

    Returns a freshly constructed VarianceNoiseReducer.
    """
    reducer = VarianceNoiseReducer()
    return reducer
3 changes: 3 additions & 0 deletions src/seer/anomaly_detection/detectors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
anomaly_detectors,
mp_scorers,
mp_utils,
noise_reducers,
normalizers,
smoothers,
window_size_selectors,
Expand All @@ -24,3 +25,5 @@
FlagSmoother = smoothers.FlagSmoother
MajorityVoteBatchFlagSmoother = smoothers.MajorityVoteBatchFlagSmoother
MajorityVoteStreamFlagSmoother = smoothers.MajorityVoteStreamFlagSmoother
NoiseReducer = noise_reducers.NoiseReducer
VarianceNoiseReducer = noise_reducers.VarianceNoiseReducer
18 changes: 16 additions & 2 deletions src/seer/anomaly_detection/detectors/anomaly_detectors.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import abc
import logging
import os
import sys

import numpy as np
import numpy.typing as npt
import sentry_sdk
import stumpy # type: ignore # mypy throws "missing library stubs"

# import stumpy # type: ignore # mypy throws "missing library stubs"
from pydantic import BaseModel, ConfigDict, Field

from seer.anomaly_detection.detectors.mp_scorers import MPScorer
from seer.anomaly_detection.detectors.mp_utils import MPUtils
from seer.anomaly_detection.detectors.noise_reducers import NoiseReducer
from seer.anomaly_detection.detectors.smoothers import (
MajorityVoteBatchFlagSmoother,
MajorityVoteStreamFlagSmoother,
Expand All @@ -26,6 +30,10 @@
from seer.dependency_injection import inject, injected
from seer.exceptions import ServerError

# HACK: allow loading stumpy from a local source checkout instead of the
# installed package (this module passes a `std_noise` argument to stumpy, so
# presumably a patched build is required -- confirm). The checkout location is
# machine-specific, so it is read from the SEER_STUMPY_SRC_PATH environment
# variable rather than being hard-coded to one developer's home directory.
# When the variable is unset the regularly installed stumpy is imported.
# TODO: remove this override once the required stumpy build is installable as
# a normal dependency.
stumpy_path_src = os.environ.get("SEER_STUMPY_SRC_PATH")
if stumpy_path_src:
    # Prepend so the checkout shadows any installed stumpy distribution.
    sys.path.insert(0, os.path.abspath(stumpy_path_src))
import stumpy  # type: ignore # mypy throws "missing library stubs"

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -81,6 +89,7 @@ def _compute_matrix_profile(
ws_selector: WindowSizeSelector = injected,
scorer: MPScorer = injected,
mp_utils: MPUtils = injected,
noise_reducer: NoiseReducer = injected,
) -> MPTimeSeriesAnomaliesSingleWindow:
"""
This method calls stumpy.stump to compute the matrix profile and scores the matrix profile distances
Expand All @@ -107,11 +116,13 @@ def _compute_matrix_profile(
# TODO: Add sentry logging of this error
raise ServerError("Invalid window size")
# Get the matrix profile for the time series
noise_parameter = noise_reducer.get_noise_parameter(ts_values)
mp = stumpy.stump(
ts_values,
m=max(3, window_size),
ignore_trivial=algo_config.mp_ignore_trivial,
normalize=False,
std_noise=noise_parameter,
)

# We do not normalize the matrix profile here as normalizing during stream detection later is not straightforward.
Expand Down Expand Up @@ -176,6 +187,7 @@ def detect(
algo_config: AlgoConfig = injected,
scorer: MPScorer = injected,
mp_utils: MPUtils = injected,
noise_reducer: NoiseReducer = injected,
) -> MPTimeSeriesAnomaliesSingleWindow:
"""
This method uses stumpy.stumpi to stream compute the matrix profile and scores the matrix profile distances
Expand Down Expand Up @@ -213,8 +225,10 @@ def detect(
streamed_mp: list[list[float]] = []
thresholds: list[list[Threshold]] = []
for cur_val, cur_timestamp in zip(timeseries.values, timeseries.timestamps):

# Update the stumpi stream processor with new data
stream.update(cur_val)
noise_parameter = noise_reducer.get_noise_parameter(np.array(self.history_values))
stream.update(cur_val, std_noise=noise_parameter)

# Get the matrix profile for the new data and score it
cur_mp = [stream.P_[-1], stream.I_[-1], stream.left_I_[-1], -1]
Expand Down
6 changes: 3 additions & 3 deletions src/seer/anomaly_detection/detectors/mp_scorers.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,13 +190,13 @@ class MPIQRScorer(MPScorer):
{
# High sensitivity = more anomalies + higher false positives
# Data point outside of bottom 70% of the MP distances considered anomalous
"high": [0.3, 0.7],
"high": [0.35, 0.65],
# Medium sensitivity = lesser anomalies + lesser false positives
# Data point outside of bottom 80% of the MP distances considered anomalous
"medium": [0.2, 0.8],
"medium": [0.25, 0.75],
# Low sensitivity = least anomalies + least false positives
# Data point outside of bottom 90% of the MP distances considered anomalous
"low": [0.1, 0.9],
"low": [0.15, 0.85],
},
description="Lower and upper bounds for high sensitivity",
)
Expand Down
54 changes: 54 additions & 0 deletions src/seer/anomaly_detection/detectors/noise_reducers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import abc

import numpy as np
import numpy.typing as npt
from pydantic import BaseModel


class NoiseReducer(BaseModel, abc.ABC):
    """
    Abstract base class for selecting the noise parameter for stumpy
    """

    @abc.abstractmethod
    def get_noise_parameter(
        self, timeseries: npt.NDArray, window: int = 12, scale_factor: float = 1.0
    ) -> float:
        """
        Compute the noise parameter for a time series.

        Parameters:
        -----------
        timeseries : npt.NDArray
            Input time series array
        window : int, default=12
            Window size used by the implementation when estimating noise
        scale_factor : float, default=1.0
            Factor to scale the final noise parameter

        Returns:
        --------
        float
            The noise parameter

        Raises:
        -------
        NotImplementedError
            Always, when invoked on the base class (e.g. through super()).
        """
        # `NotImplemented` is a sentinel reserved for binary-operator dunders
        # and is not a float; raising makes an accidental super() call fail
        # loudly instead of propagating a bogus value.
        raise NotImplementedError("Subclasses must implement get_noise_parameter")


class VarianceNoiseReducer(NoiseReducer):
    """
    Noise reducer that derives the noise parameter from per-window variances.
    """

    def get_noise_parameter(
        self, timeseries: npt.NDArray, window: int = 12, scale_factor: float = 1.0
    ) -> float:
        """
        Gets the noise parameter by calculating the median variance across
        consecutive, non-overlapping windows of the timeseries.

        The series is cut into len(timeseries) // window full windows starting
        at index 0; any trailing values that do not fill a complete window are
        ignored. If window exceeds the series length, the whole series is
        treated as a single window.

        NOTE(review): the returned value is a variance, while callers pass it
        to stumpy as `std_noise` -- confirm which quantity stumpy expects.

        Parameters:
        -----------
        timeseries : npt.NDArray
            Input time series array (assumed 1-D)
        window : int, default=12
            Size of each non-overlapping window
        scale_factor : float, default=1.0
            Factor to scale the final noise parameter

        Returns:
        --------
        float
            Noise parameter calculated as median variance * scale_factor, or
            0.0 when the series is empty or window is not positive
        """
        # Coerce once so plain sequences work with the slicing/reshape below.
        values = np.asarray(timeseries)

        # TODO: The window should be ~half a day so should be based on ad_config
        if len(values) == 0 or window <= 0:
            return 0.0

        # Never ask for a window longer than the data we have.
        window = min(window, len(values))

        n_windows = len(values) // window
        # Drop the trailing partial window so the data reshapes cleanly.
        windowed_ts = values[: n_windows * window].reshape(n_windows, window)
        variances = np.var(windowed_ts, axis=1)

        # Cast so the result is a builtin float, as the annotation promises,
        # rather than a numpy scalar.
        return float(np.median(variances) * scale_factor)
56 changes: 56 additions & 0 deletions tests/seer/anomaly_detection/detectors/test_noise_reducers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import unittest

import numpy as np

from seer.anomaly_detection.detectors.noise_reducers import VarianceNoiseReducer


class TestVarianceNoiseReducer(unittest.TestCase):
    """Unit tests for VarianceNoiseReducer.get_noise_parameter."""

    def setUp(self):
        self.noise_reducer = VarianceNoiseReducer()

    def test_get_noise_parameter_standard_array(self):
        # Three windows of four consecutive integers; each has variance 1.25.
        timeseries = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        result = self.noise_reducer.get_noise_parameter(timeseries, 4, 1.0)
        self.assertAlmostEqual(result, 1.25)

    def test_get_noise_parameter_with_scale_factor(self):
        timeseries = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        result = self.noise_reducer.get_noise_parameter(timeseries, 4, 2.0)
        self.assertAlmostEqual(result, 2.5)

    def test_get_noise_parameter_constant_array(self):
        timeseries = np.array([5, 5, 5, 5, 5, 5, 5, 5])
        result = self.noise_reducer.get_noise_parameter(timeseries, 4)
        self.assertAlmostEqual(result, 0.0)

    def test_get_noise_parameter_window_larger_than_timeseries(self):
        # The window is clamped to the series length, giving a single window.
        timeseries = np.array([1, 2, 3, 4])
        result = self.noise_reducer.get_noise_parameter(timeseries, 10)
        self.assertAlmostEqual(result, 1.25)

    def test_get_noise_parameter_empty_array(self):
        timeseries = np.array([])
        result = self.noise_reducer.get_noise_parameter(timeseries, 4)
        self.assertAlmostEqual(result, 0.0)

    def test_get_noise_parameter_single_value(self):
        # Single value should have 0 variance
        timeseries = np.array([1])
        result = self.noise_reducer.get_noise_parameter(timeseries, 4)
        self.assertAlmostEqual(result, 0.0)

    def test_get_noise_parameter_non_positive_window(self):
        # A zero or negative window short-circuits to 0.0.
        timeseries = np.array([1, 2, 3, 4])
        for window in (0, -3):
            with self.subTest(window=window):
                result = self.noise_reducer.get_noise_parameter(timeseries, window)
                self.assertAlmostEqual(result, 0.0)

    def test_get_noise_parameter_ignores_trailing_partial_window(self):
        # 14 values with window 4: the last two values do not fill a window
        # and must not affect the result.
        timeseries = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1000, -1000])
        result = self.noise_reducer.get_noise_parameter(timeseries, 4)
        self.assertAlmostEqual(result, 1.25)
Loading