
Commit

Merge branch 'main' into autofix/refact-get-content
Mitan committed Dec 13, 2024
2 parents ef487d8 + 0d4196c commit 3847e82
Showing 24 changed files with 693 additions and 306 deletions.
21 changes: 15 additions & 6 deletions README.md
@@ -15,9 +15,12 @@ These instructions require access to internal Sentry resources and are intended

1. Install [direnv](https://direnv.net/) or a similar tool
2. Install [pyenv](https://github.com/pyenv/pyenv) and configure Python 3.11
3. Install [Docker](https://www.docker.com/get-started)
4. Install [Google Cloud SDK](https://cloud.google.com/sdk/docs/install)

```bash
pyenv install 3.11
pyenv local 3.11
```
3. Install [Docker](https://www.docker.com/get-started). Note that if you install Docker via Homebrew instead of Docker Desktop, you will also need to install docker-compose (see the sketch after this list).
4. Install [Google Cloud SDK](https://cloud.google.com/sdk/docs/install) and authenticate.
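If you take the Homebrew route, a minimal sketch, assuming the standard `docker` and `docker-compose` formulae (Docker Desktop users can skip this):

```bash
# Hypothetical Homebrew setup: the docker formula ships only the CLI,
# so Compose has to be installed as a separate formula.
brew install docker docker-compose
```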
### Environment Setup

1. Clone the repository and navigate to the project root
@@ -30,9 +33,9 @@ These instructions require access to internal Sentry resources and are intended
Download model artifacts:

```bash
gsutil cp -r gs://sentry-ml/seer/models ./models
gsutil cp -r gs://sentry-ml/seer/models .
```

If you see the prompt "Reauthentication required. Please insert your security key and press enter...", re-authenticate with `gcloud auth login` and set the project ID to the one used for Seer.
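A minimal sketch of that re-authentication flow; the project ID below is a placeholder, not the real Seer project:

```bash
gcloud auth login
# Placeholder -- substitute the actual Seer project ID.
gcloud config set project <seer-project-id>
```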
### Running Seer

1. Start the development environment:
@@ -46,6 +49,12 @@ gsutil cp -r gs://sentry-ml/seer/models ./models
```bash
make update
```
3. If you encounter authentication errors, run:

```bash
gcloud auth application-default login
```


## Integrating with Local Sentry

@@ -148,7 +157,7 @@ You can run all tests with `make test`.

### Running Individual Tests

Make sure you have the test database running when running individual tests, do that via `docker compose up -d test-db`.
Make sure the test database is running before you run individual tests; start it via `docker compose up --remove-orphans -d test-db`.

To run a single test, first enter a shell with `make shell`, then run `pytest tests/path/to/test.py::test_name`.
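Putting the steps together, a typical single-test run looks like this (the test path and name are placeholders):

```bash
docker compose up --remove-orphans -d test-db   # start the test database
make shell                                      # enter the dev shell
pytest tests/path/to/test.py::test_name         # placeholder test path
```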

53 changes: 35 additions & 18 deletions src/seer/anomaly_detection/accessors.py
@@ -382,29 +382,46 @@ def combine_anomalies(
MPTimeSeriesAnomalies
Combined anomalies object containing flags, scores and metadata from both approaches
"""
combined_flags = anomalies_suss.flags
combined_scores = anomalies_suss.scores
combined_thresholds = anomalies_suss.thresholds
combined_original_flags = anomalies_suss.original_flags
if anomalies_fixed is not None:
for i in range(len(anomalies_suss.flags)):
if use_suss[i]:
combined_flags[i] = anomalies_suss.flags[i]
combined_scores[i] = anomalies_suss.scores[i]
combined_original_flags[i] = anomalies_suss.original_flags[i]
if (
anomalies_suss.thresholds is not None
and anomalies_fixed.thresholds is not None
and combined_thresholds is not None
):
for j in range(len(anomalies_suss.thresholds)):
if i < len(anomalies_suss.thresholds[j]):
combined_thresholds[j][i] = anomalies_suss.thresholds[j][i]
else:
combined_flags[i] = anomalies_fixed.flags[i]
combined_scores[i] = anomalies_fixed.scores[i]
combined_original_flags[i] = anomalies_fixed.original_flags[i]
if (
anomalies_suss.thresholds is not None
and anomalies_fixed.thresholds is not None
and combined_thresholds is not None
):
for j in range(len(anomalies_suss.thresholds)):
if i < len(anomalies_fixed.thresholds[j]):
combined_thresholds[j][i] = anomalies_fixed.thresholds[j][i]

return MPTimeSeriesAnomalies(
flags=[
(
(anomalies_suss.flags[i] if use_suss[i] else anomalies_fixed.flags[i])
if anomalies_fixed is not None
else anomalies_suss.flags[i]
)
for i in range(len(anomalies_suss.flags))
],
scores=[
(
(anomalies_suss.scores[i] if use_suss[i] else anomalies_fixed.scores[i])
if anomalies_fixed is not None
else anomalies_suss.scores[i]
)
for i in range(len(anomalies_suss.scores))
],
thresholds=anomalies_suss.thresholds, # Use thresholds from either one since they're the same
flags=combined_flags,
scores=combined_scores,
thresholds=combined_thresholds,
matrix_profile_suss=anomalies_suss.matrix_profile,
matrix_profile_fixed=(
anomalies_fixed.matrix_profile if anomalies_fixed is not None else np.array([])
),
window_size=anomalies_suss.window_size,
original_flags=anomalies_suss.original_flags,
original_flags=combined_original_flags,
use_suss=use_suss,
)
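A hypothetical, simplified illustration (not Seer code) of the merge-by-mask pattern the new loop implements: per index, keep the SuSS-window result where `use_suss` is true, otherwise take the fixed-window result; thresholds are merged the same way, column by column within each threshold row.

```python
# Hypothetical stand-in data; the real code merges MPTimeSeriesAnomalies fields.
suss_flags = ["none", "anomaly", "none", "none"]
fixed_flags = ["none", "none", "anomaly", "none"]
use_suss = [True, False, False, True]

combined_flags = list(suss_flags)
for i in range(len(combined_flags)):
    if not use_suss[i]:
        combined_flags[i] = fixed_flags[i]

assert combined_flags == ["none", "none", "anomaly", "none"]
```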
15 changes: 11 additions & 4 deletions src/seer/anomaly_detection/anomaly_detection.py
@@ -80,11 +80,15 @@ def _batch_detect(
batch_detector = MPBatchAnomalyDetector()

anomalies_suss = batch_detector.detect(
convert_external_ts_to_internal(timeseries), config, window_size=window_size
convert_external_ts_to_internal(timeseries),
config,
algo_config=algo_config,
window_size=window_size,
)
anomalies_fixed = batch_detector.detect(
convert_external_ts_to_internal(timeseries),
config,
algo_config=algo_config,
window_size=algo_config.mp_fixed_window_size,
)
anomalies = DbAlertDataAccessor().combine_anomalies(
@@ -214,8 +218,9 @@ def _online_detect(
else:
anomalies.use_suss.append(True)

num_anomalies = len(streamed_anomalies_suss.flags)
streamed_anomalies = alert_data_accessor.combine_anomalies(
streamed_anomalies_suss, streamed_anomalies_fixed, anomalies.use_suss
streamed_anomalies_suss, streamed_anomalies_fixed, anomalies.use_suss[-num_anomalies:]
)

# Save new data point
@@ -253,7 +258,9 @@ def _min_required_timesteps(self, time_period, min_num_days=7):

@sentry_sdk.trace
def _combo_detect(
self, ts_with_history: TimeSeriesWithHistory, config: AnomalyDetectionConfig
self,
ts_with_history: TimeSeriesWithHistory,
config: AnomalyDetectionConfig,
) -> Tuple[List[TimeSeriesPoint], MPTimeSeriesAnomalies]:
"""
Stateless online anomaly detection for a part of a time series. This function takes two parts of the time series -
@@ -349,7 +356,7 @@ def _combo_detect(
streamed_anomalies_fixed,
[True]
* len(
ts_external
streamed_anomalies_suss.flags
), # Defaulting to using SuSS window because switching logic is for streaming only
)

7 changes: 4 additions & 3 deletions src/seer/anomaly_detection/anomaly_detection_di.py
@@ -34,10 +34,11 @@ def algoconfig_provider() -> AlgoConfig:
return AlgoConfig(
mp_ignore_trivial=True,
mp_normalize=False,
prophet_uncertainty_samples=1,
prophet_uncertainty_samples=5,
prophet_mcmc_samples=0,
mp_fixed_window_size=10,
return_thresholds=False,
return_predicted_range=False,
return_thresholds=True,
return_predicted_range=True,
)
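For context, a hypothetical illustration (not Seer code) of how two of these knobs map onto Prophet's constructor: `uncertainty_samples` sets how many simulated trajectories back the uncertainty intervals, and `mcmc_samples=0` keeps fitting at MAP estimation rather than full MCMC.

```python
from prophet import Prophet

# Hypothetical standalone model mirroring the AlgoConfig values above.
model = Prophet(uncertainty_samples=5, mcmc_samples=0)
```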


46 changes: 29 additions & 17 deletions src/seer/anomaly_detection/detectors/anomaly_detectors.py
@@ -19,6 +19,7 @@
AnomalyDetectionConfig,
AnomalyFlags,
MPTimeSeriesAnomaliesSingleWindow,
Threshold,
TimeSeries,
TimeSeriesAnomalies,
)
@@ -34,7 +35,9 @@ class AnomalyDetector(BaseModel, abc.ABC):
"""

@abc.abstractmethod
def detect(self, timeseries: TimeSeries, config: AnomalyDetectionConfig) -> TimeSeriesAnomalies:
def detect(
self, timeseries: TimeSeries, ad_config: AnomalyDetectionConfig, algo_config: AlgoConfig
) -> TimeSeriesAnomalies:
return NotImplemented


@@ -43,9 +46,14 @@ class MPBatchAnomalyDetector(AnomalyDetector):
This class encapsulates the logic for using Matrix Profile for batch anomaly detection.
"""

@inject
@sentry_sdk.trace
def detect(
self, timeseries: TimeSeries, config: AnomalyDetectionConfig, window_size: int | None = None
self,
timeseries: TimeSeries,
ad_config: AnomalyDetectionConfig,
algo_config: AlgoConfig = injected,
window_size: int | None = None,
) -> MPTimeSeriesAnomaliesSingleWindow:
"""
This method uses matrix profile to detect and score anomalies in the time series.
@@ -60,17 +68,17 @@ def detect(
Returns:
The input timeseries with an anomaly scores and a flag added
"""
return self._compute_matrix_profile(timeseries, config, window_size)
return self._compute_matrix_profile(timeseries, ad_config, algo_config, window_size)

@inject
@sentry_sdk.trace
def _compute_matrix_profile(
self,
timeseries: TimeSeries,
config: AnomalyDetectionConfig,
ad_config: AnomalyDetectionConfig,
algo_config: AlgoConfig,
window_size: int | None = None,
ws_selector: WindowSizeSelector = injected,
algo_config: AlgoConfig = injected,
scorer: MPScorer = injected,
mp_utils: MPUtils = injected,
) -> MPTimeSeriesAnomaliesSingleWindow:
@@ -113,28 +121,31 @@ def _compute_matrix_profile(
values=ts_values,
timestamps=timeseries.timestamps,
mp_dist=mp_dist,
ad_config=config,
ad_config=ad_config,
window_size=window_size,
)
if flags_and_scores is None:
raise ServerError("Failed to score the matrix profile distance")

original_flags = flags_and_scores.flags

# Apply smoothing to the flags
batch_flag_smoother = MajorityVoteBatchFlagSmoother()
smoothed_flags = batch_flag_smoother.smooth(
flags=flags_and_scores.flags,
ad_config=config,
ad_config=ad_config,
)

# Update the flags in flags_and_scores with the smoothed flags
flags_and_scores.flags = smoothed_flags

return MPTimeSeriesAnomaliesSingleWindow(
flags=flags_and_scores.flags,
flags=smoothed_flags,
scores=flags_and_scores.scores,
matrix_profile=mp,
window_size=window_size,
thresholds=flags_and_scores.thresholds,
thresholds=flags_and_scores.thresholds if algo_config.return_thresholds else None,
original_flags=original_flags,
)


@@ -161,7 +172,8 @@ class MPStreamAnomalyDetector(AnomalyDetector):
def detect(
self,
timeseries: TimeSeries,
config: AnomalyDetectionConfig,
ad_config: AnomalyDetectionConfig,
algo_config: AlgoConfig = injected,
scorer: MPScorer = injected,
mp_utils: MPUtils = injected,
) -> MPTimeSeriesAnomaliesSingleWindow:
@@ -199,7 +211,7 @@ def detect(
scores: list[float] = []
flags: list[AnomalyFlags] = []
streamed_mp: list[list[float]] = []
thresholds: list[float] = []
thresholds: list[list[Threshold]] = []
for cur_val, cur_timestamp in zip(timeseries.values, timeseries.timestamps):
# Update the stumpi stream processor with new data
stream.update(cur_val)
@@ -215,7 +227,7 @@
history_values=self.history_values,
history_timestamps=self.history_timestamps,
history_mp_dist=mp_dist_baseline,
ad_config=config,
ad_config=ad_config,
window_size=self.window_size,
)
if flags_and_scores is None:
@@ -228,17 +240,17 @@
# Apply stream smoothing to the newest flag based on the previous original flags
smoothed_flags = stream_flag_smoother.smooth(
original_flags=self.original_flags,
ad_config=config,
ad_config=ad_config,
vote_threshold=0.3,
cur_flag=flags_and_scores.flags,
)

original_flags = flags_and_scores.flags
flags_and_scores.flags = smoothed_flags

scores.extend(flags_and_scores.scores)
flags.extend(flags_and_scores.flags)
thresholds.extend(flags_and_scores.thresholds)
if flags_and_scores.thresholds is not None:
thresholds.extend(flags_and_scores.thresholds)

# Add new data point as well as its matrix profile to baseline
self.history_timestamps = np.append(self.history_timestamps, cur_timestamp)
@@ -255,6 +267,6 @@
excl_zone_denom=stumpy.config.STUMPY_EXCL_ZONE_DENOM,
),
window_size=self.window_size,
thresholds=thresholds,
original_flags=original_flags,
thresholds=thresholds if algo_config.return_thresholds else None,
original_flags=self.original_flags,
)
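For readers unfamiliar with the streaming API used above, a minimal, hypothetical sketch (not Seer code) of the `stumpy.stumpi` pattern the detector builds on: seed the stream with history, then push new points one at a time.

```python
import numpy as np
import stumpy

history = np.random.rand(200)          # baseline values
stream = stumpy.stumpi(history, m=10)  # m is the subsequence window size

for new_value in np.random.rand(5):
    stream.update(new_value)           # incrementally extend the matrix profile
    latest_mp_dist = stream.P_[-1]     # distance for the newest subsequence
```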
