
Commit

Merge branch 'main' into autofix/refact-get-content
Mitan committed Dec 13, 2024
2 parents ef487d8 + 0d4196c commit 3847e82
Showing 24 changed files with 693 additions and 306 deletions.
21 changes: 15 additions & 6 deletions README.md
@@ -15,9 +15,12 @@ These instructions require access to internal Sentry resources and are intended

1. Install [direnv](https://direnv.net/) or a similar tool
2. Install [pyenv](https://github.com/pyenv/pyenv) and configure Python 3.11
3. Install [Docker](https://www.docker.com/get-started)
4. Install [Google Cloud SDK](https://cloud.google.com/sdk/docs/install)

```bash
pyenv install 3.11
pyenv local 3.11
```
3. Install [Docker](https://www.docker.com/get-started). Note that if you install Docker via Homebrew instead of Docker Desktop, you will also need to install docker-compose (see the sketch after this list).
4. Install [Google Cloud SDK](https://cloud.google.com/sdk/docs/install) and authenticate.
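If you take the Homebrew route, a minimal sketch, assuming the standard `docker` and `docker-compose` formulae (Docker Desktop users can skip this):

```bash
# Hypothetical Homebrew setup: the docker formula ships only the CLI,
# so Compose has to be installed as a separate formula.
brew install docker docker-compose
```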
### Environment Setup

1. Clone the repository and navigate to the project root
@@ -30,9 +33,9 @@ These instructions require access to internal Sentry resources and are intended
Download model artifacts:

```bash
gsutil cp -r gs://sentry-ml/seer/models ./models
gsutil cp -r gs://sentry-ml/seer/models .
```

If you see the prompt "Reauthentication required. Please insert your security key and press enter...", re-authenticate with `gcloud auth login` and set the project ID to the one used for Seer.
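A minimal sketch of that re-authentication flow; the project ID below is a placeholder, not the real Seer project:

```bash
gcloud auth login
# Placeholder -- substitute the actual Seer project ID.
gcloud config set project <seer-project-id>
```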
### Running Seer

1. Start the development environment:
@@ -46,6 +49,12 @@ gsutil cp -r gs://sentry-ml/seer/models ./models
```bash
make update
```
3. If you encounter authentication errors, run:

```bash
gcloud auth application-default login
```


## Integrating with Local Sentry

@@ -148,7 +157,7 @@ You can run all tests with `make test`.

### Running Individual Tests

Make sure you have the test database running when running individual tests, do that via `docker compose up -d test-db`.
Make sure the test database is running before you run individual tests; start it via `docker compose up --remove-orphans -d test-db`.

To run a single test, first enter a shell with `make shell`, then run `pytest tests/path/to/test.py::test_name`.
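Putting the steps together, a typical single-test run looks like this (the test path and name are placeholders):

```bash
docker compose up --remove-orphans -d test-db   # start the test database
make shell                                      # enter the dev shell
pytest tests/path/to/test.py::test_name         # placeholder test path
```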

53 changes: 35 additions & 18 deletions src/seer/anomaly_detection/accessors.py
@@ -382,29 +382,46 @@ def combine_anomalies(
MPTimeSeriesAnomalies
Combined anomalies object containing flags, scores and metadata from both approaches
"""
combined_flags = anomalies_suss.flags
combined_scores = anomalies_suss.scores
combined_thresholds = anomalies_suss.thresholds
combined_original_flags = anomalies_suss.original_flags
if anomalies_fixed is not None:
for i in range(len(anomalies_suss.flags)):
if use_suss[i]:
combined_flags[i] = anomalies_suss.flags[i]
combined_scores[i] = anomalies_suss.scores[i]
combined_original_flags[i] = anomalies_suss.original_flags[i]
if (
anomalies_suss.thresholds is not None
and anomalies_fixed.thresholds is not None
and combined_thresholds is not None
):
for j in range(len(anomalies_suss.thresholds)):
if i < len(anomalies_suss.thresholds[j]):
combined_thresholds[j][i] = anomalies_suss.thresholds[j][i]
else:
combined_flags[i] = anomalies_fixed.flags[i]
combined_scores[i] = anomalies_fixed.scores[i]
combined_original_flags[i] = anomalies_fixed.original_flags[i]
if (
anomalies_suss.thresholds is not None
and anomalies_fixed.thresholds is not None
and combined_thresholds is not None
):
for j in range(len(anomalies_suss.thresholds)):
if i < len(anomalies_fixed.thresholds[j]):
combined_thresholds[j][i] = anomalies_fixed.thresholds[j][i]

return MPTimeSeriesAnomalies(
flags=[
(
(anomalies_suss.flags[i] if use_suss[i] else anomalies_fixed.flags[i])
if anomalies_fixed is not None
else anomalies_suss.flags[i]
)
for i in range(len(anomalies_suss.flags))
],
scores=[
(
(anomalies_suss.scores[i] if use_suss[i] else anomalies_fixed.scores[i])
if anomalies_fixed is not None
else anomalies_suss.scores[i]
)
for i in range(len(anomalies_suss.scores))
],
thresholds=anomalies_suss.thresholds, # Use thresholds from either one since they're the same
flags=combined_flags,
scores=combined_scores,
thresholds=combined_thresholds,
matrix_profile_suss=anomalies_suss.matrix_profile,
matrix_profile_fixed=(
anomalies_fixed.matrix_profile if anomalies_fixed is not None else np.array([])
),
window_size=anomalies_suss.window_size,
original_flags=anomalies_suss.original_flags,
original_flags=combined_original_flags,
use_suss=use_suss,
)
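A hypothetical, simplified illustration (not Seer code) of the merge-by-mask pattern the new loop implements: per index, keep the SuSS-window result where `use_suss` is true, otherwise take the fixed-window result; thresholds are merged the same way, column by column within each threshold row.

```python
# Hypothetical stand-in data; the real code merges MPTimeSeriesAnomalies fields.
suss_flags = ["none", "anomaly", "none", "none"]
fixed_flags = ["none", "none", "anomaly", "none"]
use_suss = [True, False, False, True]

combined_flags = list(suss_flags)
for i in range(len(combined_flags)):
    if not use_suss[i]:
        combined_flags[i] = fixed_flags[i]

assert combined_flags == ["none", "none", "anomaly", "none"]
```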
15 changes: 11 additions & 4 deletions src/seer/anomaly_detection/anomaly_detection.py
@@ -80,11 +80,15 @@ def _batch_detect(
batch_detector = MPBatchAnomalyDetector()

anomalies_suss = batch_detector.detect(
convert_external_ts_to_internal(timeseries), config, window_size=window_size
convert_external_ts_to_internal(timeseries),
config,
algo_config=algo_config,
window_size=window_size,
)
anomalies_fixed = batch_detector.detect(
convert_external_ts_to_internal(timeseries),
config,
algo_config=algo_config,
window_size=algo_config.mp_fixed_window_size,
)
anomalies = DbAlertDataAccessor().combine_anomalies(
@@ -214,8 +218,9 @@ def _online_detect(
else:
anomalies.use_suss.append(True)

num_anomalies = len(streamed_anomalies_suss.flags)
streamed_anomalies = alert_data_accessor.combine_anomalies(
streamed_anomalies_suss, streamed_anomalies_fixed, anomalies.use_suss
streamed_anomalies_suss, streamed_anomalies_fixed, anomalies.use_suss[-num_anomalies:]
)

# Save new data point
@@ -253,7 +258,9 @@ def _min_required_timesteps(self, time_period, min_num_days=7):

@sentry_sdk.trace
def _combo_detect(
self, ts_with_history: TimeSeriesWithHistory, config: AnomalyDetectionConfig
self,
ts_with_history: TimeSeriesWithHistory,
config: AnomalyDetectionConfig,
) -> Tuple[List[TimeSeriesPoint], MPTimeSeriesAnomalies]:
"""
Stateless online anomaly detection for a part of a time series. This function takes two parts of the time series -
@@ -349,7 +356,7 @@ def _combo_detect(
streamed_anomalies_fixed,
[True]
* len(
ts_external
streamed_anomalies_suss.flags
), # Defaulting to using SuSS window because switching logic is for streaming only
)

7 changes: 4 additions & 3 deletions src/seer/anomaly_detection/anomaly_detection_di.py
@@ -34,10 +34,11 @@ def algoconfig_provider() -> AlgoConfig:
return AlgoConfig(
mp_ignore_trivial=True,
mp_normalize=False,
prophet_uncertainty_samples=1,
prophet_uncertainty_samples=5,
prophet_mcmc_samples=0,
mp_fixed_window_size=10,
return_thresholds=False,
return_predicted_range=False,
return_thresholds=True,
return_predicted_range=True,
)
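For context, a hypothetical illustration (not Seer code) of how two of these knobs map onto Prophet's constructor: `uncertainty_samples` sets how many simulated trajectories back the uncertainty intervals, and `mcmc_samples=0` keeps fitting at MAP estimation rather than full MCMC.

```python
from prophet import Prophet

# Hypothetical standalone model mirroring the AlgoConfig values above.
model = Prophet(uncertainty_samples=5, mcmc_samples=0)
```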


46 changes: 29 additions & 17 deletions src/seer/anomaly_detection/detectors/anomaly_detectors.py
@@ -19,6 +19,7 @@
AnomalyDetectionConfig,
AnomalyFlags,
MPTimeSeriesAnomaliesSingleWindow,
Threshold,
TimeSeries,
TimeSeriesAnomalies,
)
@@ -34,7 +35,9 @@ class AnomalyDetector(BaseModel, abc.ABC):
"""

@abc.abstractmethod
def detect(self, timeseries: TimeSeries, config: AnomalyDetectionConfig) -> TimeSeriesAnomalies:
def detect(
self, timeseries: TimeSeries, ad_config: AnomalyDetectionConfig, algo_config: AlgoConfig
) -> TimeSeriesAnomalies:
return NotImplemented


@@ -43,9 +46,14 @@ class MPBatchAnomalyDetector(AnomalyDetector):
This class encapsulates the logic for using Matrix Profile for batch anomaly detection.
"""

@inject
@sentry_sdk.trace
def detect(
self, timeseries: TimeSeries, config: AnomalyDetectionConfig, window_size: int | None = None
self,
timeseries: TimeSeries,
ad_config: AnomalyDetectionConfig,
algo_config: AlgoConfig = injected,
window_size: int | None = None,
) -> MPTimeSeriesAnomaliesSingleWindow:
"""
This method uses matrix profile to detect and score anomalies in the time series.
@@ -60,17 +68,17 @@ def detect(
Returns:
The input timeseries with an anomaly scores and a flag added
"""
return self._compute_matrix_profile(timeseries, config, window_size)
return self._compute_matrix_profile(timeseries, ad_config, algo_config, window_size)

@inject
@sentry_sdk.trace
def _compute_matrix_profile(
self,
timeseries: TimeSeries,
config: AnomalyDetectionConfig,
ad_config: AnomalyDetectionConfig,
algo_config: AlgoConfig,
window_size: int | None = None,
ws_selector: WindowSizeSelector = injected,
algo_config: AlgoConfig = injected,
scorer: MPScorer = injected,
mp_utils: MPUtils = injected,
) -> MPTimeSeriesAnomaliesSingleWindow:
@@ -113,28 +121,31 @@ def _compute_matrix_profile(
values=ts_values,
timestamps=timeseries.timestamps,
mp_dist=mp_dist,
ad_config=config,
ad_config=ad_config,
window_size=window_size,
)
if flags_and_scores is None:
raise ServerError("Failed to score the matrix profile distance")

original_flags = flags_and_scores.flags

# Apply smoothing to the flags
batch_flag_smoother = MajorityVoteBatchFlagSmoother()
smoothed_flags = batch_flag_smoother.smooth(
flags=flags_and_scores.flags,
ad_config=config,
ad_config=ad_config,
)

# Update the flags in flags_and_scores with the smoothed flags
flags_and_scores.flags = smoothed_flags

return MPTimeSeriesAnomaliesSingleWindow(
flags=flags_and_scores.flags,
flags=smoothed_flags,
scores=flags_and_scores.scores,
matrix_profile=mp,
window_size=window_size,
thresholds=flags_and_scores.thresholds,
thresholds=flags_and_scores.thresholds if algo_config.return_thresholds else None,
original_flags=original_flags,
)


@@ -161,7 +172,8 @@ class MPStreamAnomalyDetector(AnomalyDetector):
def detect(
self,
timeseries: TimeSeries,
config: AnomalyDetectionConfig,
ad_config: AnomalyDetectionConfig,
algo_config: AlgoConfig = injected,
scorer: MPScorer = injected,
mp_utils: MPUtils = injected,
) -> MPTimeSeriesAnomaliesSingleWindow:
@@ -199,7 +211,7 @@ def detect(
scores: list[float] = []
flags: list[AnomalyFlags] = []
streamed_mp: list[list[float]] = []
thresholds: list[float] = []
thresholds: list[list[Threshold]] = []
for cur_val, cur_timestamp in zip(timeseries.values, timeseries.timestamps):
# Update the stumpi stream processor with new data
stream.update(cur_val)
@@ -215,7 +227,7 @@
history_values=self.history_values,
history_timestamps=self.history_timestamps,
history_mp_dist=mp_dist_baseline,
ad_config=config,
ad_config=ad_config,
window_size=self.window_size,
)
if flags_and_scores is None:
@@ -228,17 +240,17 @@
# Apply stream smoothing to the newest flag based on the previous original flags
smoothed_flags = stream_flag_smoother.smooth(
original_flags=self.original_flags,
ad_config=config,
ad_config=ad_config,
vote_threshold=0.3,
cur_flag=flags_and_scores.flags,
)

original_flags = flags_and_scores.flags
flags_and_scores.flags = smoothed_flags

scores.extend(flags_and_scores.scores)
flags.extend(flags_and_scores.flags)
thresholds.extend(flags_and_scores.thresholds)
if flags_and_scores.thresholds is not None:
thresholds.extend(flags_and_scores.thresholds)

# Add new data point as well as its matrix profile to baseline
self.history_timestamps = np.append(self.history_timestamps, cur_timestamp)
@@ -255,6 +267,6 @@
excl_zone_denom=stumpy.config.STUMPY_EXCL_ZONE_DENOM,
),
window_size=self.window_size,
thresholds=thresholds,
original_flags=original_flags,
thresholds=thresholds if algo_config.return_thresholds else None,
original_flags=self.original_flags,
)
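For readers unfamiliar with the streaming API used above, a minimal, hypothetical sketch (not Seer code) of the `stumpy.stumpi` pattern the detector builds on: seed the stream with history, then push new points one at a time.

```python
import numpy as np
import stumpy

history = np.random.rand(200)          # baseline values
stream = stumpy.stumpi(history, m=10)  # m is the subsequence window size

for new_value in np.random.rand(5):
    stream.update(new_value)           # incrementally extend the matrix profile
    latest_mp_dist = stream.P_[-1]     # distance for the newest subsequence
```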
