From 434a9a19b7f41ae868102e0154df95db8ec633c4 Mon Sep 17 00:00:00 2001 From: Shankari Date: Sun, 22 Jan 2023 22:53:03 -0800 Subject: [PATCH 1/9] Add a new unit test for the big jumps caused when all smoothing segments are clusters Once the actual issue is addressed, this will fix https://github.com/e-mission/e-mission-docs/issues/843 For now, we load the location dataframes for the two use cases and verify that the returned values are the ones in the current implementation. Procedure: - Perturb the location points in the original use cases to avoid leaking information - Load the location points into the test case - Run the filtering code - Verify that the output is consistent with https://github.com/e-mission/e-mission-docs/issues/843#issuecomment-1399565902 https://github.com/e-mission/e-mission-docs/issues/843#issuecomment-1399655022 Also change the location smoothing code from `logging.info` to `logging.exception` so that we can see where the error is in a more meaningful way Testing done: - Test passes ``` ---------------------------------------------------------------------- Ran 1 test in 0.387s ``` Note that due to the perturbation of the location points, the outliers no longer perfectly match the original use case, but are close enough ``` 2023-01-22 22:37:57,262:INFO:4634275328:After first round, still have outliers accuracy altitude ... distance speed 17 70.051 88.551857 ... 8.468128e+06 50922.935508 26 3.778 66.404068 ... 8.467873e+06 2878.645674 49 3.900 72.118635 ... 4.673209e+00 2.336605 2023-01-22 22:37:57,308:INFO:4634275328:After first round, still have outliers Unnamed: 0 accuracy altitude ... heading distance speed 14 14 5.638 470.899994 ... 
88.989357 1.113137e+07 284923.028227 ``` --- .../intake/cleaning/location_smoothing.py | 2 +- .../intakeTests/TestLocationSmoothing.py | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/emission/analysis/intake/cleaning/location_smoothing.py b/emission/analysis/intake/cleaning/location_smoothing.py index 93d79f1c5..09e2b40b8 100644 --- a/emission/analysis/intake/cleaning/location_smoothing.py +++ b/emission/analysis/intake/cleaning/location_smoothing.py @@ -190,7 +190,7 @@ def get_points_to_filter(section_points_df, outlier_algo, filtering_algo): to_delete_mask = np.logical_not(filtering_algo.inlier_mask_) return with_speeds_df[to_delete_mask] except Exception as e: - logging.info("Caught error %s while processing section, skipping..." % e) + logging.exception("Caught error %s while processing section, skipping..." % e) return None else: logging.debug("no filtering algo specified, returning None") diff --git a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py index 2f5952a43..f4a0767c0 100644 --- a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py +++ b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py @@ -151,6 +151,32 @@ def testPointFilteringRichmondJump(self): self.assertEqual(np.count_nonzero(to_delete_mask), 1) self.assertEqual([str(id) for id in delete_ids], ["55e86dbb7d65cb39ee987e09"]) + # Tests for the special handling of big jumps in trips where all other points are in clusters + # If there are multiple clusters, then the simple alternation of GOOD and BAD fails + # and we need a more sophisticated check + def testFilterAllClusters(self): + import pandas as pd + import itertools + + outlier_algo = eaics.BoxplotOutlier() + jump_algo = eaicj.SmoothZigzag(False, 100) + + # US to ocean jump: case 1 of https://github.com/e-mission/e-mission-docs/issues/843 + with_speeds_df = 
pd.read_csv("emission/tests/data/smoothing_data/all_cluster_case_1.csv", index_col=0) + with_speeds_df.drop(["distance", "speed", "heading"], axis="columns", inplace=True) + with_speeds_df["loc"] = with_speeds_df["loc"].apply(lambda lstr: json.loads(lstr.replace("'", '"'))) + filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo) + expected_result_idx = list(itertools.chain(range(0,11), [16], range(21, 26))) + self.assertEqual(list(filtered_points._id.dropna()), list(with_speeds_df.loc[expected_result_idx]._id)) + + # PR to pakistan jump: case 2 of https://github.com/e-mission/e-mission-docs/issues/843 + with_speeds_df = pd.read_csv("emission/tests/data/smoothing_data/all_cluster_case_2.csv") + with_speeds_df.drop(["distance", "speed", "heading"], axis="columns", inplace=True) + with_speeds_df["loc"] = with_speeds_df["loc"].apply(lambda lstr: json.loads(lstr.replace("'", '"'))) + filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo) + expected_result_idx = list(itertools.chain([0], range(2,11), range(12, 14))) + self.assertEqual(list(filtered_points._id.dropna()), list(with_speeds_df.loc[expected_result_idx]._id)) + def testPointFilteringZigzag(self): classicJumpTrip1 = self.trip_entries[8] self.loadPointsForTrip(classicJumpTrip1.get_id()) From 7d44d634e9d65bb43a91519d5377762a6a089e91 Mon Sep 17 00:00:00 2001 From: Shankari Date: Sun, 22 Jan 2023 23:03:59 -0800 Subject: [PATCH 2/9] Change the assertion checks to use the row index instead of the id To make it easier to debug in case there are errors --- .../tests/analysisTests/intakeTests/TestLocationSmoothing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py index f4a0767c0..3c6ba7db7 100644 --- a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py +++ 
b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py @@ -167,7 +167,7 @@ def testFilterAllClusters(self): with_speeds_df["loc"] = with_speeds_df["loc"].apply(lambda lstr: json.loads(lstr.replace("'", '"'))) filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo) expected_result_idx = list(itertools.chain(range(0,11), [16], range(21, 26))) - self.assertEqual(list(filtered_points._id.dropna()), list(with_speeds_df.loc[expected_result_idx]._id)) + self.assertEqual(list(filtered_points.dropna().index), expected_result_idx) # PR to pakistan jump: case 2 of https://github.com/e-mission/e-mission-docs/issues/843 with_speeds_df = pd.read_csv("emission/tests/data/smoothing_data/all_cluster_case_2.csv") @@ -175,7 +175,7 @@ def testFilterAllClusters(self): with_speeds_df["loc"] = with_speeds_df["loc"].apply(lambda lstr: json.loads(lstr.replace("'", '"'))) filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo) expected_result_idx = list(itertools.chain([0], range(2,11), range(12, 14))) - self.assertEqual(list(filtered_points._id.dropna()), list(with_speeds_df.loc[expected_result_idx]._id)) + self.assertEqual(list(filtered_points.dropna().index), expected_result_idx) def testPointFilteringZigzag(self): classicJumpTrip1 = self.trip_entries[8] From 67f5c86206e41168c6f3664fa5a2d4152d9d4091 Mon Sep 17 00:00:00 2001 From: Shankari Date: Mon, 23 Jan 2023 22:33:46 -0800 Subject: [PATCH 3/9] Implement a backup algorithm in case the first zigzag algo does not work - Since we have already implemented many different smoothing algorithms, we pick POSDAP to use as backup - if we still have outliers after the first round, and the max value is over MACH1, we fall back to the backup algo - after implementing the backup algo, if we don't have outliers, the backup algo has succeeded and we use its results - if we do have outliers, but the max value is under MACH1, the backup algo has succeeded and we use its results - if 
we have outliers, and the max is high (> MACH1) the backup algo has failed With this change, both the tests also change to the correctly deleted values - [16 17 18 19 20] for use case 1 (https://github.com/e-mission/e-mission-docs/issues/843#issuecomment-1399411253) - [11] for use case 2 (https://github.com/e-mission/e-mission-docs/issues/843#issuecomment-1399631793) In this commit, we also check in the csv data files for the two test cases --- .../cleaning_methods/jump_smoothing.py | 38 ++++++-- .../intakeTests/TestLocationSmoothing.py | 4 +- .../smoothing_data/all_cluster_case_1.csv | 90 +++++++++++++++++++ .../smoothing_data/all_cluster_case_2.csv | 22 +++++ 4 files changed, 147 insertions(+), 7 deletions(-) create mode 100644 emission/tests/data/smoothing_data/all_cluster_case_1.csv create mode 100644 emission/tests/data/smoothing_data/all_cluster_case_2.csv diff --git a/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py b/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py index bda702ded..cc8ab5523 100644 --- a/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py +++ b/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py @@ -267,10 +267,36 @@ def filter(self, with_speeds_df): recomputed_speeds_df = ls.recalc_speed(self.with_speeds_df[self.inlier_mask_]) recomputed_threshold = cso.BoxplotOutlier(ignore_zeros = True).get_threshold(recomputed_speeds_df) + logging.info("After first round, recomputed max = %s, recomputed threshold = %s" % + (recomputed_speeds_df.speed.max(), recomputed_threshold)) # assert recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0, "After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold] if recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] != 0: logging.info("After first round, still have outliers %s" % 
recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]) + MACH1 = 340.29 + if recomputed_speeds_df.speed.max() > MACH1: + backup_filtering_algo = SmoothPosdap(MACH1) + backup_filtering_algo.filter(with_speeds_df) + + recomputed_speeds_df = ls.recalc_speed(self.with_speeds_df[backup_filtering_algo.inlier_mask_]) + recomputed_threshold = cso.BoxplotOutlier(ignore_zeros = True).get_threshold(recomputed_speeds_df) + logging.info("After second round, max = %s, recomputed threshold = %s" % + (recomputed_speeds_df.speed.max(), recomputed_threshold)) + # assert recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0, "After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold] + if recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0: + logging.info("After second round, no outliers, returning backup mask %s" % backup_filtering_algo.inlier_mask_) + self.inlier_mask_ = backup_filtering_algo.inlier_mask_ + else: + logging.info("After second round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]) + if recomputed_speeds_df.speed.max() > MACH1: + logging.info("And they are also > %s, backup algo also failed" % MACH1) + else: + logging.debug("But they are all < %s, so returning outliers %s" % + (MACH1, np.nonzero(np.logical_not(backup_filtering_algo.inlier_mask_)))) + self.inlier_mask_ = backup_filtering_algo.inlier_mask_ + +### Re-implemented from the prior POSDAP algorithm +### This does seem to use some kind of max speed class SmoothPosdap(object): def __init__(self, maxSpeed = 100): @@ -324,8 +350,8 @@ def filter(self, with_speeds_df): logging.info("len(last_segment) = %d, len(curr_segment) = %d, skipping" % (len(last_segment), len(curr_segment))) continue - get_coords = lambda i: [with_speeds_df.iloc[i]["mLongitude"], with_speeds_df.iloc[i]["mLatitude"]] - get_ts = lambda i: 
with_speeds_df.iloc[i]["mTime"] + get_coords = lambda i: [with_speeds_df.iloc[i]["longitude"], with_speeds_df.iloc[i]["latitude"]] + get_ts = lambda i: with_speeds_df.iloc[i]["ts"] # I don't know why they would use time instead of distance, but # this is what the existing POSDAP code does. print("About to compare curr_segment duration %s with last segment duration %s" % @@ -338,7 +364,7 @@ def filter(self, with_speeds_df): for curr_idx in curr_segment: print("Comparing distance %s with speed %s * time %s = %s" % (math.fabs(ec.calDistance(get_coords(ref_idx), get_coords(curr_idx))), - old_div(self.maxSpeed, 100), abs(get_ts(ref_idx) - get_ts(curr_idx)), + self.maxSpeed / 100, abs(get_ts(ref_idx) - get_ts(curr_idx)), self.maxSpeed / 100 * abs(get_ts(ref_idx) - get_ts(curr_idx)))) if (math.fabs(ec.calDistance(get_coords(ref_idx), get_coords(curr_idx))) > @@ -351,14 +377,16 @@ def filter(self, with_speeds_df): for curr_idx in reversed(last_segment): print("Comparing distance %s with speed %s * time %s = %s" % (math.fabs(ec.calDistance(get_coords(ref_idx), get_coords(curr_idx))), - old_div(self.maxSpeed, 1000) , abs(get_ts(ref_idx) - get_ts(curr_idx)), + self.maxSpeed / 1000, abs(get_ts(ref_idx) - get_ts(curr_idx)), self.maxSpeed / 1000 * abs(get_ts(ref_idx) - get_ts(curr_idx)))) if (abs(ec.calDistance(get_coords(ref_idx), get_coords(curr_idx))) > (self.maxSpeed / 1000 * abs(get_ts(ref_idx) - get_ts(curr_idx)))): print("Distance is greater than max speed * time, deleting %s" % curr_idx) self.inlier_mask_[curr_idx] = False last_segment = curr_segment - logging.info("Filtering complete, removed indices = %s" % np.nonzero(self.inlier_mask_)) + self.outlier_mask_ = np.logical_not(self.inlier_mask_) + logging.info("Filtering complete, retained indices = %s, removed indices = %s" % + (np.nonzero(self.inlier_mask_), np.nonzero(self.outlier_mask_))) class SmoothPiecewiseRansac(object): def __init__(self, maxSpeed = 100): diff --git 
a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py index 3c6ba7db7..988c164c0 100644 --- a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py +++ b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py @@ -166,7 +166,7 @@ def testFilterAllClusters(self): with_speeds_df.drop(["distance", "speed", "heading"], axis="columns", inplace=True) with_speeds_df["loc"] = with_speeds_df["loc"].apply(lambda lstr: json.loads(lstr.replace("'", '"'))) filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo) - expected_result_idx = list(itertools.chain(range(0,11), [16], range(21, 26))) + expected_result_idx = list(range(16, 21)) self.assertEqual(list(filtered_points.dropna().index), expected_result_idx) # PR to pakistan jump: case 2 of https://github.com/e-mission/e-mission-docs/issues/843 @@ -174,7 +174,7 @@ def testFilterAllClusters(self): with_speeds_df.drop(["distance", "speed", "heading"], axis="columns", inplace=True) with_speeds_df["loc"] = with_speeds_df["loc"].apply(lambda lstr: json.loads(lstr.replace("'", '"'))) filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo) - expected_result_idx = list(itertools.chain([0], range(2,11), range(12, 14))) + expected_result_idx = [11] self.assertEqual(list(filtered_points.dropna().index), expected_result_idx) def testPointFilteringZigzag(self): diff --git a/emission/tests/data/smoothing_data/all_cluster_case_1.csv b/emission/tests/data/smoothing_data/all_cluster_case_1.csv new file mode 100644 index 000000000..8e144aa8f --- /dev/null +++ b/emission/tests/data/smoothing_data/all_cluster_case_1.csv @@ -0,0 +1,90 @@ 
+,accuracy,altitude,elapsedRealtimeNanos,filter,fmt_time,latitude,longitude,sensed_speed,ts,loc,local_dt_year,local_dt_month,local_dt_day,local_dt_hour,local_dt_minute,local_dt_second,local_dt_weekday,local_dt_timezone,_id,user_id,metadata_write_ts,distance,speed,heading +0,128.899,53.29421048836261,127163135000000,time,2022-10-23T08:04:55.591000-04:00,45.8308428,-88.7994027,0.0,1666526695.591,"{'type': 'Point', 'coordinates': [-88.7994027, 45.8308428]}",2022,10,23,8,4,55,6,America/New_York,63cb3b885a5b6a9f96784b1b,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526695.896,0.0,0.0,0.0 +1,137.733,53.18022198075521,127167552000000,time,2022-10-23T08:05:00.007000-04:00,45.8308428,-88.7994027,0.0,1666526700.007,"{'type': 'Point', 'coordinates': [-88.7994027, 45.8308428]}",2022,10,23,8,5,0,6,America/New_York,63cb3b885a5b6a9f96784b19,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526700.617,0.0,0.0,0.0 +2,142.5,53.5344055898488,127172737000000,time,2022-10-23T08:05:05.193000-04:00,45.8307925,-88.7994146,0.22008851,1666526705.193,"{'type': 'Point', 'coordinates': [-88.7994146, 45.8307925]}",2022,10,23,8,5,5,6,America/New_York,63cb3b885a5b6a9f96784b17,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526705.719,5.695060955710427,1.0981605931970009,-169.1421124942498 +3,148.326,53.15783156166135,127177592000000,time,2022-10-23T08:05:10.048000-04:00,45.8307213,-88.7994314,0.5686664,1666526710.048,"{'type': 'Point', 'coordinates': [-88.7994314, 45.8307213]}",2022,10,23,8,5,10,6,America/New_York,63cb3b885a5b6a9f96784b14,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526710.631,8.060643332251898,1.6602766839515208,-169.17012883198421 +4,177.64,52.95835590021149,127182601000000,time,2022-10-23T08:05:15.057000-04:00,45.8306891,-88.799439,0.6136053,1666526715.057,"{'type': 'Point', 'coordinates': [-88.799439, 45.8306891]}",2022,10,23,8,5,15,6,America/New_York,63cb3b885a5b6a9f96784b12,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526715.65,3.645441458854956,0.72777831634311,-169.1669972535065 
+5,164.4,53.013313010629886,127318936000000,time,2022-10-23T08:07:31.391000-04:00,45.8308808,-88.7993278,1.0835898,1666526851.391,"{'type': 'Point', 'coordinates': [-88.7993278, 45.8308808]}",2022,10,23,8,7,31,6,America/New_York,63cb3b885a5b6a9f96784b69,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526851.507,23.555713987919532,0.17277945317239715,25.18728122349039 +6,173.36,52.77313271496981,127323416000000,time,2022-10-23T08:07:35.871000-04:00,45.8309204,-88.7993044,0.9846095,1666526855.871,"{'type': 'Point', 'coordinates': [-88.7993044, 45.8309204]}",2022,10,23,8,7,35,6,America/New_York,63cb3b885a5b6a9f96784b65,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526856.533,4.882556607210083,1.0898563808979291,25.59807206031767 +7,172.1,53.00720664738434,127334000000000,time,2022-10-23T08:07:46.456000-04:00,45.8309225,-88.7993071,0.23381805,1666526866.456,"{'type': 'Point', 'coordinates': [-88.7993071, 45.8309225]}",2022,10,23,8,7,46,6,America/New_York,63cb3b885a5b6a9f96784b61,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526866.57,0.3373040307994092,0.031866228586094385,-46.18901596335532 +8,164.4,52.8016283254618,127339048000000,time,2022-10-23T08:07:51.503000-04:00,45.8309141,-88.79931,0.05169008,1666526871.503,"{'type': 'Point', 'coordinates': [-88.79931, 45.8309141]}",2022,10,23,8,7,51,6,America/New_York,63cb3b885a5b6a9f96784b5f,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526871.617,0.9699359334678982,0.1921806908388942,-164.363006819066 +9,177.848,52.82401780109306,127343531000000,time,2022-10-23T08:07:55.987000-04:00,45.83091,-88.7993115,0.015241567,1666526875.987,"{'type': 'Point', 'coordinates': [-88.7993115, 45.83091]}",2022,10,23,8,7,55,6,America/New_York,63cb3b885a5b6a9f96784b5c,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526876.616,0.47553161494456236,0.10605076235130477,-163.4788445997531 +10,149.6,52.24598124640867,127389330000000,time,2022-10-23T08:08:41.785000-04:00,45.8308181,-88.7994178,0.85806763,1666526921.785,"{'type': 'Point', 'coordinates': [-88.7994178, 
45.8308181]}",2022,10,23,8,8,41,6,America/New_York,63cb3b885a5b6a9f96784b4e,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526921.919,14.009257411457014,0.3058923398773179,-136.8388449707518 +11,164.4,52.82605321090949,127394353000000,time,2022-10-23T08:08:46.809000-04:00,45.8308614,-88.7991956,1.4498969,1666526926.809,"{'type': 'Point', 'coordinates': [-88.7991956, 45.8308614]}",2022,10,23,8,8,46,6,America/New_York,63cb3b885a5b6a9f96784b4c,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526926.941,20.602092552435682,4.100735040173635,76.48484084087761 +12,177.84,52.88508030876593,127398833000000,time,2022-10-23T08:08:51.289000-04:00,45.8308858,-88.7990803,1.8527607,1666526931.289,"{'type': 'Point', 'coordinates': [-88.7990803, 45.8308858]}",2022,10,23,8,8,51,6,America/New_York,63cb3b885a5b6a9f96784b49,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526931.933,10.742684463679929,2.3979206290051835,75.37100376876364 +13,180.0,52.53906526393154,127404380000000,time,2022-10-23T08:08:56.835000-04:00,45.8309062,-88.7991955,0.36177522,1666526936.835,"{'type': 'Point', 'coordinates': [-88.7991955, 45.8309062]}",2022,10,23,8,8,56,6,America/New_York,63cb3b885a5b6a9f96784b47,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526936.978,10.630248887122416,1.9167415939074337,-77.6789449059423 +14,193.53,52.26429869980723,127408890000000,time,2022-10-23T08:09:01.345000-04:00,45.8309145,-88.7992615,0.58305377,1666526941.345,"{'type': 'Point', 'coordinates': [-88.7992615, 45.8309145]}",2022,10,23,8,9,1,6,America/New_York,63cb3b885a5b6a9f96784aee,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666526941.998,6.021123998920624,1.3350607564640256,-81.182925140903 +15,110.0,61.95244162061288,127811573000000,time,2022-10-23T08:15:44.029000-04:00,45.8309706,-88.7987701,0.0,1666527344.029,"{'type': 'Point', 'coordinates': [-88.7987701, 45.8309706]}",2022,10,23,8,15,44,6,America/New_York,63cb3b885a5b6a9f96784992,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666527344.151,44.73726342730601,0.11109769304370373,81.98457976210531 
+16,159.5,88.81290436328116,127972937000000,time,2022-10-23T08:18:25.431000-04:00,9.509628,-8.930897,0.0,1666527505.431,"{'type': 'Point', 'coordinates': [-8.9308967, 9.5096283]}",2022,10,23,8,18,25,6,America/New_York,63cb3b875a5b6a9f9678495e,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666527505.531,9128037.101651454,56554.671593016574,96.3714008308474 +17,70.051,88.5518573396775,127977867000000,time,2022-10-23T08:18:30.322000-04:00,9.5028254,-8.928238799999999,0.0008007188,1666527510.322,"{'type': 'Point', 'coordinates': [-8.928238799999999, 9.5028254]}",2022,10,23,8,18,30,6,America/New_York,63cb3b875a5b6a9f9678495c,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666527510.414,812.1102966572569,166.04176881483636,158.65704571802328 +18,70.491,88.5518573396775,127980286000000,time,2022-10-23T08:18:32.742000-04:00,9.502827,-8.928241,0.00045667894,1666527512.742,"{'type': 'Point', 'coordinates': [-8.928241, 9.502827]}",2022,10,23,8,18,32,6,America/New_York,63cb3b875a5b6a9f9678495a,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666527512.805,0.30247545493466854,0.12498985347053698,-53.97160054243243 +19,71.088,88.90468062277316,127982766000000,time,2022-10-23T08:18:35.222000-04:00,9.5028288,-8.9282446,0.00018203357,1666527515.222,"{'type': 'Point', 'coordinates': [-8.9282446, 9.5028288]}",2022,10,23,8,18,35,6,America/New_York,63cb3b875a5b6a9f96784958,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666527515.256,0.44753746723349735,0.18045867210331512,-63.4340859968205 +20,71.386,88.90468062277316,127984927000000,time,2022-10-23T08:18:37.383000-04:00,9.5028305,-8.9282481,0.00047943447,1666527517.383,"{'type': 'Point', 'coordinates': [-8.9282481, 9.5028305]}",2022,10,23,8,18,37,6,America/New_York,63cb3b875a5b6a9f96784956,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666527517.467,0.43264798073506094,0.20020730127811434,-64.09264428183756 +21,6.796,47.58369388276215,130913929000000,time,2022-10-23T09:07:26.414000-04:00,45.8422118,-88.795334,0.0,1666530446.414,"{'type': 'Point', 'coordinates': [-88.795334, 
45.8422118]}",2022,10,23,9,7,26,6,America/New_York,63cb3b865a5b6a9f96784711,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530446.57,9128553.072150933,3116.5778278628445,-53.67251333875304 +22,6.073,67.72237584742903,130916514000000,time,2022-10-23T09:07:29-04:00,45.8422088,-88.7953356,0.4435715,1666530449.0,"{'type': 'Point', 'coordinates': [-88.7953356, 45.8422088]}",2022,10,23,9,7,29,6,America/New_York,63cb3b865a5b6a9f9678470f,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530449.162,0.36342613387562533,0.140536016512199,-156.61936494460215 +23,5.674,66.45492228348893,130918514000000,time,2022-10-23T09:07:31-04:00,45.8421905,-88.7953335,0.42822865,1666530451.0,"{'type': 'Point', 'coordinates': [-88.7953335, 45.8421905]}",2022,10,23,9,7,31,6,America/New_York,63cb3b875a5b6a9f96784827,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530451.159,2.043652411060488,1.021826205530244,174.68544372612416 +24,4.264,66.52611797272903,130921514000000,time,2022-10-23T09:07:34-04:00,45.8421544,-88.7953286,0.46453524,1666530454.0,"{'type': 'Point', 'coordinates': [-88.7953286, 45.8421544]}",2022,10,23,9,7,34,6,America/New_York,63cb3b875a5b6a9f96784825,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530454.171,4.038362756394268,1.3461209187980894,173.72097211252125 +25,3.97,66.52611797272903,130923514000000,time,2022-10-23T09:07:36-04:00,45.8421287,-88.795326,0.27370173,1666530456.0,"{'type': 'Point', 'coordinates': [-88.795326, 45.8421287]}",2022,10,23,9,7,36,6,America/New_York,63cb3b875a5b6a9f96784823,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530456.167,2.8673033798852914,1.4336516899426457,175.3116869856678 +26,3.778,66.40406829233207,130926514000000,time,2022-10-23T09:07:39-04:00,45.8420562,-88.7953427,0.076683864,1666530459.0,"{'type': 'Point', 'coordinates': [-88.7953427, 45.8420562]}",2022,10,23,9,7,39,6,America/New_York,63cb3b875a5b6a9f96784821,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530459.212,8.200967876356506,2.733655958785502,-169.4232244687499 
+27,3.809,66.40406829233207,130928514000000,time,2022-10-23T09:07:41-04:00,45.8420367,-88.7953468,0.14669338,1666530461.0,"{'type': 'Point', 'coordinates': [-88.7953468, 45.8420367]}",2022,10,23,9,7,41,6,America/New_York,63cb3b875a5b6a9f9678481d,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530461.207,2.1995703436063345,1.0997851718031673,-170.3273939735529 +28,3.777,66.38982927773667,130930514000000,time,2022-10-23T09:07:43-04:00,45.8420176,-88.7953506,0.14359993,1666530463.0,"{'type': 'Point', 'coordinates': [-88.7953506, 45.8420176]}",2022,10,23,9,7,43,6,America/New_York,63cb3b875a5b6a9f9678481b,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530463.128,2.1512667627833237,1.0756333813916619,-170.83833588335696 +29,3.773,66.34914651105368,130932514000000,time,2022-10-23T09:07:45-04:00,45.8420052,-88.7953557,0.20055798,1666530465.0,"{'type': 'Point', 'coordinates': [-88.7953557, 45.8420052]}",2022,10,23,9,7,45,6,America/New_York,63cb3b875a5b6a9f96784819,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530465.14,1.4534326024139022,0.7267163012069511,-161.56129220635185 +30,3.773,66.08674736998177,130934514000000,time,2022-10-23T09:07:47-04:00,45.8419844,-88.7953515,0.121819735,1666530467.0,"{'type': 'Point', 'coordinates': [-88.7953515, 45.8419844]}",2022,10,23,9,7,47,6,America/New_York,63cb3b875a5b6a9f96784817,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530467.16,2.3436339030131266,1.1718169515065633,170.70392683829218 +31,4.233,66.39186342121064,130936514000000,time,2022-10-23T09:07:49-04:00,45.8419706,-88.795353,0.119162664,1666530469.0,"{'type': 'Point', 'coordinates': [-88.795353, 45.8419706]}",2022,10,23,9,7,49,6,America/New_York,63cb3b875a5b6a9f96784815,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530469.165,1.5404352173393703,0.7702176086696851,-174.96452956466376 +32,4.167,66.23726991221541,130938514000000,time,2022-10-23T09:07:51-04:00,45.8419726,-88.795357,0.21616264,1666530471.0,"{'type': 'Point', 'coordinates': [-88.795357, 
45.8419726]}",2022,10,23,9,7,51,6,America/New_York,63cb3b875a5b6a9f96784813,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530471.159,0.4236230181154391,0.21181150905771956,-58.33369926495768 +33,4.207,66.29829332803058,130940514000000,time,2022-10-23T09:07:53-04:00,45.8419809,-88.7953693,0.22744279,1666530473.0,"{'type': 'Point', 'coordinates': [-88.7953693, 45.8419809]}",2022,10,23,9,7,53,6,America/New_York,63cb3b875a5b6a9f96784811,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530473.128,1.442567540635993,0.7212837703179965,-50.22499168215252 +34,4.245,66.36541959423155,130942514000000,time,2022-10-23T09:07:55-04:00,45.8419851,-88.7953836,0.2135423,1666530475.0,"{'type': 'Point', 'coordinates': [-88.7953836, 45.8419851]}",2022,10,23,9,7,55,6,America/New_York,63cb3b875a5b6a9f9678480f,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530475.168,1.370976733054404,0.685488366527202,-70.08371674188749 +35,4.295,66.40610243923128,130944514000000,time,2022-10-23T09:07:57-04:00,45.8419902,-88.7953947,0.20302191,1666530477.0,"{'type': 'Point', 'coordinates': [-88.7953947, 45.8419902]}",2022,10,23,9,7,57,6,America/New_York,63cb3b875a5b6a9f9678480d,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530477.168,1.1500741509572014,0.5750370754786007,-60.455882666114654 +36,4.313,66.36135132050057,130946514000000,time,2022-10-23T09:07:59-04:00,45.8419951,-88.7954038,0.02718132,1666530479.0,"{'type': 'Point', 'coordinates': [-88.7954038, 45.8419951]}",2022,10,23,9,7,59,6,America/New_York,63cb3b875a5b6a9f9678480b,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530479.151,0.9847303981362939,0.49236519906814696,-56.40602845067725 +37,4.313,67.57998510185757,130948514000000,time,2022-10-23T09:08:01-04:00,45.8419986,-88.7954143,0.012299381,1666530481.0,"{'type': 'Point', 'coordinates': [-88.7954143, 45.8419986]}",2022,10,23,9,8,1,6,America/New_York,63cb3b875a5b6a9f96784809,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530481.217,1.023346516532289,0.5116732582661445,-67.6475146748015 
+38,4.333,68.19625820853027,130950514000000,time,2022-10-23T09:08:03-04:00,45.8419997,-88.7954174,0.0043466245,1666530483.0,"{'type': 'Point', 'coordinates': [-88.7954174, 45.8419997]}",2022,10,23,9,8,3,6,America/New_York,63cb3b875a5b6a9f96784807,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530483.145,0.30502711675448785,0.15251355837724392,-66.35958895975834 +39,4.279,72.15524892434355,130952514000000,time,2022-10-23T09:08:05-04:00,45.8419999,-88.7954181,0.00078042917,1666530485.0,"{'type': 'Point', 'coordinates': [-88.7954181, 45.8419999]}",2022,10,23,9,8,5,6,America/New_York,63cb3b875a5b6a9f96784805,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530485.215,0.06690138714639474,0.03345069357319737,-70.58461412322431 +40,4.1,72.1511806564856,130954514000000,time,2022-10-23T09:08:07-04:00,45.8419943,-88.7954117,0.012091182,1666530487.0,"{'type': 'Point', 'coordinates': [-88.7954117, 45.8419943]}",2022,10,23,9,8,7,6,America/New_York,63cb3b875a5b6a9f96784803,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530487.224,0.8488478030142375,0.42442390150711873,137.1867478316268 +41,3.658,72.00269021960725,130956514000000,time,2022-10-23T09:08:09-04:00,45.8419933,-88.795412,0.0067384806,1666530489.0,"{'type': 'Point', 'coordinates': [-88.795412, 45.8419933]}",2022,10,23,9,8,9,6,America/New_York,63cb3b875a5b6a9f96784801,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530489.207,0.11443582037671686,0.05721791018835843,-166.3315484884776 +42,3.669,72.19389756664256,130958514000000,time,2022-10-23T09:08:11-04:00,45.8419981,-88.7954164,0.0033967616,1666530491.0,"{'type': 'Point', 'coordinates': [-88.7954164, 45.8419981]}",2022,10,23,9,8,11,6,America/New_York,63cb3b875a5b6a9f967847ff,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530491.223,0.6649606729506455,0.33248033647532277,-36.615372538920376 +43,3.794,72.04947459635781,130960514000000,time,2022-10-23T09:08:13-04:00,45.8419995,-88.7954178,0.0016277335,1666530493.0,"{'type': 'Point', 'coordinates': [-88.7954178, 
45.8419995]}",2022,10,23,9,8,13,6,America/New_York,63cb3b875a5b6a9f967847fd,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530493.196,0.20039696619313824,0.10019848309656912,-39.02942819473018 +44,3.9,72.07591805427337,130962514000000,time,2022-10-23T09:08:15-04:00,45.8419999,-88.7954182,0.00074585306,1666530495.0,"{'type': 'Point', 'coordinates': [-88.7954182, 45.8419999]}",2022,10,23,9,8,15,6,America/New_York,63cb3b875a5b6a9f967847fb,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530495.224,0.057256275764991926,0.028628137882495963,-39.02942821257541 +45,3.9,72.2610245758745,130964514000000,time,2022-10-23T09:08:17-04:00,45.842,-88.7954183,0.00021288673,1666530497.0,"{'type': 'Point', 'coordinates': [-88.7954183, 45.842]}",2022,10,23,9,8,17,6,America/New_York,63cb3b875a5b6a9f967847f8,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530497.168,0.014314068673479318,0.007157034336739659,-39.02942884582376 +46,3.9,72.07184982459458,130966514000000,time,2022-10-23T09:08:19-04:00,45.842,-88.7954183,5.8428264e-05,1666530499.0,"{'type': 'Point', 'coordinates': [-88.7954183, 45.842]}",2022,10,23,9,8,19,6,America/New_York,63cb3b875a5b6a9f967847f6,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530499.177,0.0,0.0,0.0 +47,3.9,72.14507839052925,130968514000000,time,2022-10-23T09:08:21-04:00,45.842,-88.7954183,2.7616274e-05,1666530501.0,"{'type': 'Point', 'coordinates': [-88.7954183, 45.842]}",2022,10,23,9,8,21,6,America/New_York,63cb3b875a5b6a9f967847f4,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530501.172,0.0,0.0,0.0 +48,3.9,72.1328736368831,130970514000000,time,2022-10-23T09:08:23-04:00,45.842,-88.7954183,1.1177079e-05,1666530503.0,"{'type': 'Point', 'coordinates': [-88.7954183, 45.842]}",2022,10,23,9,8,23,6,America/New_York,63cb3b875a5b6a9f967847f2,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530503.163,0.0,0.0,0.0 +49,3.9,72.11863477989888,130972514000000,time,2022-10-23T09:08:25-04:00,45.8420249,-88.7953697,0.399899,1666530505.0,"{'type': 'Point', 'coordinates': [-88.7953697, 
45.8420249]}",2022,10,23,9,8,25,6,America/New_York,63cb3b875a5b6a9f967847f0,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530505.169,5.18235335853226,2.59117667926613,57.70585871194569 +50,3.9,72.08405479850599,130974514000000,time,2022-10-23T09:08:27-04:00,45.8420403,-88.7953512,0.1407344,1666530507.0,"{'type': 'Point', 'coordinates': [-88.7953512, 45.8420403]}",2022,10,23,9,8,27,6,America/New_York,63cb3b875a5b6a9f967847ee,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530507.167,2.390205087015271,1.1951025435076355,44.23991722259342 +51,3.9,71.91725854756348,130976514000000,time,2022-10-23T09:08:29-04:00,45.8420422,-88.795342,0.24813904,1666530509.0,"{'type': 'Point', 'coordinates': [-88.795342, 45.8420422]}",2022,10,23,9,8,29,6,America/New_York,63cb3b875a5b6a9f967847ec,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530509.177,0.8557627047692521,0.42788135238462605,75.70705741006257 +52,3.9,71.91929262495907,130978514000000,time,2022-10-23T09:08:31-04:00,45.8420362,-88.7953326,0.3697923,1666530511.0,"{'type': 'Point', 'coordinates': [-88.7953326, 45.8420362]}",2022,10,23,9,8,31,6,America/New_York,63cb3b875a5b6a9f967847ea,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530511.164,1.0784408501058855,0.5392204250529428,128.21708213231219 +53,3.9,72.09829335250407,130980514000000,time,2022-10-23T09:08:33-04:00,45.8420287,-88.7953254,0.050909523,1666530513.0,"{'type': 'Point', 'coordinates': [-88.7953254, 45.8420287]}",2022,10,23,9,8,33,6,America/New_York,63cb3b875a5b6a9f967847e8,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530513.155,1.0567352299335482,0.5283676149667741,142.10961964353316 +54,3.861,72.33832267155071,130982514000000,time,2022-10-23T09:08:35-04:00,45.8420224,-88.7953283,0.017461937,1666530515.0,"{'type': 'Point', 'coordinates': [-88.7953283, 45.8420224]}",2022,10,23,9,8,35,6,America/New_York,63cb3b875a5b6a9f967847e6,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530515.161,0.7477100219118905,0.37385501095594526,-159.53697803395926 
+55,3.82,72.3261179423813,130984514000000,time,2022-10-23T09:08:37-04:00,45.8420247,-88.7953261,0.1674135,1666530517.0,"{'type': 'Point', 'coordinates': [-88.7953261, 45.8420247]}",2022,10,23,9,8,37,6,America/New_York,63cb3b875a5b6a9f967847e4,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530517.176,0.3236231146910528,0.1618115573455264,37.78962630080022 +56,3.288,72.1003274734625,130987514000000,time,2022-10-23T09:08:40-04:00,45.8420264,-88.7953183,0.011299177,1666530520.0,"{'type': 'Point', 'coordinates': [-88.7953183, 45.8420264]}",2022,10,23,9,8,40,6,America/New_York,63cb3b875a5b6a9f967847e2,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530520.172,0.7280482156964252,0.24268273856547506,74.95121927305783 +57,3.21,72.15321490275011,130989514000000,time,2022-10-23T09:08:42-04:00,45.8420228,-88.7953196,0.004033534,1666530522.0,"{'type': 'Point', 'coordinates': [-88.7953196, 45.8420228]}",2022,10,23,9,8,42,6,America/New_York,63cb3b875a5b6a9f967847e0,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530522.149,0.4171002617990136,0.2085501308995068,-163.6837081580924 +58,3.387,72.04744048763752,130991514000000,time,2022-10-23T09:08:44-04:00,45.8420189,-88.7953195,0.0044018053,1666530524.0,"{'type': 'Point', 'coordinates': [-88.7953195, 45.8420189]}",2022,10,23,9,8,44,6,America/New_York,63cb3b875a5b6a9f967847de,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530524.148,0.4337538827662073,0.21687694138310365,178.8092498992339 +59,3.611,72.04744048763752,130993514000000,time,2022-10-23T09:08:46-04:00,45.842017,-88.795319,0.0008030349,1666530526.0,"{'type': 'Point', 'coordinates': [-88.795319, 45.842017]}",2022,10,23,9,8,46,6,America/New_York,63cb3b875a5b6a9f967847dc,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530526.138,0.2160240743037802,0.1080120371518901,167.95788709493064 +60,3.579,72.3545964478449,130995514000000,time,2022-10-23T09:08:48-04:00,45.8420179,-88.7953205,0.14305066,1666530528.0,"{'type': 'Point', 'coordinates': [-88.7953205, 
45.8420179]}",2022,10,23,9,8,48,6,America/New_York,63cb3b875a5b6a9f967847da,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530528.17,0.16821477646297245,0.08410738823148622,-53.492604376283715 +61,3.573,72.13694188614046,130997514000000,time,2022-10-23T09:08:50-04:00,45.84202,-88.795334,0.032797106,1666530530.0,"{'type': 'Point', 'coordinates': [-88.795334, 45.84202]}",2022,10,23,9,8,50,6,America/New_York,63cb3b875a5b6a9f967847d8,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530530.178,1.23907108746586,0.61953554373293,-79.13735295938787 +62,3.684,68.27729640109833,130999514000000,time,2022-10-23T09:08:52-04:00,45.8420228,-88.7953503,0.014220784,1666530532.0,"{'type': 'Point', 'coordinates': [-88.7953503, 45.8420228]}",2022,10,23,9,8,52,6,America/New_York,63cb3b875a5b6a9f967847d5,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530532.142,1.5018827239897832,0.7509413619948916,-78.03560840810977 +63,3.692,68.53018419553139,131001514000000,time,2022-10-23T09:08:54-04:00,45.8420252,-88.7953622,0.0130950445,1666530534.0,"{'type': 'Point', 'coordinates': [-88.7953622, 45.8420252]}",2022,10,23,9,8,54,6,America/New_York,63cb3b875a5b6a9f967847d3,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530534.163,1.1053466528937979,0.5526733264468989,-76.02882835469099 +64,3.742,68.25728263755926,131003514000000,time,2022-10-23T09:08:56-04:00,45.8420272,-88.7953687,0.013469782,1666530536.0,"{'type': 'Point', 'coordinates': [-88.7953687, 45.8420272]}",2022,10,23,9,8,56,6,America/New_York,63cb3b875a5b6a9f96784898,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530536.177,0.6266864864501744,0.3133432432250872,-69.21475369220359 +65,3.665,68.30610249797027,131005514000000,time,2022-10-23T09:08:58-04:00,45.8420289,-88.7953758,0.011017026,1666530538.0,"{'type': 'Point', 'coordinates': [-88.7953758, 45.8420289]}",2022,10,23,9,8,58,6,America/New_York,63cb3b875a5b6a9f96784896,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530538.149,0.667316309645128,0.333658154822564,-73.54447074281263 
+66,3.615,68.07086667440574,131007514000000,time,2022-10-23T09:09:00-04:00,45.8420308,-88.7953826,0.010133888,1666530540.0,"{'type': 'Point', 'coordinates': [-88.7953826, 45.8420308]}",2022,10,23,9,9,0,6,America/New_York,63cb3b875a5b6a9f96784894,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530540.166,0.648330429474324,0.324165214737162,-70.98180656998647 +67,3.59,68.03018381961624,131009514000000,time,2022-10-23T09:09:02-04:00,45.842032,-88.7953861,0.009548559,1666530542.0,"{'type': 'Point', 'coordinates': [-88.7953861, 45.842032]}",2022,10,23,9,9,2,6,America/New_York,63cb3b875a5b6a9f96784892,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530542.172,0.3425421265866874,0.1712710632933437,-67.07410969906957 +68,3.713,67.1528272554781,131011514000000,time,2022-10-23T09:09:04-04:00,45.8420333,-88.7953889,0.006202276,1666530544.0,"{'type': 'Point', 'coordinates': [-88.7953889, 45.8420333]}",2022,10,23,9,9,4,6,America/New_York,63cb3b875a5b6a9f96784890,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530544.176,0.290852547435103,0.1454262737175515,-60.19835080790707 +69,3.787,67.23012088530619,131013514000000,time,2022-10-23T09:09:06-04:00,45.842034,-88.7953911,0.0054347673,1666530546.0,"{'type': 'Point', 'coordinates': [-88.7953911, 45.842034]}",2022,10,23,9,9,6,6,America/New_York,63cb3b875a5b6a9f9678488e,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530546.178,0.21303336319656146,0.10651668159828073,-68.56951465011271 +70,3.821,66.32952382876933,131015514000000,time,2022-10-23T09:09:08-04:00,45.8420348,-88.7953926,0.00554405,1666530548.0,"{'type': 'Point', 'coordinates': [-88.7953926, 45.8420348]}",2022,10,23,9,9,8,6,America/New_York,63cb3b875a5b6a9f9678488c,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530548.143,0.16184644013672972,0.08092322006836486,-56.658246614067004 +71,3.773,66.21764701456404,131017514000000,time,2022-10-23T09:09:10-04:00,45.8420361,-88.7953858,0.068608545,1666530550.0,"{'type': 'Point', 'coordinates': [-88.7953858, 
45.8420361]}",2022,10,23,9,9,10,6,America/New_York,63cb3b875a5b6a9f9678488a,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530550.204,0.6297561545848783,0.31487807729243916,76.73008379433972 +72,3.707,65.96948887577125,131019707000000,time,2022-10-23T09:09:12.162000-04:00,45.8420408,-88.7953555,0.06666976,1666530552.162,"{'type': 'Point', 'coordinates': [-88.7953555, 45.8420408]}",2022,10,23,9,9,12,6,America/New_York,63cb3b875a5b6a9f967847d0,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530552.263,2.7807465386096566,1.286191773662977,79.16734622724339 +73,3.722,66.43530403238086,131022187000000,time,2022-10-23T09:09:14.643000-04:00,45.8420468,-88.7953367,0.009156192,1666530554.643,"{'type': 'Point', 'coordinates': [-88.7953367, 45.8420468]}",2022,10,23,9,9,14,6,America/New_York,63cb3b875a5b6a9f967847ce,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530554.732,1.8212063767922617,0.7340614332911615,68.51030076668053 +74,3.816,66.26646849896474,131024514000000,time,2022-10-23T09:09:17-04:00,45.8420501,-88.7953228,0.042673808,1666530557.0,"{'type': 'Point', 'coordinates': [-88.7953228, 45.8420501]}",2022,10,23,9,9,17,6,America/New_York,63cb3b875a5b6a9f967847cc,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530557.094,1.3055520249344932,0.5539040995340211,73.676297123126 +75,3.959,66.03661403186763,131026514000000,time,2022-10-23T09:09:19-04:00,45.8420516,-88.7953116,0.042424865,1666530559.0,"{'type': 'Point', 'coordinates': [-88.7953116, 45.8420516]}",2022,10,23,9,9,19,6,America/New_York,63cb3b875a5b6a9f967847ca,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530559.135,1.0232358048817194,0.5116179024408597,80.6186455439939 +76,4.078,65.96948842791869,131028514000000,time,2022-10-23T09:09:21-04:00,45.8420527,-88.7953034,0.04895465,1666530561.0,"{'type': 'Point', 'coordinates': [-88.7953034, 45.8420527]}",2022,10,23,9,9,21,6,America/New_York,63cb3b875a5b6a9f967847c8,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530561.103,0.7491871678316387,0.3745935839158194,80.60366383794012 
+77,3.887,65.92880650312267,131030514000000,time,2022-10-23T09:09:23-04:00,45.8420546,-88.795296,0.042309474,1666530563.0,"{'type': 'Point', 'coordinates': [-88.795296, 45.8420546]}",2022,10,23,9,9,23,6,America/New_York,63cb3b875a5b6a9f967847c6,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530563.113,0.6996831853331021,0.34984159266655107,72.42514769722138 +78,3.71,66.0427163859303,131032514000000,time,2022-10-23T09:09:25-04:00,45.8420555,-88.7952912,0.03168842,1666530565.0,"{'type': 'Point', 'coordinates': [-88.7952912, 45.8420555]}",2022,10,23,9,9,25,6,America/New_York,63cb3b875a5b6a9f967847c4,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530565.115,0.4440873438703771,0.22204367193518856,76.97649170593833 +79,3.502,66.08543298771778,131034514000000,time,2022-10-23T09:09:27-04:00,45.8420565,-88.7952896,0.009615864,1666530567.0,"{'type': 'Point', 'coordinates': [-88.7952896, 45.8420565]}",2022,10,23,9,9,27,6,America/New_York,63cb3b865a5b6a9f9678470b,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530567.086,0.1821102457805075,0.09105512289025375,52.367731514397526 +80,3.9,66.07933115641157,131069723000000,time,2022-10-23T09:10:02.209000-04:00,45.8420783,-88.7952383,0.07154639,1666530602.209,"{'type': 'Point', 'coordinates': [-88.7952383, 45.8420783]}",2022,10,23,9,10,2,6,America/New_York,63cb3b865a5b6a9f96784709,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530602.363,5.22094972937579,0.14828452137157072,62.33552171122507 +81,193.074,65.96948821500088,131101252000000,time,2022-10-23T09:10:33.707000-04:00,45.8420783,-88.7952383,0.0,1666530633.707,"{'type': 'Point', 'coordinates': [-88.7952383, 45.8420783]}",2022,10,23,9,10,33,6,America/New_York,63cb3b865a5b6a9f96784706,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530634.341,0.0,0.0,0.0 +82,4.8,65.80472784168754,131139137000000,time,2022-10-23T09:11:11.624000-04:00,45.8420783,-88.7952367,0.0002839967,1666530671.624,"{'type': 'Point', 'coordinates': [-88.7952367, 
45.8420783]}",2022,10,23,9,11,11,6,America/New_York,63cb3b865a5b6a9f96784702,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530671.732,0.14422142099505092,0.00380360842853236,89.99999943799433 +83,190.938,65.5016581127731,131170160000000,time,2022-10-23T09:11:42.616000-04:00,45.8420783,-88.7952367,0.0,1666530702.616,"{'type': 'Point', 'coordinates': [-88.7952367, 45.8420783]}",2022,10,23,9,11,42,6,America/New_York,63cb3b865a5b6a9f96784700,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530703.268,0.0,0.0,0.0 +84,96.0,65.5016581127731,131172394000000,time,2022-10-23T09:11:44.880000-04:00,45.8420738,-88.7952367,0.0007636664,1666530704.88,"{'type': 'Point', 'coordinates': [-88.7952367, 45.8420738]}",2022,10,23,9,11,44,6,America/New_York,63cb3b865a5b6a9f967846fc,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666530704.991,0.5003771694272708,0.22101463349069728,180.0 +85,11.5,65.381649324433,131614132000000,time,2022-10-23T09:19:06.676000-04:00,45.84236,-88.7956533,0.56670916,1666531146.676,"{'type': 'Point', 'coordinates': [-88.7956533, 45.84236]}",2022,10,23,9,19,6,6,America/New_York,63cb3b865a5b6a9f967846d4,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666531146.742,49.222839277221304,0.1114153122183005,-49.719474065415795 +86,8.9,65.13961590124254,131652199000000,time,2022-10-23T09:19:44.704000-04:00,45.8422401,-88.795257,0.6320324,1666531184.704,"{'type': 'Point', 'coordinates': [-88.795257, 45.8422401]}",2022,10,23,9,19,44,6,America/New_York,63cb3b865a5b6a9f967846cf,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666531184.833,38.1286300828333,1.0026462134539453,110.46674928665514 +87,194.594,65.27182052953847,131683148000000,time,2022-10-23T09:20:15.604000-04:00,45.84224,-88.7952567,0.0,1666531215.604,"{'type': 'Point', 'coordinates': [-88.7952567, 45.84224]}",2022,10,23,9,20,15,6,America/New_York,63cb3b865a5b6a9f967846c8,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666531216.37,0.029238396150981973,0.0009462264097327765,112.35254384305428 
+88,12.7,65.4447066248119,131741784000000,time,2022-10-23T09:21:14.282000-04:00,45.8422833,-88.7955132,0.17825896,1666531274.282,"{'type': 'Point', 'coordinates': [-88.7955132, 45.8422833]}",2022,10,23,9,21,14,6,America/New_York,63cb3b865a5b6a9f967846c1,7883908a-2d2e-4b2f-8ae1-3bbe8e1011ed,1666531274.434,23.616448092034798,0.40247534174276645,-78.23643055631823 diff --git a/emission/tests/data/smoothing_data/all_cluster_case_2.csv b/emission/tests/data/smoothing_data/all_cluster_case_2.csv new file mode 100644 index 000000000..19cae15ab --- /dev/null +++ b/emission/tests/data/smoothing_data/all_cluster_case_2.csv @@ -0,0 +1,22 @@ +,accuracy,altitude,elapsedRealtimeNanos,filter,fmt_time,latitude,longitude,sensed_speed,ts,loc,local_dt_year,local_dt_month,local_dt_day,local_dt_hour,local_dt_minute,local_dt_second,local_dt_weekday,local_dt_timezone,_id,user_id,metadata_write_ts,distance,speed,heading +0,11.108,4.099999904632568,480279189934473,time,2022-12-27T19:07:29.865000-04:00,28.7149459,-79.9660452,0.0,1672182449.865,"{'type': 'Point', 'coordinates': [-79.9660452, 28.7149459]}",2022,12,27,19,7,29,1,America/Santo_Domingo,63abd6b780ea0c622c07a524,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672182449.938,0.0,0.0,0.0 +1,135.6,2.5999999046325684,480297069000000,time,2022-12-27T19:07:47.745000-04:00,28.7150861,-79.9657915,0.0,1672182467.745,"{'type': 'Point', 'coordinates': [-79.9657915, 28.7150861]}",2022,12,27,19,7,47,1,America/Santo_Domingo,63abd6b780ea0c14d363f6aa,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672182469.317,30.93405372624263,1.7300925022782692,59.73761371123961 +2,8.817,2.5999999046325684,480310185031752,time,2022-12-27T19:08:00.861000-04:00,28.7149445,-79.9660444,0.4428903,1672182480.861,"{'type': 'Point', 'coordinates': [-79.9660444, 28.7149445]}",2022,12,27,19,8,0,1,America/Santo_Domingo,63abd6b780ea0c4fbb3a860b,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672182480.941,30.940241414406355,2.358969274204307,-120.58992854600628 
+3,6.946,2.5999999046325684,480340187793720,time,2022-12-27T19:08:30.864000-04:00,28.7149431,-79.9660446,0.0,1672182510.864,"{'type': 'Point', 'coordinates': [-79.9660446, 28.7149431]}",2022,12,27,19,8,30,1,America/Santo_Domingo,63abd6b780ea0c4fbb3a860d,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672182510.935,0.15709139600600291,0.005235856277577252,-172.29445749969736 +4,6.946,4.099999904632568,480349110000000,time,2022-12-27T19:08:39.786000-04:00,28.7149431,-79.9660446,0.0,1672182519.786,"{'type': 'Point', 'coordinates': [-79.9660446, 28.7149431]}",2022,12,27,19,8,39,1,America/Santo_Domingo,63abd6b780ea0c14d363f6ac,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672182520.698,0.0,0.0,0.0 +5,11.905,4.099999904632568,480369184157251,time,2022-12-27T19:08:59.860000-04:00,28.7149424,-79.9660454,0.0,1672182539.86,"{'type': 'Point', 'coordinates': [-79.9660454, 28.7149424]}",2022,12,27,19,8,59,1,America/Santo_Domingo,63abd6b780ea0c14d363f6ae,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672182539.936,0.11470398098794221,0.005714057072016616,-132.73320072472342 +6,8.54,4.099999904632568,480400173519739,time,2022-12-27T19:09:30.849000-04:00,28.7149425,-79.9660461,0.0,1672182570.849,"{'type': 'Point', 'coordinates': [-79.9660461, 28.7149425]}",2022,12,27,19,9,30,1,America/Santo_Domingo,63abd6b780ea0c14d363f6af,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672182570.909,0.07455484590077509,0.0024058487109444326,-81.42261064849932 +7,11.309,4.099999904632568,480429197047019,time,2022-12-27T19:09:59.873000-04:00,28.7149429,-79.9660491,0.0,1672182599.873,"{'type': 'Point', 'coordinates': [-79.9660491, 28.7149429]}",2022,12,27,19,9,59,1,America/Santo_Domingo,63abd6b780ea0c622c07a526,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672182599.954,0.3190623999804654,0.010993054050312367,-81.98674926852927 +8,5.591,4.099999904632568,480458185770029,time,2022-12-27T19:10:28.862000-04:00,28.7149428,-79.9660528,0.0,1672182628.862,"{'type': 'Point', 'coordinates': [-79.9660528, 
28.7149428]}",2022,12,27,19,10,28,1,America/Santo_Domingo,63abd6b780ea0c4fbb3a8611,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672182628.935,0.38982662025670806,0.013447397949353424,-91.63453739493009 +9,5.389,0.34009138558467455,480489175997205,time,2022-12-27T19:10:59.851000-04:00,28.714942,-79.9660599,0.0,1672182659.851,"{'type': 'Point', 'coordinates': [-79.9660599, 28.714942]}",2022,12,27,19,10,59,1,America/Santo_Domingo,63abd6b780ea0c622c07a52a,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672182659.909,0.7530140866899844,0.024299399293202267,-96.7843736068738 +10,31.712,-0.6652531603251415,480521176883859,time,2022-12-27T19:11:31.852000-04:00,28.7149404,-79.9660618,0.0,1672182691.852,"{'type': 'Point', 'coordinates': [-79.9660618, 28.7149404]}",2022,12,27,19,11,31,1,America/Santo_Domingo,63abd6b780ea0c4fbb3a8612,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672182691.923,0.26775466865075986,0.008367071943289239,-131.64080107020246 +11,20.0,470.8999938964844,502531939000000,time,2022-12-28T01:18:22.615000-04:00,43.652812,63.0870846,0.0,1672204702.615,"{'type': 'Point', 'coordinates': [63.0870846, 43.652812]}",2022,12,28,1,18,22,2,America/Santo_Domingo,63abd6ba80ea0c4fbb3a865a,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672204703.506,12997548.695653537,590.5087749864364,34.124727336970636 +12,16.302,470.8999938964844,502535068789045,time,2022-12-28T01:18:25.744000-04:00,28.7149097,-79.966215,0.0,1672204705.744,"{'type': 'Point', 'coordinates': [-79.966215, 28.7149097]}",2022,12,28,1,18,25,2,America/Santo_Domingo,63abd6ba80ea0c4fbb3a865c,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672204705.831,12997560.572871989,4153902.4566287547,-39.665658379961364 +13,55.202,470.8999938964844,502565932000000,time,2022-12-28T01:18:56.608000-04:00,28.7149097,-79.966215,0.0,1672204736.608,"{'type': 'Point', 'coordinates': [-79.966215, 28.7149097]}",2022,12,28,1,18,56,2,America/Santo_Domingo,63abd6ba80ea0c119978254e,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672204736.904,0.0,0.0,0.0 
+14,5.638,470.8999938964844,502571007097104,time,2022-12-28T01:19:01.683000-04:00,28.7149108,-79.9661439,0.25195774,1672204741.683,"{'type': 'Point', 'coordinates': [-79.9661439, 28.7149108]}",2022,12,28,1,19,1,2,America/Santo_Domingo,63abd6ba80ea0c14d363f73d,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672204741.752,7.4889449364742,1.4756541608097589,89.06415428544567 +15,7.963,470.8999938964844,502600023932510,time,2022-12-28T01:19:30.699000-04:00,28.714911,-79.9661367,0.0,1672204770.699,"{'type': 'Point', 'coordinates': [-79.9661367, 28.714911]}",2022,12,28,1,19,30,2,America/Santo_Domingo,63abd6ba80ea0c14d363f73f,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672204770.775,0.7585990582025954,0.026144164033227436,88.32008300865206 +16,5.417,470.8999938964844,502631006269842,time,2022-12-28T01:20:01.682000-04:00,28.7149105,-79.9661342,0.0,1672204801.682,"{'type': 'Point', 'coordinates': [-79.9661342, 28.7149105]}",2022,12,28,1,20,1,2,America/Santo_Domingo,63abd6ba80ea0c14d363f740,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672204801.763,0.2690953405979625,0.008685257730048104,101.92369210435568 +17,7.663,470.8999938964844,502634001000000,time,2022-12-28T01:20:04.677000-04:00,28.7149105,-79.9661342,0.0,1672204804.677,"{'type': 'Point', 'coordinates': [-79.9661342, 28.7149105]}",2022,12,28,1,20,4,2,America/Santo_Domingo,63abd6ba80ea0c14d363f742,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672204807.212,0.0,0.0,0.0 +18,15.821,470.8999938964844,502661014797122,time,2022-12-28T01:20:31.690000-04:00,28.7149084,-79.9661334,0.0,1672204831.69,"{'type': 'Point', 'coordinates': [-79.9661334, 28.7149084]}",2022,12,28,1,20,31,2,America/Santo_Domingo,63abd6ba80ea0c1199782551,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672204831.773,0.24824404924316756,0.009189799323955083,160.16003830923796 +19,29.714,470.8999938964844,502670277000000,time,2022-12-28T01:20:40.953000-04:00,28.7149084,-79.9661334,0.0,1672204840.953,"{'type': 'Point', 'coordinates': [-79.9661334, 
28.7149084]}",2022,12,28,1,20,40,2,America/Santo_Domingo,63abd6ba80ea0c1199782554,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672204841.19,0.0,0.0,0.0 +20,14.267,470.8999938964844,502691009146434,time,2022-12-28T01:21:01.685000-04:00,28.7149081,-79.9661225,0.0,1672204861.685,"{'type': 'Point', 'coordinates': [-79.9661225, 28.7149081]}",2022,12,28,1,21,1,2,America/Santo_Domingo,63abd6ba80ea0c4fbb3a8661,10ae5adb-4842-4107-b6c0-8c4dc94f0f32,1672204861.763,1.148425690744591,0.05539386927058889,91.66451069649526 From cebb81fac17d1e4684d3b76754ecabaa49af1a57 Mon Sep 17 00:00:00 2001 From: Shankari Date: Tue, 24 Jan 2023 10:41:48 -0800 Subject: [PATCH 4/9] Move the first round check and the backup algo code to the location smoothing file This addresses a long-term TODO https://github.com/e-mission/e-mission-server/blob/master/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py#L262 It also: - ensures that the individual algorithms are clean and modular and don't depend on other algorithms - we can swap in any algorithm for the backup algo - we can support more complex backups in the future Testing done: - modified the test to pass in the backup algo - tests pass --- .../cleaning_methods/jump_smoothing.py | 35 -------------- .../intake/cleaning/location_smoothing.py | 47 +++++++++++++++++-- .../intakeTests/TestLocationSmoothing.py | 5 +- 3 files changed, 47 insertions(+), 40 deletions(-) diff --git a/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py b/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py index cc8ab5523..0c60cbe73 100644 --- a/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py +++ b/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py @@ -259,41 +259,6 @@ def filter(self, with_speeds_df): logging.debug("after setting values, outlier_mask = %s" % np.nonzero((self.inlier_mask_ == False).to_numpy())) # logging.debug("point details are %s" % 
with_speeds_df[np.logical_not(self.inlier_mask_)]) - # TODO: This is not the right place for this - adds too many dependencies - # Should do this in the outer class in general so that we can do - # multiple passes of any filtering algorithm - import emission.analysis.intake.cleaning.cleaning_methods.speed_outlier_detection as cso - import emission.analysis.intake.cleaning.location_smoothing as ls - - recomputed_speeds_df = ls.recalc_speed(self.with_speeds_df[self.inlier_mask_]) - recomputed_threshold = cso.BoxplotOutlier(ignore_zeros = True).get_threshold(recomputed_speeds_df) - logging.info("After first round, recomputed max = %s, recomputed threshold = %s" % - (recomputed_speeds_df.speed.max(), recomputed_threshold)) - # assert recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0, "After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold] - if recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] != 0: - logging.info("After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]) - MACH1 = 340.29 - if recomputed_speeds_df.speed.max() > MACH1: - backup_filtering_algo = SmoothPosdap(MACH1) - backup_filtering_algo.filter(with_speeds_df) - - recomputed_speeds_df = ls.recalc_speed(self.with_speeds_df[backup_filtering_algo.inlier_mask_]) - recomputed_threshold = cso.BoxplotOutlier(ignore_zeros = True).get_threshold(recomputed_speeds_df) - logging.info("After second round, max = %s, recomputed threshold = %s" % - (recomputed_speeds_df.speed.max(), recomputed_threshold)) - # assert recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0, "After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold] - if recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0: - logging.info("After 
second round, no outliers, returning backup mask %s" % backup_filtering_algo.inlier_mask_) - self.inlier_mask_ = backup_filtering_algo.inlier_mask_ - else: - logging.info("After second round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]) - if recomputed_speeds_df.speed.max() > MACH1: - logging.info("And they are also > %s, backup algo also failed" % MACH1) - else: - logging.debug("But they are all < %s, so returning outliers %s" % - (MACH1, np.nonzero(np.logical_not(backup_filtering_algo.inlier_mask_)))) - self.inlier_mask_ = backup_filtering_algo.inlier_mask_ - ### Re-implemented from the prior POSDAP algorithm ### This does seem to use some kind of max speed diff --git a/emission/analysis/intake/cleaning/location_smoothing.py b/emission/analysis/intake/cleaning/location_smoothing.py index 09e2b40b8..e696f11ef 100644 --- a/emission/analysis/intake/cleaning/location_smoothing.py +++ b/emission/analysis/intake/cleaning/location_smoothing.py @@ -43,6 +43,7 @@ # This is what we use in the segmentation code to see if the points are "the same" DEFAULT_SAME_POINT_DISTANCE = 100 +MACH1 = 340.29 def recalc_speed(points_df): """ @@ -139,9 +140,10 @@ def filter_jumps(user_id, section_id): logging.debug("Found iOS section, filling in gaps with fake data") section_points_df = _ios_fill_fake_data(section_points_df) filtering_algo = eaicj.SmoothZigzag(is_ios, DEFAULT_SAME_POINT_DISTANCE) + backup_filtering_algo = eaicj.SmoothPosdap(MACH1) logging.debug("len(section_points_df) = %s" % len(section_points_df)) - points_to_ignore_df = get_points_to_filter(section_points_df, outlier_algo, filtering_algo) + points_to_ignore_df = get_points_to_filter(section_points_df, outlier_algo, filtering_algo, backup_filtering_algo) if points_to_ignore_df is None: # There were no points to delete return @@ -163,7 +165,7 @@ def filter_jumps(user_id, section_id): result_entry = ecwe.Entry.create_entry(user_id, "analysis/smoothing", filter_result) 
ts.insert(result_entry) -def get_points_to_filter(section_points_df, outlier_algo, filtering_algo): +def get_points_to_filter(section_points_df, outlier_algo, filtering_algo, backup_filtering_algo): """ From the incoming dataframe, filter out large jumps using the specified outlier detection algorithm and the specified filtering algorithm. @@ -187,7 +189,46 @@ def get_points_to_filter(section_points_df, outlier_algo, filtering_algo): if filtering_algo is not None: try: filtering_algo.filter(with_speeds_df) - to_delete_mask = np.logical_not(filtering_algo.inlier_mask_) + recomputed_speeds_df = recalc_speed(with_speeds_df[filtering_algo.inlier_mask_]) + recomputed_threshold = outlier_algo.get_threshold(recomputed_speeds_df) + logging.info("After first round, recomputed max = %s, recomputed threshold = %s" % + (recomputed_speeds_df.speed.max(), recomputed_threshold)) + # assert recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0, "After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold] + if recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0: + logging.info("No outliers after first round, default algo worked, to_delete = %s" % + np.nonzero(np.logical_not(filtering_algo.inlier_mask_))) + sel_inlier_mask = filtering_algo.inlier_mask + else: + logging.info("After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]) + if backup_filtering_algo is None or recomputed_speeds_df.speed.max() < MACH1: + logging.debug("backup algo is %s, max < MACH1 %s, so returning default algo outliers %s" % + (MACH1, np.nonzero(np.logical_not(filtering_algo.inlier_mask_)))) + sel_inlier_mask_ = filtering_algo.inlier_mask_ + else: + backup_filtering_algo.filter(with_speeds_df) + recomputed_speeds_df = recalc_speed(with_speeds_df[backup_filtering_algo.inlier_mask_]) + recomputed_threshold = 
outlier_algo.get_threshold(recomputed_speeds_df) + logging.info("After second round, max = %s, recomputed threshold = %s" % + (recomputed_speeds_df.speed.max(), recomputed_threshold)) + # assert recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0, "After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold] + if recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0: + logging.info("After second round, no outliers, returning backup to delete %s" % + np.nonzero(np.logical_not(backup_filtering_algo.inlier_mask_))) + sel_inlier_mask_ = backup_filtering_algo.inlier_mask_ + else: + logging.info("After second round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]) + if recomputed_speeds_df.speed.max() < MACH1: + logging.debug("But they are all < %s, so returning backup to delete %s" % + (MACH1, np.nonzero(np.logical_not(backup_filtering_algo.inlier_mask_)))) + sel_inlier_mask_ = backup_filtering_algo.inlier_mask_ + else: + logging.info("And they are also > %s, backup algo also failed, returning default to delete = %s" % + (MACH1, np.nonzero(np.logical_not(filtering_algo.inlier_mask_)))) + sel_inlier_mask_ = filtering_algo.inlier_mask_ + + to_delete_mask = np.logical_not(sel_inlier_mask_) + logging.info("After all checks, inlier mask = %s, outlier_mask = %s" % + (np.nonzero(sel_inlier_mask_), np.nonzero(np.logical_not(sel_inlier_mask_)))) return with_speeds_df[to_delete_mask] except Exception as e: logging.exception("Caught error %s while processing section, skipping..." 
% e) diff --git a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py index 988c164c0..08e0d2ee8 100644 --- a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py +++ b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py @@ -160,12 +160,13 @@ def testFilterAllClusters(self): outlier_algo = eaics.BoxplotOutlier() jump_algo = eaicj.SmoothZigzag(False, 100) + backup_algo = eaicj.SmoothPosdap(eaicl.MACH1) # US to ocean jump: case 1 of https://github.com/e-mission/e-mission-docs/issues/843 with_speeds_df = pd.read_csv("emission/tests/data/smoothing_data/all_cluster_case_1.csv", index_col=0) with_speeds_df.drop(["distance", "speed", "heading"], axis="columns", inplace=True) with_speeds_df["loc"] = with_speeds_df["loc"].apply(lambda lstr: json.loads(lstr.replace("'", '"'))) - filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo) + filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo, backup_algo) expected_result_idx = list(range(16, 21)) self.assertEqual(list(filtered_points.dropna().index), expected_result_idx) @@ -173,7 +174,7 @@ def testFilterAllClusters(self): with_speeds_df = pd.read_csv("emission/tests/data/smoothing_data/all_cluster_case_2.csv") with_speeds_df.drop(["distance", "speed", "heading"], axis="columns", inplace=True) with_speeds_df["loc"] = with_speeds_df["loc"].apply(lambda lstr: json.loads(lstr.replace("'", '"'))) - filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo) + filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo, backup_algo) expected_result_idx = [11] self.assertEqual(list(filtered_points.dropna().index), expected_result_idx) From 0f1b24a9594bc6d18f874efbd31ff14b6dcad99d Mon Sep 17 00:00:00 2001 From: Shankari Date: Tue, 24 Jan 2023 11:27:15 -0800 Subject: [PATCH 5/9] Added unit test for `None` backup algo + 
unify algo outputs Added a new unit test for the case of `backup_algo == None`, which should return the original algo results. While testing, found that the ZigZag algo returns a pandas Series, while the Posdap algo returns a numpy array, which means that combining them could be problematic. Changed ZigZag to also return a numpy array to unify the implementations. Testing done: - All tests now pass --- .../cleaning/cleaning_methods/jump_smoothing.py | 3 ++- .../analysis/intake/cleaning/location_smoothing.py | 7 ++++--- .../intakeTests/TestLocationSmoothing.py | 11 +++++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py b/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py index 0c60cbe73..a7ae40c61 100644 --- a/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py +++ b/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py @@ -256,7 +256,8 @@ def filter(self, with_speeds_df): for segment in bad_segments: self.inlier_mask_[segment.start:segment.end] = False - logging.debug("after setting values, outlier_mask = %s" % np.nonzero((self.inlier_mask_ == False).to_numpy())) + self.inlier_mask_ = self.inlier_mask_.to_numpy() + logging.debug("after setting values, outlier_mask = %s" % np.nonzero(np.logical_not(self.inlier_mask_))) # logging.debug("point details are %s" % with_speeds_df[np.logical_not(self.inlier_mask_)]) diff --git a/emission/analysis/intake/cleaning/location_smoothing.py b/emission/analysis/intake/cleaning/location_smoothing.py index e696f11ef..54dc85aed 100644 --- a/emission/analysis/intake/cleaning/location_smoothing.py +++ b/emission/analysis/intake/cleaning/location_smoothing.py @@ -188,6 +188,7 @@ def get_points_to_filter(section_points_df, outlier_algo, filtering_algo, backup logging.debug("maxSpeed = %s" % filtering_algo.maxSpeed) if filtering_algo is not None: try: + sel_inlier_mask_ = [True] * 
with_speeds_df.shape[0] filtering_algo.filter(with_speeds_df) recomputed_speeds_df = recalc_speed(with_speeds_df[filtering_algo.inlier_mask_]) recomputed_threshold = outlier_algo.get_threshold(recomputed_speeds_df) @@ -197,12 +198,12 @@ def get_points_to_filter(section_points_df, outlier_algo, filtering_algo, backup if recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0: logging.info("No outliers after first round, default algo worked, to_delete = %s" % np.nonzero(np.logical_not(filtering_algo.inlier_mask_))) - sel_inlier_mask = filtering_algo.inlier_mask + sel_inlier_mask = filtering_algo.inlier_mask_ else: logging.info("After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]) if backup_filtering_algo is None or recomputed_speeds_df.speed.max() < MACH1: - logging.debug("backup algo is %s, max < MACH1 %s, so returning default algo outliers %s" % - (MACH1, np.nonzero(np.logical_not(filtering_algo.inlier_mask_)))) + logging.debug("backup algo is %s, max < MACH1, so returning default algo outliers %s" % + (backup_filtering_algo, np.nonzero(np.logical_not(filtering_algo.inlier_mask_)))) sel_inlier_mask_ = filtering_algo.inlier_mask_ else: backup_filtering_algo.filter(with_speeds_df) diff --git a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py index 08e0d2ee8..9499e50ec 100644 --- a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py +++ b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py @@ -162,6 +162,17 @@ def testFilterAllClusters(self): jump_algo = eaicj.SmoothZigzag(False, 100) backup_algo = eaicj.SmoothPosdap(eaicl.MACH1) + # basic check that if the backup algo is not specified, we return the original values + with_speeds_df = pd.read_csv("emission/tests/data/smoothing_data/all_cluster_case_2.csv") + with_speeds_df.drop(["distance", "speed", "heading"], 
axis="columns", inplace=True) + with_speeds_df["loc"] = with_speeds_df["loc"].apply(lambda lstr: json.loads(lstr.replace("'", '"'))) + filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo, None) + # original values, inserted in + # https://github.com/e-mission/e-mission-server/pull/897/commits/434a9a19b7f41ae868102e0154df95db8ec633c4 + # removed in https://github.com/e-mission/e-mission-server/pull/897/commits/67f5c86206e41168c6f3664fa5a2d4152d9d4091 + expected_result_idx = list(itertools.chain([0], range(2,11), range(12, 14))) + self.assertEqual(list(filtered_points.dropna().index), expected_result_idx) + # US to ocean jump: case 1 of https://github.com/e-mission/e-mission-docs/issues/843 with_speeds_df = pd.read_csv("emission/tests/data/smoothing_data/all_cluster_case_1.csv", index_col=0) with_speeds_df.drop(["distance", "speed", "heading"], axis="columns", inplace=True) From 988871d77f80bd89e4040d5cec7d6021dd144a2f Mon Sep 17 00:00:00 2001 From: Shankari Date: Tue, 24 Jan 2023 11:52:55 -0800 Subject: [PATCH 6/9] :art: Return and record the selected algo correctly Before this change, we only used one algorithm, so we hardcoded it into the result. However, we can now use either the main algorithm or the backup algorithm. So we return the algo also from `get_points_to_filter` and attribute it correctly. `get_points_to_filter` is used only in `location_smoothing` and in the tests. 
So also fix the tests to read both values and check the sel algo in each case Testing done: tests pass --- .../intake/cleaning/location_smoothing.py | 25 +++++++++---------- .../intakeTests/TestLocationSmoothing.py | 12 ++++++--- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/emission/analysis/intake/cleaning/location_smoothing.py b/emission/analysis/intake/cleaning/location_smoothing.py index 54dc85aed..338422e69 100644 --- a/emission/analysis/intake/cleaning/location_smoothing.py +++ b/emission/analysis/intake/cleaning/location_smoothing.py @@ -143,13 +143,13 @@ def filter_jumps(user_id, section_id): backup_filtering_algo = eaicj.SmoothPosdap(MACH1) logging.debug("len(section_points_df) = %s" % len(section_points_df)) - points_to_ignore_df = get_points_to_filter(section_points_df, outlier_algo, filtering_algo, backup_filtering_algo) + (sel_algo, points_to_ignore_df) = get_points_to_filter(section_points_df, outlier_algo, filtering_algo, backup_filtering_algo) if points_to_ignore_df is None: # There were no points to delete return points_to_ignore_df_filtered = points_to_ignore_df._id.dropna() - logging.debug("after filtering ignored points, %s -> %s" % - (len(points_to_ignore_df), len(points_to_ignore_df_filtered))) + logging.debug("after filtering ignored points, using %s, %s -> %s" % + (sel_algo, len(points_to_ignore_df), len(points_to_ignore_df_filtered))) # We shouldn't really filter any fuzzed points because they represent 100m in 60 secs # but let's actually check for that # assert len(points_to_ignore_df) == len(points_to_ignore_df_filtered) @@ -160,7 +160,7 @@ def filter_jumps(user_id, section_id): filter_result.section = section_id filter_result.deleted_points = deleted_point_id_list filter_result.outlier_algo = "BoxplotOutlier" - filter_result.filtering_algo = "SmoothZigzag" + filter_result.filtering_algo = sel_algo.__class__.__name__.split(".")[-1] result_entry = ecwe.Entry.create_entry(user_id, "analysis/smoothing", filter_result) 
ts.insert(result_entry) @@ -188,7 +188,6 @@ def get_points_to_filter(section_points_df, outlier_algo, filtering_algo, backup logging.debug("maxSpeed = %s" % filtering_algo.maxSpeed) if filtering_algo is not None: try: - sel_inlier_mask_ = [True] * with_speeds_df.shape[0] filtering_algo.filter(with_speeds_df) recomputed_speeds_df = recalc_speed(with_speeds_df[filtering_algo.inlier_mask_]) recomputed_threshold = outlier_algo.get_threshold(recomputed_speeds_df) @@ -198,13 +197,13 @@ def get_points_to_filter(section_points_df, outlier_algo, filtering_algo, backup if recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0: logging.info("No outliers after first round, default algo worked, to_delete = %s" % np.nonzero(np.logical_not(filtering_algo.inlier_mask_))) - sel_inlier_mask = filtering_algo.inlier_mask_ + sel_algo = filtering_algo else: logging.info("After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]) if backup_filtering_algo is None or recomputed_speeds_df.speed.max() < MACH1: logging.debug("backup algo is %s, max < MACH1, so returning default algo outliers %s" % (backup_filtering_algo, np.nonzero(np.logical_not(filtering_algo.inlier_mask_)))) - sel_inlier_mask_ = filtering_algo.inlier_mask_ + sel_algo = filtering_algo else: backup_filtering_algo.filter(with_speeds_df) recomputed_speeds_df = recalc_speed(with_speeds_df[backup_filtering_algo.inlier_mask_]) @@ -215,22 +214,22 @@ def get_points_to_filter(section_points_df, outlier_algo, filtering_algo, backup if recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0: logging.info("After second round, no outliers, returning backup to delete %s" % np.nonzero(np.logical_not(backup_filtering_algo.inlier_mask_))) - sel_inlier_mask_ = backup_filtering_algo.inlier_mask_ + sel_algo = backup_filtering_algo else: logging.info("After second round, still have outliers %s" % 
recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]) if recomputed_speeds_df.speed.max() < MACH1: logging.debug("But they are all < %s, so returning backup to delete %s" % (MACH1, np.nonzero(np.logical_not(backup_filtering_algo.inlier_mask_)))) - sel_inlier_mask_ = backup_filtering_algo.inlier_mask_ + sel_algo = backup_filtering_algo else: logging.info("And they are also > %s, backup algo also failed, returning default to delete = %s" % (MACH1, np.nonzero(np.logical_not(filtering_algo.inlier_mask_)))) - sel_inlier_mask_ = filtering_algo.inlier_mask_ + sel_algo = filtering_algo - to_delete_mask = np.logical_not(sel_inlier_mask_) + to_delete_mask = np.logical_not(sel_algo.inlier_mask_) logging.info("After all checks, inlier mask = %s, outlier_mask = %s" % - (np.nonzero(sel_inlier_mask_), np.nonzero(np.logical_not(sel_inlier_mask_)))) - return with_speeds_df[to_delete_mask] + (np.nonzero(sel_algo.inlier_mask_), np.nonzero(to_delete_mask))) + return (sel_algo, with_speeds_df[to_delete_mask]) except Exception as e: logging.exception("Caught error %s while processing section, skipping..." 
% e) return None diff --git a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py index 9499e50ec..858f5701d 100644 --- a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py +++ b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py @@ -166,28 +166,34 @@ def testFilterAllClusters(self): with_speeds_df = pd.read_csv("emission/tests/data/smoothing_data/all_cluster_case_2.csv") with_speeds_df.drop(["distance", "speed", "heading"], axis="columns", inplace=True) with_speeds_df["loc"] = with_speeds_df["loc"].apply(lambda lstr: json.loads(lstr.replace("'", '"'))) - filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo, None) + (sel_algo, filtered_points) = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo, None) # original values, inserted in # https://github.com/e-mission/e-mission-server/pull/897/commits/434a9a19b7f41ae868102e0154df95db8ec633c4 # removed in https://github.com/e-mission/e-mission-server/pull/897/commits/67f5c86206e41168c6f3664fa5a2d4152d9d4091 expected_result_idx = list(itertools.chain([0], range(2,11), range(12, 14))) self.assertEqual(list(filtered_points.dropna().index), expected_result_idx) + self.assertEqual(sel_algo, jump_algo) + self.assertEqual(sel_algo.__class__.__name__.split(".")[-1], "SmoothZigzag") # US to ocean jump: case 1 of https://github.com/e-mission/e-mission-docs/issues/843 with_speeds_df = pd.read_csv("emission/tests/data/smoothing_data/all_cluster_case_1.csv", index_col=0) with_speeds_df.drop(["distance", "speed", "heading"], axis="columns", inplace=True) with_speeds_df["loc"] = with_speeds_df["loc"].apply(lambda lstr: json.loads(lstr.replace("'", '"'))) - filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo, backup_algo) + (sel_algo, filtered_points) = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo, backup_algo) 
expected_result_idx = list(range(16, 21)) self.assertEqual(list(filtered_points.dropna().index), expected_result_idx) + self.assertEqual(sel_algo, backup_algo) + self.assertEqual(sel_algo.__class__.__name__.split(".")[-1], "SmoothPosdap") # PR to pakistan jump: case 2 of https://github.com/e-mission/e-mission-docs/issues/843 with_speeds_df = pd.read_csv("emission/tests/data/smoothing_data/all_cluster_case_2.csv") with_speeds_df.drop(["distance", "speed", "heading"], axis="columns", inplace=True) with_speeds_df["loc"] = with_speeds_df["loc"].apply(lambda lstr: json.loads(lstr.replace("'", '"'))) - filtered_points = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo, backup_algo) + (sel_algo, filtered_points) = eaicl.get_points_to_filter(with_speeds_df, outlier_algo, jump_algo, backup_algo) expected_result_idx = [11] self.assertEqual(list(filtered_points.dropna().index), expected_result_idx) + self.assertEqual(sel_algo, backup_algo) + self.assertEqual(sel_algo.__class__.__name__.split(".")[-1], "SmoothPosdap") def testPointFilteringZigzag(self): classicJumpTrip1 = self.trip_entries[8] From 95f88c54dd0627e0303fed221f9bb7c33baecaa9 Mon Sep 17 00:00:00 2001 From: Shankari Date: Tue, 24 Jan 2023 15:26:49 -0800 Subject: [PATCH 7/9] Fix regressions in tests - Unify algo outputs: `self.inlier_mask_ = self.inlier_mask_.to_numpy()` - remove `to_numpy()` from all the checks in the tests - Return two outputs -> `return (None, None)` Testing done: - All tests in this file pass --- .../cleaning/cleaning_methods/jump_smoothing.py | 3 ++- .../intake/cleaning/location_smoothing.py | 5 ++--- .../intakeTests/TestLocationSmoothing.py | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py b/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py index a7ae40c61..b1e532c18 100644 --- a/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py +++ 
b/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py @@ -242,7 +242,8 @@ def filter(self, with_speeds_df): (self.segment_list, len(self.segment_list))) if len(self.segment_list) == 1: # there were no jumps, so there's nothing to do - logging.info("No jumps, nothing to filter") + logging.info("No jumps, nothing to filter, early return") + self.inlier_mask_ = self.inlier_mask_.to_numpy() return start_segment_idx = self.find_start_segment(self.segment_list) self.segment_list[start_segment_idx].state = Segment.State.GOOD diff --git a/emission/analysis/intake/cleaning/location_smoothing.py b/emission/analysis/intake/cleaning/location_smoothing.py index 338422e69..70c892fcc 100644 --- a/emission/analysis/intake/cleaning/location_smoothing.py +++ b/emission/analysis/intake/cleaning/location_smoothing.py @@ -232,11 +232,10 @@ def get_points_to_filter(section_points_df, outlier_algo, filtering_algo, backup return (sel_algo, with_speeds_df[to_delete_mask]) except Exception as e: logging.exception("Caught error %s while processing section, skipping..." 
% e) - return None + return (None, None) else: logging.debug("no filtering algo specified, returning None") - return None - + return (None, None) def get_filtered_points(section_df, outlier_algo, filtering_algo): """ diff --git a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py index 858f5701d..ee700a3b5 100644 --- a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py +++ b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py @@ -99,10 +99,10 @@ def testPointFilteringShanghaiJump(self): logging.debug("Max speed for section %s = %s" % (i, maxSpeed)) jump_algo.filter(with_speeds_df) - logging.debug("Retaining points %s" % np.nonzero(jump_algo.inlier_mask_.to_numpy())) + logging.debug("Retaining points %s" % np.nonzero(jump_algo.inlier_mask_)) to_delete_mask = np.logical_not(jump_algo.inlier_mask_) - logging.debug("Deleting points %s" % np.nonzero(to_delete_mask.to_numpy())) + logging.debug("Deleting points %s" % np.nonzero(to_delete_mask)) delete_ids = list(with_speeds_df[to_delete_mask]._id) logging.debug("Deleting ids %s" % delete_ids) @@ -137,10 +137,10 @@ def testPointFilteringRichmondJump(self): logging.debug("Max speed for section %s = %s" % (i, maxSpeed)) jump_algo.filter(with_speeds_df) - logging.debug("Retaining points %s" % np.nonzero(jump_algo.inlier_mask_.to_numpy())) + logging.debug("Retaining points %s" % np.nonzero(jump_algo.inlier_mask_)) to_delete_mask = np.logical_not(jump_algo.inlier_mask_) - logging.debug("Deleting points %s" % np.nonzero(to_delete_mask.to_numpy())) + logging.debug("Deleting points %s" % np.nonzero(to_delete_mask)) delete_ids = list(with_speeds_df[to_delete_mask]._id) logging.debug("Deleting ids %s" % delete_ids) @@ -215,17 +215,17 @@ def testPointFilteringZigzag(self): logging.debug("Max speed for section %s = %s" % (i, maxSpeed)) jump_algo.filter(with_speeds_df) - logging.debug("Retaining points %s" % 
np.nonzero(jump_algo.inlier_mask_.to_numpy())) + logging.debug("Retaining points %s" % np.nonzero(jump_algo.inlier_mask_)) to_delete_mask = np.logical_not(jump_algo.inlier_mask_) - logging.debug("Deleting points %s" % np.nonzero(to_delete_mask.to_numpy())) + logging.debug("Deleting points %s" % np.nonzero(to_delete_mask)) delete_ids = list(with_speeds_df[to_delete_mask]._id) logging.debug("Deleting ids %s" % delete_ids) if i == 0: # this is the zigzag section - self.assertEqual(np.nonzero(to_delete_mask.to_numpy())[0].tolist(), + self.assertEqual(np.nonzero(to_delete_mask)[0].tolist(), [25, 64, 114, 115, 116, 117, 118, 119, 120, 123, 126]) self.assertEqual(delete_ids, [boi.ObjectId('55edafe77d65cb39ee9882ff'), @@ -240,7 +240,7 @@ def testPointFilteringZigzag(self): boi.ObjectId('55edcc217d65cb39ee98841f'), boi.ObjectId('55edcc217d65cb39ee988429')]) else: - self.assertEqual(len(np.nonzero(to_delete_mask.to_numpy())[0]), 0) + self.assertEqual(len(np.nonzero(to_delete_mask)[0]), 0) self.assertEqual(len(delete_ids), 0) def testFilterSection(self): From 5a4ae3d6b5673cbbbce7f7736b9f8b9eb05a9acc Mon Sep 17 00:00:00 2001 From: Shankari Date: Tue, 24 Jan 2023 18:04:55 -0800 Subject: [PATCH 8/9] Fix regression caused by moving the second round checking out When we moved the second round checks to the calling function in cebb81fac17d1e4684d3b76754ecabaa49af1a57 we caused a very subtle regression. The filtering code had an early return if there were no jumps detected. So in that case, we would not try the second round of checks, or attempt to filter again. However, when we moved the second round checking to the outer function, we called the second round anyway even if the first round didn't detect any jumps. And in this one case, we actually found an outlier in the second round, which caused the test to fail. Fixed by checking to see if there were no outliers in the first round and skipping the second round check in that case. 
Everything in the `else` for the `if outlier_arr[0].shape[0] == 0:` is unchanged; it has only been indented one more level. The check for the length was unexpectedly complicated and took many hours to debug, so I added a simple unit test for it. Note also that it is not clear if this is the correct long-term approach. If there were no jumps, then why did using the backup change anything? Maybe we should always use the backup. But changing this to avoid the regression for now; will look at this the next time we look at smoothing. Testing done: - `TestPipelineRealData.testIosJumpsAndUntrackedSquishing` passes - `TestLocationSmoothing` passes --- .../cleaning_methods/jump_smoothing.py | 3 +- .../intake/cleaning/location_smoothing.py | 64 ++++++++++--------- .../intakeTests/TestLocationSmoothing.py | 13 ++++ 3 files changed, 50 insertions(+), 30 deletions(-) diff --git a/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py b/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py index b1e532c18..6d5d872d5 100644 --- a/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py +++ b/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py @@ -242,7 +242,8 @@ def filter(self, with_speeds_df): (self.segment_list, len(self.segment_list))) if len(self.segment_list) == 1: # there were no jumps, so there's nothing to do - logging.info("No jumps, nothing to filter, early return") + logging.info("No jumps, nothing to filter, early return, series = %s, to_numpy = %s" % + (self.inlier_mask_, self.inlier_mask_.to_numpy())) self.inlier_mask_ = self.inlier_mask_.to_numpy() return start_segment_idx = self.find_start_segment(self.segment_list) diff --git a/emission/analysis/intake/cleaning/location_smoothing.py b/emission/analysis/intake/cleaning/location_smoothing.py index 70c892fcc..2e158a1c1 100644 --- a/emission/analysis/intake/cleaning/location_smoothing.py +++ b/emission/analysis/intake/cleaning/location_smoothing.py @@ -189,42 +189,48 @@ 
def get_points_to_filter(section_points_df, outlier_algo, filtering_algo, backup if filtering_algo is not None: try: filtering_algo.filter(with_speeds_df) - recomputed_speeds_df = recalc_speed(with_speeds_df[filtering_algo.inlier_mask_]) - recomputed_threshold = outlier_algo.get_threshold(recomputed_speeds_df) - logging.info("After first round, recomputed max = %s, recomputed threshold = %s" % - (recomputed_speeds_df.speed.max(), recomputed_threshold)) - # assert recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0, "After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold] - if recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0: - logging.info("No outliers after first round, default algo worked, to_delete = %s" % - np.nonzero(np.logical_not(filtering_algo.inlier_mask_))) + outlier_arr = np.nonzero(np.logical_not(filtering_algo.inlier_mask_)) + logging.debug("After first filter, inliers = %s, outliers = %s of type %s" % + (filtering_algo.inlier_mask_, outlier_arr, type(outlier_arr))) + if outlier_arr[0].shape[0] == 0: sel_algo = filtering_algo else: - logging.info("After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]) - if backup_filtering_algo is None or recomputed_speeds_df.speed.max() < MACH1: - logging.debug("backup algo is %s, max < MACH1, so returning default algo outliers %s" % - (backup_filtering_algo, np.nonzero(np.logical_not(filtering_algo.inlier_mask_)))) + recomputed_speeds_df = recalc_speed(with_speeds_df[filtering_algo.inlier_mask_]) + recomputed_threshold = outlier_algo.get_threshold(recomputed_speeds_df) + logging.info("After first round, recomputed max = %s, recomputed threshold = %s" % + (recomputed_speeds_df.speed.max(), recomputed_threshold)) + # assert recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0, "After first 
round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold] + if recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0: + logging.info("No outliers after first round, default algo worked, to_delete = %s" % + np.nonzero(np.logical_not(filtering_algo.inlier_mask_))) sel_algo = filtering_algo else: - backup_filtering_algo.filter(with_speeds_df) - recomputed_speeds_df = recalc_speed(with_speeds_df[backup_filtering_algo.inlier_mask_]) - recomputed_threshold = outlier_algo.get_threshold(recomputed_speeds_df) - logging.info("After second round, max = %s, recomputed threshold = %s" % - (recomputed_speeds_df.speed.max(), recomputed_threshold)) - # assert recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0, "After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold] - if recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0: - logging.info("After second round, no outliers, returning backup to delete %s" % - np.nonzero(np.logical_not(backup_filtering_algo.inlier_mask_))) - sel_algo = backup_filtering_algo + logging.info("After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]) + if backup_filtering_algo is None or recomputed_speeds_df.speed.max() < MACH1: + logging.debug("backup algo is %s, max < MACH1, so returning default algo outliers %s" % + (backup_filtering_algo, np.nonzero(np.logical_not(filtering_algo.inlier_mask_)))) + sel_algo = filtering_algo else: - logging.info("After second round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]) - if recomputed_speeds_df.speed.max() < MACH1: - logging.debug("But they are all < %s, so returning backup to delete %s" % - (MACH1, np.nonzero(np.logical_not(backup_filtering_algo.inlier_mask_)))) + 
backup_filtering_algo.filter(with_speeds_df) + recomputed_speeds_df = recalc_speed(with_speeds_df[backup_filtering_algo.inlier_mask_]) + recomputed_threshold = outlier_algo.get_threshold(recomputed_speeds_df) + logging.info("After second round, max = %s, recomputed threshold = %s" % + (recomputed_speeds_df.speed.max(), recomputed_threshold)) + # assert recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0, "After first round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold] + if recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold].shape[0] == 0: + logging.info("After second round, no outliers, returning backup to delete %s" % + np.nonzero(np.logical_not(backup_filtering_algo.inlier_mask_))) sel_algo = backup_filtering_algo else: - logging.info("And they are also > %s, backup algo also failed, returning default to delete = %s" % - (MACH1, np.nonzero(np.logical_not(filtering_algo.inlier_mask_)))) - sel_algo = filtering_algo + logging.info("After second round, still have outliers %s" % recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]) + if recomputed_speeds_df.speed.max() < MACH1: + logging.debug("But they are all < %s, so returning backup to delete %s" % + (MACH1, np.nonzero(np.logical_not(backup_filtering_algo.inlier_mask_)))) + sel_algo = backup_filtering_algo + else: + logging.info("And they are also > %s, backup algo also failed, returning default to delete = %s" % + (MACH1, np.nonzero(np.logical_not(filtering_algo.inlier_mask_)))) + sel_algo = filtering_algo to_delete_mask = np.logical_not(sel_algo.inlier_mask_) logging.info("After all checks, inlier mask = %s, outlier_mask = %s" % diff --git a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py index ee700a3b5..734d3bef6 100644 --- a/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py +++ 
b/emission/tests/analysisTests/intakeTests/TestLocationSmoothing.py @@ -16,6 +16,7 @@ import bson.objectid as boi import numpy as np import attrdict as ad +import pandas as pd # Our imports import emission.net.usercache.abstract_usercache as enua @@ -195,6 +196,18 @@ def testFilterAllClusters(self): self.assertEqual(sel_algo, backup_algo) self.assertEqual(sel_algo.__class__.__name__.split(".")[-1], "SmoothPosdap") + # I found some super weird behavior while fixing the issue with the early + # return when there are no jumps + # It looks like series -> numpy array -> non_zero returns a tuple so we can't get its length directly + # instead, we return the size of the first element in the tuple + def testWeirdNumpyBehavior(self): + test = pd.Series([True] * 10) + test = test.to_numpy() + self.assertEqual(test.shape, (10,)) + self.assertEqual(np.logical_not(test).shape, (10,)) + np.testing.assert_equal(np.nonzero(np.logical_not(test)), (np.array([], dtype=np.int64),)) + self.assertEqual(np.nonzero(np.logical_not(test))[0].shape[0], 0) + def testPointFilteringZigzag(self): classicJumpTrip1 = self.trip_entries[8] self.loadPointsForTrip(classicJumpTrip1.get_id()) From 29e78de3e78e1f33a7a77ba89ac34d5f7326332e Mon Sep 17 00:00:00 2001 From: Shankari Date: Tue, 24 Jan 2023 22:46:45 -0800 Subject: [PATCH 9/9] :fire: Remove unused function and extraneous logs `get_filtered_points` is not used anywhere else. We don't need to print out the series and the numpy version any more now that we have added the unit test in 5a4ae3d6b5673cbbbce7f7736b9f8b9eb05a9acc --- .../cleaning_methods/jump_smoothing.py | 3 +- .../intake/cleaning/location_smoothing.py | 28 ------------------- 2 files changed, 1 insertion(+), 30 deletions(-) diff --git a/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py b/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py index 6d5d872d5..b1e532c18 100644 --- a/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py +++ 
b/emission/analysis/intake/cleaning/cleaning_methods/jump_smoothing.py @@ -242,8 +242,7 @@ def filter(self, with_speeds_df): (self.segment_list, len(self.segment_list))) if len(self.segment_list) == 1: # there were no jumps, so there's nothing to do - logging.info("No jumps, nothing to filter, early return, series = %s, to_numpy = %s" % - (self.inlier_mask_, self.inlier_mask_.to_numpy())) + logging.info("No jumps, nothing to filter, early return") self.inlier_mask_ = self.inlier_mask_.to_numpy() return start_segment_idx = self.find_start_segment(self.segment_list) diff --git a/emission/analysis/intake/cleaning/location_smoothing.py b/emission/analysis/intake/cleaning/location_smoothing.py index 2e158a1c1..78ba13969 100644 --- a/emission/analysis/intake/cleaning/location_smoothing.py +++ b/emission/analysis/intake/cleaning/location_smoothing.py @@ -243,34 +243,6 @@ def get_points_to_filter(section_points_df, outlier_algo, filtering_algo, backup logging.debug("no filtering algo specified, returning None") return (None, None) -def get_filtered_points(section_df, outlier_algo, filtering_algo): - """ - Filter the points that correspond to the section object that is passed in. - The section object is an AttrDict with the startTs and endTs fields. - Returns a filtered df with the index after the initial filter for accuracy - TODO: Switch this to the section wrapper object going forward - TODO: Note that here, we assume that the data has already been chunked into sections. - But really, we need to filter (at least for accuracy) before segmenting in - order to avoid issues like https://github.com/e-mission/e-mission-data-collection/issues/45 - """ - with_speeds_df = add_dist_heading_speed(section_df) - # if filtering algo is none, there's nothing that can use the max speed - if outlier_algo is not None and filtering_algo is not None: - maxSpeed = outlier_algo.get_threshold(with_speeds_df) - # TODO: Is this the best way to do this? 
Or should I pass this in as an argument to filter? - # Or create an explicit set_speed() method? - # Or pass the outlier_algo as the parameter to the filtering_algo? - filtering_algo.maxSpeed = maxSpeed - if filtering_algo is not None: - try: - filtering_algo.filter(with_speeds_df) - return with_speeds_df[filtering_algo.inlier_mask_] - except Exception as e: - logging.info("Caught error %s while processing section, skipping..." % e) - return with_speeds_df - else: - return with_speeds_df - def _ios_fill_fake_data(locs_df): diff_ts = locs_df.ts.diff() fill_ends = diff_ts[diff_ts > 60].index.tolist()