Skip to content

Commit

Permalink
Merge pull request #27 from nyrkio/to-asf-upstream2
Browse files Browse the repository at this point in the history
Upstreaming 2024 Nyrkiö patches
  • Loading branch information
henrikingo authored Jan 10, 2025
2 parents bc748fc + 542767d commit 72c34f6
Show file tree
Hide file tree
Showing 10 changed files with 734 additions and 141 deletions.
47 changes: 36 additions & 11 deletions hunter/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,18 @@ class ComparativeStats:
std_2: float
pvalue: float

def forward_rel_change(self):
def forward_rel_change(self, value_if_nan=0):
"""Relative change from left to right"""
if self.mean_1 == 0:
return value_if_nan

return self.mean_2 / self.mean_1 - 1.0

def backward_rel_change(self):
def backward_rel_change(self, value_if_nan=0):
"""Relative change from right to left"""
if self.mean_2 == 0:
return value_if_nan

return self.mean_1 / self.mean_2 - 1.0

def forward_change_percent(self) -> float:
Expand Down Expand Up @@ -180,7 +186,6 @@ def merge(
:param max_pvalue: maximum accepted pvalue
:param min_magnitude: minimum accepted relative change
"""

tester = TTestSignificanceTester(max_pvalue)
while change_points:

Expand Down Expand Up @@ -214,7 +219,8 @@ def recompute(index: int):
return change_points


def split(series: np.array, window_len: int = 30, max_pvalue: float = 0.001) -> List[ChangePoint]:
def split(series: np.array, window_len: int = 30, max_pvalue: float = 0.001,
new_points=None, old_cp=None) -> List[ChangePoint]:
"""
Finds change points by splitting the series top-down.
Expand All @@ -237,17 +243,36 @@ def split(series: np.array, window_len: int = 30, max_pvalue: float = 0.001) ->
start = 0
step = int(window_len / 2)
indexes = []
# N new_points are appended to the end of series. Typically N=1.
# old_cp are the weak change points from before new points were added.
# We now just compute E-Divisive for the tail of the series, beginning at
# max(old_cp[-1], a step that is over 2 window_len from the end)
if new_points is not None and old_cp is not None:
indexes = [c.index for c in old_cp]
steps_needed = new_points/window_len + 4
max_start = len(series) - steps_needed*window_len
for c in old_cp:
if c.index < max_start:
start = c.index
for s in range(0, len(series), step):
if s < max_start and start < s:
start = s

tester = TTestSignificanceTester(max_pvalue)
while start < len(series):
end = min(start + window_len, len(series))
calculator = cext_calculator

algo = EDivisive(seed=None, calculator=calculator, significance_tester=tester)
pts = algo.get_change_points(series[start:end])
new_indexes = [p.index + start for p in pts]
new_indexes.sort()
last_new_change_point_index = next(iter(new_indexes[-1:]), 0)
start = max(last_new_change_point_index, start + step)
indexes += new_indexes
# incremental Hunter can duplicate an old cp
for i in new_indexes:
if i not in indexes:
indexes += [i]

window_endpoints = [0] + indexes + [len(series)]
return [tester.change_point(i, series, window_endpoints) for i in indexes]
Expand All @@ -258,13 +283,13 @@ def compute_change_points_orig(series: np.array, max_pvalue: float = 0.001) -> L
tester = QHatPermutationsSignificanceTester(calculator, pvalue=max_pvalue, permutations=100)
algo = EDivisive(seed=None, calculator=calculator, significance_tester=tester)
pts = algo.get_change_points(series)
indexes = [p.index for p in pts]
window_endpoints = [0] + indexes + [len(series)]
return [tester.change_point(i, series, window_endpoints) for i in indexes]
return pts, None


def compute_change_points(
series: np.array, window_len: int = 50, max_pvalue: float = 0.001, min_magnitude: float = 0.05
series: np.array, window_len: int = 50, max_pvalue: float = 0.001, min_magnitude: float = 0.0,
new_data=None, old_weak_cp=None
) -> List[ChangePoint]:
change_points = split(series, window_len, max_pvalue * 10)
return merge(change_points, series, max_pvalue, min_magnitude)
first_pass_pvalue = max_pvalue * 10 if max_pvalue < 0.05 else (max_pvalue * 2 if max_pvalue < 0.5 else max_pvalue)
weak_change_points = split(series, window_len, first_pass_pvalue, new_points=new_data, old_cp=old_weak_cp)
return merge(weak_change_points, series, max_pvalue, min_magnitude), weak_change_points
2 changes: 1 addition & 1 deletion hunter/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def __format_log_annotated(self, test_name: str) -> str:
def __format_json(self, test_name: str) -> str:
import json

return json.dumps({test_name: [cpg.to_json() for cpg in self.__change_points]})
return json.dumps({test_name: [cpg.to_json(rounded=True) for cpg in self.__change_points]})

def __format_regressions_only(self, test_name: str) -> str:
output = []
Expand Down
Loading

0 comments on commit 72c34f6

Please sign in to comment.