Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add move_quantile function #418

Open
wants to merge 34 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
524afbf
Add moving quantile
andrii-riazanov Aug 26, 2021
4f98aa4
initial changes from median to quantile
andrii-riazanov Sep 14, 2022
e470fa2
initial changes from median to quantile
andrii-riazanov Sep 14, 2022
1c97aac
Merge branch 'quantile' of https://github.com/andrii-riazanov/bottlen…
andrii-riazanov Sep 15, 2022
10b8824
Change all move_median to move_quantile
andrii-riazanov Sep 15, 2022
c718a18
Add move_median as move_quantile without q argument at C level
andrii-riazanov Sep 15, 2022
009c835
Fix bug with addressing quantile before assignment
andrii-riazanov Sep 16, 2022
832035a
Initial tests and some fixes
andrii-riazanov Sep 17, 2022
7af7168
Finish extensive testing of move_quantile
andrii-riazanov Sep 17, 2022
42eddad
Ignore warnings from numpy about infs and NaNs
andrii-riazanov Sep 17, 2022
c2a2ae3
move_quantile(q=0) vs move_min benching
andrii-riazanov Sep 21, 2022
9f4c5dc
Revert "move_quantile(q=0) vs move_min benching"
andrii-riazanov Sep 21, 2022
9447697
Some changes (to amend later)
andrii-riazanov Sep 21, 2022
de181da
Bench move_quantile(q=0.5) with slow.move_median
andrii-riazanov Sep 21, 2022
02a0ce1
Bring old move_median, add move_quantile separately
andrii-riazanov Sep 21, 2022
cd49b4f
Finish bringing move_median back
andrii-riazanov Sep 21, 2022
fcaefde
Move move_quantile to C level fully
andrii-riazanov Sep 21, 2022
1bddedd
Refactor parse_args function in move_template
andrii-riazanov Sep 22, 2022
7a413cb
Add docs and comments
andrii-riazanov Sep 22, 2022
6851ed2
Update move_test.py
andrii-riazanov Sep 22, 2022
97ecd15
Actually add docs and comments
andrii-riazanov Sep 23, 2022
a7d5c22
Add comments, modify tests, change back gitignore
andrii-riazanov Sep 23, 2022
5a2bcac
Refactor parse_args again to actually work
andrii-riazanov Sep 23, 2022
c390863
Dial tests back a little to run reasonable time
andrii-riazanov Sep 23, 2022
6f1e5d4
Modify benches, restore old files
andrii-riazanov Sep 23, 2022
7f1c3af
Change `packaging` module to `pkg_resources`
andrii-riazanov Sep 26, 2022
4dadfe4
Update move_quantile benches in asv with q=0.25
andrii-riazanov Sep 27, 2022
9012e24
Make mm_handle and mq_handle the same
andrii-riazanov Sep 28, 2022
72677f8
Median and quantile with function pointers
andrii-riazanov Sep 28, 2022
2c892db
Support of iterable q argument for move_quantile
andrii-riazanov Oct 2, 2022
654ab14
Make tests work with positional q in move_quantile
andrii-riazanov Oct 2, 2022
04ce117
Merge branch 'master' into quantile
andrii-riazanov Jan 31, 2023
00cd119
Merge branch 'master' into quantile
andrii-riazanov Mar 19, 2023
4ec8945
Merge branch 'master' into quantile
andrii-riazanov Apr 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions asv_bench/benchmarks/move.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ def time_move_argmax(self, dtype, shape, window):
def time_move_median(self, dtype, shape, window):
bn.move_median(self.arr, window)

def time_move_quantile(self, dtype, shape, window):
bn.move_quantile(self.arr, window, q=0.25)

def time_move_rank(self, dtype, shape, window):
bn.move_rank(self.arr, window)

Expand Down Expand Up @@ -83,6 +86,9 @@ def time_move_argmax(self, dtype, shape, order, axis, window):

def time_move_median(self, dtype, shape, order, axis, window):
bn.move_median(self.arr, window, axis=axis)

def time_move_quantile(self, dtype, shape, order, axis, window):
bn.move_quantile(self.arr, window, axis=axis, q=0.25)

def time_move_rank(self, dtype, shape, order, axis, window):
bn.move_rank(self.arr, window, axis=axis)
5 changes: 4 additions & 1 deletion bottleneck/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from . import slow
from ._pytesttester import PytestTester
from .move import (move_argmax, move_argmin, move_max, move_mean, move_median,
move_min, move_rank, move_std, move_sum, move_var)
move_min, move_rank, move_std, move_sum, move_var)

from .src.move_quantile import move_quantile

from .nonreduce import replace
from .nonreduce_axis import (argpartition, nanrankdata, partition, push,
rankdata)
Expand Down
2 changes: 2 additions & 0 deletions bottleneck/benchmark/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ def getsetups(setup, shapes, nans, axes, dtype, order):
run = {}
run["name"] = func
run["statements"] = ["bn_func(a, w, 1, axis)", "sw_func(a, w, 1, axis)"]
if func == "move_quantile":
run["statements"] = ["bn_func(a, w, 1, axis, q=0.25)", "sw_func(a, w, 1, axis, q=0.25)"]
setup = """
from bottleneck.slow.move import %s as sw_func
from bottleneck import %s as bn_func
Expand Down
38 changes: 22 additions & 16 deletions bottleneck/benchmark/bench_detailed.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,14 @@ def benchsuite(function, fraction_nan):
index = 0
elif function in ["rankdata", "nanrankdata"]:
index = 0
elif function in bn.get_functions("move", as_string=True):
elif function in bn.get_functions("move", as_string=True) and function != "move_quantile":
index = 1
elif function in ["partition", "argpartition", "push"]:
index = 2
elif function == "replace":
index = 3
elif function == "move_quantile":
index = 4
else:
raise ValueError("`function` (%s) not recognized" % function)

Expand Down Expand Up @@ -133,30 +135,32 @@ def get_instructions():
"(a, 1)", # move
"(a, 0)", # (arg)partition
"(a, np.nan, 0)", # replace
"(a, 1, q=0.25)", # move_quantile
10,
),
("rand(10)", "(a)", "(a, 2)", "(a, 2)", "(a, np.nan, 0)", 10),
("rand(100)", "(a)", "(a, 20)", "(a, 20)", "(a, np.nan, 0)", 6),
("rand(1000)", "(a)", "(a, 200)", "(a, 200)", "(a, np.nan, 0)", 3),
("rand(1000000)", "(a)", "(a, 200)", "(a, 200)", "(a, np.nan, 0)", 2),
("rand(10)", "(a)", "(a, 2)", "(a, 2)", "(a, np.nan, 0)", "(a, 2, q=0.25)", 10),
("rand(100)", "(a)", "(a, 20)", "(a, 20)", "(a, np.nan, 0)", "(a, 20, q=0.25)", 6),
("rand(1000)", "(a)", "(a, 200)", "(a, 200)", "(a, np.nan, 0)", "(a, 200, q=0.25)", 3),
("rand(1000000)", "(a)", "(a, 200)", "(a, 200)", "(a, np.nan, 0)", None, 2),
# 2d input array
("rand(10, 10)", "(a)", "(a, 2)", "(a, 2)", "(a, np.nan, 0)", 6),
("rand(100, 100)", "(a)", "(a, 20)", "(a, 20)", "(a, np.nan, 0)", 3),
("rand(1000, 1000)", "(a)", "(a, 200)", "(a, 200)", "(a, np.nan, 0)", 2),
("rand(10, 10)", "(a, 1)", None, None, None, 6),
("rand(100, 100)", "(a, 1)", None, None, None, 3),
("rand(1000, 1000)", "(a, 1)", None, None, None, 2),
("rand(100000, 2)", "(a, 1)", "(a, 1)", "(a, 1)", None, 2),
("rand(10, 10)", "(a, 0)", None, None, None, 6),
("rand(100, 100)", "(a, 0)", "(a, 20, axis=0)", None, None, 3),
("rand(1000, 1000)", "(a, 0)", "(a, 200, axis=0)", None, None, 2),
("rand(10, 10)", "(a)", "(a, 2)", "(a, 2)", "(a, np.nan, 0)", "(a, 2, q=0.25)", 6),
("rand(100, 100)", "(a)", "(a, 20)", "(a, 20)", "(a, np.nan, 0)", "(a, 20, q=0.25)", 3),
("rand(1000, 1000)", "(a)", "(a, 200)", "(a, 200)", "(a, np.nan, 0)", None ,2),
("rand(10, 10)", "(a, 1)", None, None, None, None, 6),
("rand(100, 100)", "(a, 1)", None, None, None, None, 3),
("rand(1000, 1000)", "(a, 1)", None, None, None, None, 2),
("rand(100000, 2)", "(a, 1)", "(a, 1)", "(a, 1)", None, None, 2),
("rand(10, 10)", "(a, 0)", None, None, None, None, 6),
("rand(100, 100)", "(a, 0)", "(a, 20, axis=0)", None, None, None, 3),
("rand(1000, 1000)", "(a, 0)", "(a, 200, axis=0)", None, None, None, 2),
# 3d input array
(
"rand(100, 100, 100)",
"(a, 0)",
"(a, 20, axis=0)",
"(a, 20, axis=0)",
None,
"(a, 20, axis=0, q=0.25)",
2,
),
(
Expand All @@ -165,6 +169,7 @@ def get_instructions():
"(a, 20, axis=1)",
"(a, 20, axis=1)",
None,
"(a, 20, axis=1, q=0.25)",
2,
),
(
Expand All @@ -173,10 +178,11 @@ def get_instructions():
"(a, 20, axis=2)",
"(a, 20, axis=2)",
"(a, np.nan, 0)",
"(a, 20, axis=2, q=0.25)",
2,
),
# 0d input array
("array(1.0)", "(a)", None, None, "(a, 0, 2)", 10),
("array(1.0)", "(a)", None, None, "(a, 0, 2)", None, 10),
]

return instructions
38 changes: 34 additions & 4 deletions bottleneck/slow/move.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"move_argmin",
"move_argmax",
"move_median",
"move_quantile",
"move_rank",
]

Expand All @@ -37,13 +38,15 @@ def move_var(a, window, min_count=None, axis=-1, ddof=0):
"Slow move_var for unaccelerated dtype"
return move_func(np.nanvar, a, window, min_count, axis=axis, ddof=ddof)


def move_min(a, window, min_count=None, axis=-1):
# move_min, move_max, and move_median from bn.slow can be called
# from bn.move_quantile when the input array is byte-swapped; in that
# case they receive a `q` argument, hence the **kwargs on these functions
def move_min(a, window, min_count=None, axis=-1, **kwargs):
"Slow move_min for unaccelerated dtype"
return move_func(np.nanmin, a, window, min_count, axis=axis)


def move_max(a, window, min_count=None, axis=-1):
def move_max(a, window, min_count=None, axis=-1, **kwargs):
"Slow move_max for unaccelerated dtype"
return move_func(np.nanmax, a, window, min_count, axis=axis)

Expand Down Expand Up @@ -100,16 +103,43 @@ def argmax(a, axis):
return move_func(argmax, a, window, min_count, axis=axis)


def move_median(a, window, min_count=None, axis=-1):
def move_median(a, window, min_count=None, axis=-1, **kwargs):
"Slow move_median for unaccelerated dtype"
return move_func(np.nanmedian, a, window, min_count, axis=axis)


# The keyword that selects the interpolation method of np.nanquantile was
# renamed from `interpolation` to `method` in NumPy 1.22.0.
# NumPy's own version helper is used for the comparison so that importing
# this module does not depend on `pkg_resources`, which belongs to setuptools
# (not the standard library), is deprecated, and may not be installed.
if np.lib.NumpyVersion(np.__version__) >= "1.22.0":
    METHOD_KEYWORD = "method"
else:
    METHOD_KEYWORD = "interpolation"

def move_quantile(a, window, min_count=None, axis=-1, q=0.5, **kwargs):
    """Slow move_quantile for unaccelerated dtype.

    Delegates to ``move_func`` with a quantile reducer chosen from the input:

    * no infinities in ``a``: ``np.nanquantile`` with the 'midpoint'
      interpolation method (any method passed in ``kwargs`` is overridden);
    * otherwise: ``np_nanquantile_infs``.

    ``q`` and any remaining ``kwargs`` are forwarded to the reducer through
    ``move_func``.  All warnings raised while computing are suppressed.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        if np.isinf(a).any():
            # Infinite values get the dedicated reducer.
            reducer = np_nanquantile_infs
        else:
            kwargs[METHOD_KEYWORD] = 'midpoint'
            reducer = np.nanquantile
        return move_func(reducer, a, window, min_count, axis=axis, q=q, **kwargs)

def move_rank(a, window, min_count=None, axis=-1):
    """Slow move_rank for unaccelerated dtype.

    Hands ``lastrank`` to ``move_func`` together with the array, window,
    ``min_count`` and ``axis``, and returns whatever ``move_func`` produces.
    """
    ranked = move_func(lastrank, a, window, min_count, axis=axis)
    return ranked


def np_nanquantile_infs(a, **kwargs):
    """Quantile helper used by move_quantile when the input contains infs.

    Evaluates ``np.nanquantile(a, **kwargs)`` twice, once with the 'lower'
    and once with the 'higher' interpolation method, and returns the mean of
    the two results.  Any interpolation method supplied by the caller under
    ``METHOD_KEYWORD`` is overridden.
    """
    lower_q, higher_q = (
        np.nanquantile(a, **{**kwargs, METHOD_KEYWORD: method})
        for method in ('lower', 'higher')
    )
    return (lower_q + higher_q) / 2

# magic utility functions ---------------------------------------------------


Expand Down
Loading