Skip to content

Commit

Permalink
FIX-modin-project#7157: Make sure quantile function works with `numeric_only=True` (modin-project#7160)
Browse files Browse the repository at this point in the history

Co-authored-by: Iaroslav Igoshev <[email protected]>
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev and YarShev authored Apr 11, 2024
1 parent fe57e19 commit 0755a61
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 14 deletions.
2 changes: 1 addition & 1 deletion modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2588,7 +2588,7 @@ def quantile_for_list_of_values(self, **kwargs):
axis = kwargs.get("axis", 0)
q = kwargs.get("q")
numeric_only = kwargs.get("numeric_only", True)
assert isinstance(q, (pandas.Series, np.ndarray, pandas.Index, list))
assert isinstance(q, (pandas.Series, np.ndarray, pandas.Index, list, tuple))

if numeric_only:
new_columns = self._modin_frame.numeric_columns()
Expand Down
29 changes: 16 additions & 13 deletions modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2304,6 +2304,7 @@ def quantile(
def check_dtype(t):
return is_numeric_dtype(t) or lib.is_np_dtype(t, "mM")

numeric_only_df = self
if not numeric_only:
# If not numeric_only and columns, then check all columns are either
# numeric, timestamp, or timedelta
Expand All @@ -2322,31 +2323,33 @@ def check_dtype(t):
)
)
else:
# Normally pandas returns this near the end of the quantile, but we
# can't afford the overhead of running the entire operation before
# we error.
if not any(is_numeric_dtype(t) for t in self._get_dtypes()):
raise ValueError("need at least one array to concatenate")
numeric_only_df = self.drop(
columns=[
i for i in self.dtypes.index if not is_numeric_dtype(self.dtypes[i])
]
)

# check that all qs are between 0 and 1
validate_percentile(q)
axis = self._get_axis_number(axis)
if isinstance(q, (pandas.Series, np.ndarray, pandas.Index, list)):
return self.__constructor__(
query_compiler=self._query_compiler.quantile_for_list_of_values(
axis = numeric_only_df._get_axis_number(axis)
if isinstance(q, (pandas.Series, np.ndarray, pandas.Index, list, tuple)):
return numeric_only_df.__constructor__(
query_compiler=numeric_only_df._query_compiler.quantile_for_list_of_values(
q=q,
axis=axis,
numeric_only=numeric_only,
# `numeric_only=True` has already been processed by using `self.drop` function
numeric_only=False,
interpolation=interpolation,
method=method,
)
)
else:
result = self._reduce_dimension(
self._query_compiler.quantile_for_single_value(
result = numeric_only_df._reduce_dimension(
numeric_only_df._query_compiler.quantile_for_single_value(
q=q,
axis=axis,
numeric_only=numeric_only,
# `numeric_only=True` has already been processed by using `self.drop` function
numeric_only=False,
interpolation=interpolation,
method=method,
)
Expand Down
26 changes: 26 additions & 0 deletions modin/tests/pandas/dataframe/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,32 @@ def test_quantile(request, data, q):
modin_df.T.quantile(q)


def test_quantile_7157():
    """Regression test for ``quantile(..., numeric_only=True)`` on mixed dtypes.

    Builds a frame made of string columns followed by float columns and
    runs ``quantile`` with ``numeric_only=True`` through ``eval_general``
    on the Modin/pandas pair for three shapes of ``q``: a scalar, a
    one-element tuple, and a two-element tuple.
    """
    # for details: https://github.com/modin-project/modin/issues/7157
    n_rows = 100
    n_fcols = 10
    n_mcols = 5

    # Float feature columns.
    df1_md, df1_pd = create_test_dfs(
        random_state.rand(n_rows, n_fcols),
        columns=[f"feat_{i}" for i in range(n_fcols)],
    )
    # String columns. Each one needs a distinct key: with a constant key the
    # comprehension keeps overwriting the same entry and yields a single
    # column instead of ``n_mcols`` of them.
    df2_md, df2_pd = create_test_dfs(
        {f"test_string{i}": ["test_string2"] * n_rows for i in range(n_mcols)}
    )
    df3_md = pd.concat([df2_md, df1_md], axis=1)
    df3_pd = pandas.concat([df2_pd, df1_pd], axis=1)

    eval_general(df3_md, df3_pd, lambda df: df.quantile(0.25, numeric_only=True))
    eval_general(df3_md, df3_pd, lambda df: df.quantile((0.25,), numeric_only=True))
    eval_general(
        df3_md, df3_pd, lambda df: df.quantile((0.25, 0.75), numeric_only=True)
    )


@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize(
"na_option", ["keep", "top", "bottom"], ids=["keep", "top", "bottom"]
Expand Down

0 comments on commit 0755a61

Please sign in to comment.