Skip to content

Commit

Permalink
Update to work with Pandas 2.2 release candidate (#6074)
Browse files Browse the repository at this point in the history
  • Loading branch information
hoxbro committed Feb 2, 2024
1 parent 960e40b commit b21262b
Show file tree
Hide file tree
Showing 8 changed files with 76 additions and 35 deletions.
2 changes: 1 addition & 1 deletion holoviews/core/data/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
group_by = [d.name for d in index_dims]
data = []
if len(dimensions) == 1:
for k, v in dataset.data.groupby(index_dims[0].name):
for k, v in dataset.data.groupby(index_dims[0].name, squeeze=False):
if drop_dim:
v = v.to_dataframe().reset_index()
data.append((k, group_type(v, **group_kwargs)))
Expand Down
41 changes: 26 additions & 15 deletions holoviews/element/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""

import sys
from importlib.util import find_spec

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -79,7 +80,7 @@ def spatial_select_gridded(xvals, yvals, geometry):
sel_mask = spatial_select_columnar(xvals.flatten(), yvals.flatten(), geometry)
return sel_mask.reshape(xvals.shape)

def spatial_select_columnar(xvals, yvals, geometry):
def spatial_select_columnar(xvals, yvals, geometry, geom_method=None):
if 'cudf' in sys.modules:
import cudf
if isinstance(xvals, cudf.Series):
Expand Down Expand Up @@ -119,27 +120,37 @@ def spatial_select_columnar(xvals, yvals, geometry):
sel_mask = (xvals>=x0) & (xvals<=x1) & (yvals>=y0) & (yvals<=y1)
masked_xvals = xvals[sel_mask]
masked_yvals = yvals[sel_mask]
try:
from spatialpandas.geometry import PointArray, Polygon
points = PointArray((masked_xvals.astype('float'), masked_yvals.astype('float')))
poly = Polygon([np.concatenate([geometry, geometry[:1]]).flatten()])
geom_mask = points.intersects(poly)
except ImportError:
try:
from shapely.geometry import Point, Polygon
points = (Point(x, y) for x, y in zip(masked_xvals, masked_yvals))
poly = Polygon(geometry)
geom_mask = np.array([poly.contains(p) for p in points])
except ImportError:
raise ImportError("Lasso selection on tabular data requires "
"either spatialpandas or shapely to be available.") from None
if geom_method is None:
if find_spec("spatialpandas") is not None:
geom_method = "spatialpandas"
elif find_spec("shapely") is not None:
geom_method = "shapely"
else:
msg = "Lasso selection on tabular data requires either spatialpandas or shapely to be available."
raise ImportError(msg) from None
geom_function = {"spatialpandas": _mask_spatialpandas, "shapely": _mask_shapely}[geom_method]
geom_mask = geom_function(masked_xvals, masked_yvals, geometry)
if isinstance(xvals, pd.Series):
sel_mask[sel_mask.index[np.where(sel_mask)[0]]] = geom_mask
else:
sel_mask[np.where(sel_mask)[0]] = geom_mask
return sel_mask


def _mask_spatialpandas(masked_xvals, masked_yvals, geometry):
    """Test the candidate points against the lasso polygon with spatialpandas.

    The x/y values are cast to float and wrapped in a ``PointArray``. The
    first row of ``geometry`` is appended to its end (so the ring ends on
    the vertex it started from) before it is flattened into the coordinate
    list handed to ``Polygon``. The result of ``PointArray.intersects`` is
    returned unchanged.
    """
    from spatialpandas.geometry import PointArray, Polygon

    x_float = masked_xvals.astype('float')
    y_float = masked_yvals.astype('float')
    candidates = PointArray((x_float, y_float))

    ring = np.concatenate([geometry, geometry[:1]]).flatten()
    lasso = Polygon([ring])
    return candidates.intersects(lasso)


def _mask_shapely(masked_xvals, masked_yvals, geometry):
    """Test the candidate points against the lasso polygon with shapely.

    Each (x, y) pair is turned into a shapely ``Point`` and checked with
    ``Polygon.contains``. The per-point results are collected into a NumPy
    array with an explicit ``bool`` dtype, so the dtype is fixed even when
    there are no candidate points.
    """
    from shapely.geometry import Point, Polygon

    coords = zip(masked_xvals, masked_yvals)
    lasso = Polygon(geometry)
    inside = [lasso.contains(Point(px, py)) for px, py in coords]
    return np.array(inside, dtype=bool)


def spatial_select(xvals, yvals, geometry):
if xvals.ndim > 1:
return spatial_select_gridded(xvals, yvals, geometry)
Expand Down
2 changes: 1 addition & 1 deletion holoviews/tests/core/data/test_pandasinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ class PandasInterfaceTests(BasePandasInterfaceTests):
__test__ = True

def test_data_with_tz(self):
dates = pd.date_range("2018-01-01", periods=3, freq="H")
dates = pd.date_range("2018-01-01", periods=3, freq="h")
dates_tz = dates.tz_localize("UTC")
df = pd.DataFrame({"dates": dates_tz})
data = Dataset(df).dimension_values("dates")
Expand Down
14 changes: 9 additions & 5 deletions holoviews/tests/core/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
from holoviews.streams import ParamMethod, Params


def makeDataFrame():
    """Build a reproducible 30x4 DataFrame of standard-normal draws.

    The generator is seeded with 2 on every call, so repeated calls return
    equal frames. Columns are labelled 'A' through 'D'.
    """
    rng = np.random.default_rng(2)
    values = rng.standard_normal((30, 4))
    return pd.DataFrame(values, columns=list('ABCD'))

class ParamClass(param.Parameterized):

label = param.String(default='Test')
Expand Down Expand Up @@ -280,7 +284,7 @@ def test_dmap_apply_dynamic_with_param_method(self):


def test_nested_widgets():
df = pd._testing.makeDataFrame()
df = makeDataFrame()
column = RadioButtonGroup(value="A", options=list("ABC"))
ds = Dataset(df)
transform = util.transform.df_dim("*").groupby(["D", column]).mean()
Expand All @@ -295,7 +299,7 @@ def test_nested_widgets():


def test_slice_iloc():
df = pd._testing.makeDataFrame()
df = makeDataFrame()
column = IntSlider(start=10, end=40)
ds = Dataset(df)
transform = util.transform.df_dim("*").iloc[:column].mean(axis=0)
Expand All @@ -310,7 +314,7 @@ def test_slice_iloc():


def test_slice_loc():
df = pd._testing.makeDataFrame()
df = makeDataFrame()
df.index = np.arange(5, len(df) + 5)
column = IntSlider(start=10, end=40)
ds = Dataset(df)
Expand All @@ -330,7 +334,7 @@ def test_slice_loc():


def test_int_iloc():
df = pd._testing.makeDataFrame()
df = makeDataFrame()
column = IntSlider(start=10, end=40)
ds = Dataset(df)
transform = util.transform.df_dim("*").iloc[column]
Expand All @@ -345,7 +349,7 @@ def test_int_iloc():


def test_int_loc():
df = pd._testing.makeDataFrame()
df = makeDataFrame()
df.index = np.arange(5, len(df) + 5)
column = IntSlider(start=10, end=40)
ds = Dataset(df)
Expand Down
37 changes: 28 additions & 9 deletions holoviews/tests/element/test_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,8 +616,10 @@ def test_poly_geom_selection_inverted(self):
self.assertEqual(region, Rectangles([]) * Path([list(geom)+[(0.2, -0.15)]]))


@pytest.mark.skipif(shapely is None and spd is None, reason='Neither shapely nor spatialpandas are available')
class TestSpatialSelectColumnar:
__test__ = False
method = None

geometry_encl = np.array([
[-1, 0.5],
[ 1, 0.5],
Expand Down Expand Up @@ -660,25 +662,42 @@ def pandas_df(self):
def dask_df(self, pandas_df):
return dd.from_pandas(pandas_df, npartitions=2)

@pytest.fixture(scope="function")
def _method(self):
return self.method

@pytest.mark.parametrize("geometry,pt_mask", [(geometry_encl, pt_mask_encl),(geometry_noencl, pt_mask_noencl)])
class TestSpatialSelectColumnarPtMask:
def test_pandas(self, geometry, pt_mask, pandas_df):
mask = spatial_select_columnar(pandas_df.x, pandas_df.y, geometry)

def test_pandas(self, geometry, pt_mask, pandas_df, _method):
mask = spatial_select_columnar(pandas_df.x, pandas_df.y, geometry, _method)
assert np.array_equal(mask, pt_mask)

@dd_available
def test_dask(self, geometry, pt_mask, dask_df):
mask = spatial_select_columnar(dask_df.x, dask_df.y, geometry)
def test_dask(self, geometry, pt_mask, dask_df, _method):
mask = spatial_select_columnar(dask_df.x, dask_df.y, geometry, _method)
assert np.array_equal(mask.compute(), pt_mask)

def test_numpy(self, geometry, pt_mask, pandas_df):
mask = spatial_select_columnar(pandas_df.x.to_numpy(copy=True), pandas_df.y.to_numpy(copy=True), geometry)
def test_numpy(self, geometry, pt_mask, pandas_df, _method):
mask = spatial_select_columnar(pandas_df.x.to_numpy(copy=True), pandas_df.y.to_numpy(copy=True), geometry, _method)
assert np.array_equal(mask, pt_mask)


@pytest.mark.parametrize("geometry", [geometry_encl, geometry_noencl])
class TestSpatialSelectColumnarDaskMeta:
@dd_available
def test_meta_dtype(self, geometry, dask_df):
mask = spatial_select_columnar(dask_df.x, dask_df.y, geometry)
def test_meta_dtype(self, geometry, dask_df, _method):
mask = spatial_select_columnar(dask_df.x, dask_df.y, geometry, _method)
assert mask._meta.dtype == np.bool_


@pytest.mark.skipif(shapely is None, reason='Shapely not available')
class TestSpatialSelectColumnarShapely(TestSpatialSelectColumnar):
    """Run the shared TestSpatialSelectColumnar tests with the shapely backend."""
    # The base class sets __test__ = False; turn it back on for this
    # concrete subclass.
    __test__ = True
    # Returned by the base class's _method fixture, which the tests pass on
    # to spatial_select_columnar as the geometry method.
    method = 'shapely'


@pytest.mark.skipif(spd is None, reason='Spatialpandas not available')
class TestSpatialSelectColumnarSpatialpandas(TestSpatialSelectColumnar):
    """Run the shared TestSpatialSelectColumnar tests with the spatialpandas backend."""
    # The base class sets __test__ = False; turn it back on for this
    # concrete subclass.
    __test__ = True
    # Returned by the base class's _method fixture, which the tests pass on
    # to spatial_select_columnar as the geometry method.
    method = 'spatialpandas'
6 changes: 3 additions & 3 deletions holoviews/tests/operation/test_datashader.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def test_aggregate_curve_datetimes(self):
def test_aggregate_curve_datetimes_dask(self):
df = pd.DataFrame(
data=np.arange(1000), columns=['a'],
index=pd.date_range('2019-01-01', freq='1T', periods=1000),
index=pd.date_range('2019-01-01', freq='1min', periods=1000),
)
ddf = dd.from_pandas(df, npartitions=4)
curve = Curve(ddf, kdims=['index'], vdims=['a'])
Expand Down Expand Up @@ -270,7 +270,7 @@ def test_aggregate_ndoverlay_count_cat_datetimes_microsecond_timebase(self):
self.assertEqual(imgs[1], expected2)

def test_aggregate_dt_xaxis_constant_yaxis(self):
df = pd.DataFrame({'y': np.ones(100)}, index=pd.date_range('1980-01-01', periods=100, freq='1T'))
df = pd.DataFrame({'y': np.ones(100)}, index=pd.date_range('1980-01-01', periods=100, freq='1min'))
img = rasterize(Curve(df), dynamic=False, width=3)
xs = np.array(['1980-01-01T00:16:30.000000', '1980-01-01T00:49:30.000000',
'1980-01-01T01:22:30.000000'], dtype='datetime64[us]')
Expand Down Expand Up @@ -868,7 +868,7 @@ def test_shade_categorical_images_grid(self):
self.assertEqual(shaded, expected)

def test_shade_dt_xaxis_constant_yaxis(self):
df = pd.DataFrame({'y': np.ones(100)}, index=pd.date_range('1980-01-01', periods=100, freq='1T'))
df = pd.DataFrame({'y': np.ones(100)}, index=pd.date_range('1980-01-01', periods=100, freq='1min'))
rgb = shade(rasterize(Curve(df), dynamic=False, width=3))
xs = np.array(['1980-01-01T00:16:30.000000', '1980-01-01T00:49:30.000000',
'1980-01-01T01:22:30.000000'], dtype='datetime64[us]')
Expand Down
7 changes: 6 additions & 1 deletion holoviews/tests/plotting/bokeh/test_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,12 @@ def test_cds_resolves(self):
'value': points.columns()})

def test_rangexy_datetime(self):
curve = Curve(pd._testing.makeTimeDataFrame(), 'index', 'C')
df = pd.DataFrame(
data = np.random.default_rng(2).standard_normal((30, 4)),
columns=list('ABCD'),
index=pd.date_range('2018-01-01', freq='D', periods=30),
)
curve = Curve(df, 'index', 'C')
stream = RangeXY(source=curve)
plot = bokeh_server_renderer.get_plot(curve)
callback = plot.callbacks[0]
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ filterwarnings = [
"ignore:datetime.datetime.utcfromtimestamp():DeprecationWarning:dateutil.tz.tz", # https://github.com/dateutil/dateutil/pull/1285
"ignore:datetime.datetime.utcfromtimestamp():DeprecationWarning:bokeh", # https://github.com/bokeh/bokeh/issues/13125
"ignore:datetime.datetime.utcnow():DeprecationWarning:bokeh", # https://github.com/bokeh/bokeh/issues/13125
# 2024-01: Pandas 2.2 problems in Dask
"ignore:When grouping with a length-1 list::dask.dataframe.groupby", # https://github.com/dask/dask/issues/10572
]

[tool.coverage]
Expand Down

0 comments on commit b21262b

Please sign in to comment.