Skip to content

Commit

Permalink
Add pandas ExtensionArray for storing homogeneous ragged arrays (#687)
Browse files Browse the repository at this point in the history
* RaggedArray implementation
* RaggedArray line aggregation support for pandas
* Dask RaggedArray support
  • Loading branch information
jonmmease authored and jbednar committed Mar 1, 2019
1 parent 6ff6276 commit 3171d88
Show file tree
Hide file tree
Showing 9 changed files with 1,819 additions and 7 deletions.
8 changes: 8 additions & 0 deletions datashader/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import absolute_import

from distutils.version import LooseVersion

import param
__version__ = str(param.version.Version(fpath=__file__, archive_commit="$Format:%h$",reponame="datashader"))

Expand All @@ -15,6 +17,12 @@
except ImportError:
pass

# Make the RaggedArray pandas extension array available if
# pandas >= 0.24.0 is installed
from pandas import __version__ as pandas_version
if LooseVersion(pandas_version) >= LooseVersion('0.24.0'):
from . import datatypes # noqa (API import)

# make pyct's example/data commands available if possible
from functools import partial
try:
Expand Down
39 changes: 33 additions & 6 deletions datashader/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ def line(self, source, x, y, agg=None, axis=0):
Define a canvas and a pandas DataFrame with 6 rows
>>> import pandas as pd # doctest: +SKIP
... import numpy as np
... import datashader as ds
... from datashader import Canvas
... import datashader.transfer_functions as tf
... cvs = Canvas()
Expand All @@ -222,23 +223,23 @@ def line(self, source, x, y, agg=None, axis=0):
Aggregate one line across all rows, with coordinates df.A1 by df.B1
>>> agg = cvs.line(df, x='A1', y='B1', axis=0) # doctest: +SKIP
... tf.shade(agg)
... tf.spread(tf.shade(agg))
Aggregate two lines across all rows. The first with coordinates
df.A1 by df.B1 and the second with coordinates df.A2 by df.B2
>>> agg = cvs.line(df, x=['A1', 'A2'], y=['B1', 'B2'], axis=0) # doctest: +SKIP
... tf.shade(agg)
... tf.spread(tf.shade(agg))
Aggregate two lines across all rows where the lines share the same
x coordinates. The first line will have coordinates df.A1 by df.B1
and the second will have coordinates df.A1 by df.B2
>>> agg = cvs.line(df, x='A1', y=['B1', 'B2'], axis=0) # doctest: +SKIP
... tf.shade(agg)
... tf.spread(tf.shade(agg))
Aggregate 6 length-2 lines, one per row, where the ith line has
coordinates [df.A1[i], df.A2[i]] by [df.B1[i], df.B2[i]]
>>> agg = cvs.line(df, x=['A1', 'A2'], y=['B1', 'B2'], axis=1) # doctest: +SKIP
... tf.shade(agg)
... tf.spread(tf.shade(agg))
Aggregate 6 length-4 lines, one per row, where the x coordinates
of every line are [0, 1, 2, 3] and the y coordinates of the ith line
Expand All @@ -247,10 +248,32 @@ def line(self, source, x, y, agg=None, axis=0):
... x=np.arange(4),
... y=['A1', 'A2', 'B1', 'B2'],
... axis=1)
... tf.shade(agg)
... tf.spread(tf.shade(agg))
Aggregate RaggedArrays of variable-length lines, one per row
(requires pandas >= 0.24.0)
>>> df_ragged = pd.DataFrame({ # doctest: +SKIP
... 'A1': pd.array([
... [1, 1.5], [2, 2.5, 3], [1.5, 2, 3, 4], [3.2, 4, 5]],
... dtype='Ragged[float32]'),
... 'B1': pd.array([
... [10, 12], [11, 14, 13], [10, 7, 9, 10], [7, 8, 12]],
... dtype='Ragged[float32]'),
... 'group': pd.Categorical([0, 1, 2, 1])
... })
...
... agg = cvs.line(df_ragged, x='A1', y='B1', axis=1)
... tf.spread(tf.shade(agg))
Aggregate RaggedArrays of variable-length lines by group column,
one per row (requires pandas >= 0.24.0)
>>> agg = cvs.line(df_ragged, x='A1', y='B1', # doctest: +SKIP
... agg=ds.count_cat('group'), axis=1)
... tf.spread(tf.shade(agg))
"""
from .glyphs import (LineAxis0, LinesAxis1, LinesAxis1XConstant,
LinesAxis1YConstant, LineAxis0Multi)
LinesAxis1YConstant, LineAxis0Multi,
LinesAxis1Ragged)
from .reductions import any as any_rdn
if agg is None:
agg = any_rdn()
Expand Down Expand Up @@ -286,6 +309,9 @@ def line(self, source, x, y, agg=None, axis=0):
elif (isinstance(x, (list, tuple)) and
isinstance(y, np.ndarray)):
glyph = LinesAxis1YConstant(tuple(x), y)
elif (isinstance(x, (Number, string_types)) and
isinstance(y, (Number, string_types))):
glyph = LinesAxis1Ragged(x, y)
else:
raise ValueError("""
Invalid combination of x and y arguments to Canvas.line when axis=1.
Expand All @@ -302,6 +328,7 @@ def line(self, source, x, y, agg=None, axis=0):

return bypixel(source, self, glyph, agg)


# TODO re 'untested', below: Consider replacing with e.g. a 3x3
# array in the call to Canvas (plot_height=3,plot_width=3), then
# show the output as a numpy array that has a compact
Expand Down
Loading

0 comments on commit 3171d88

Please sign in to comment.