diff --git a/doc/assets/diagram.png b/doc/assets/diagram.png
index 28d55d592..255d5f47a 100644
Binary files a/doc/assets/diagram.png and b/doc/assets/diagram.png differ
diff --git a/doc/assets/diagram.svg b/doc/assets/diagram.svg
index 392f5c5a8..a0825a141 100644
--- a/doc/assets/diagram.svg
+++ b/doc/assets/diagram.svg
@@ -10,6 +10,7 @@
width="875.71826mm"
sodipodi:docname="diagram.svg"
inkscape:export-filename="diagram.png"
+ xml:space="preserve"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:xlink="http://www.w3.org/1999/xlink"
@@ -17,108 +18,85 @@
xmlns:svg="http://www.w3.org/2000/svg"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:cc="http://creativecommons.org/ns#"
- xmlns:dc="http://purl.org/dc/elements/1.1/">
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- image/svg+xml
-
-
-
-
- image/svg+xml
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Ibis
-
- Ibis
-
-
- Data libraries
- Data libraries
-
-
-
-
-
- .plot() API
- .plot() APIRepresentation
- RepresentationPlotting output
- Plotting output
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- NetworkX
- NetworkX
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ style="fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#2c7fb8;stroke-width:3.48217;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
diff --git a/doc/index.md b/doc/index.md
index 0a3d1171f..1f9396819 100644
--- a/doc/index.md
+++ b/doc/index.md
@@ -101,6 +101,7 @@ alt: Works with GeoPandas
align: center
---
:::
+
:::{tab-item} Polars
```python
import polars
@@ -116,6 +117,24 @@ align: center
---
:::
+:::{tab-item} DuckDB
+```python
+import duckdb
+import hvplot.duckdb
+from bokeh.sampledata.autompg import autompg_clean as df
+
+df_duckdb = duckdb.from_df(df)
+table = duckdb.sql("SELECT origin, mfr, AVG(mpg) AS mpg FROM df_duckdb GROUP BY origin, mfr ORDER BY mpg DESC LIMIT 5")
+table.hvplot.barh('mfr', 'mpg', by='origin', stacked=True)
+```
+```{image} ./_static/home/pandas.gif
+---
+alt: Works with DuckDB
+align: center
+---
+```
+
+:::
:::{tab-item} Intake
```python
import hvplot.intake
diff --git a/doc/user_guide/Integrations.ipynb b/doc/user_guide/Integrations.ipynb
index 39ce9f3f1..76676d013 100644
--- a/doc/user_guide/Integrations.ipynb
+++ b/doc/user_guide/Integrations.ipynb
@@ -254,19 +254,13 @@
},
{
"cell_type": "markdown",
- "id": "a46e377e-729a-4f99-b5d3-83b0736cb8a3",
+ "id": "7474a792-2cfd-4139-a1cd-872f913fa07b",
"metadata": {},
"source": [
":::{note}\n",
"Added in version `0.9.0`.\n",
- ":::"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7474a792-2cfd-4139-a1cd-872f913fa07b",
- "metadata": {},
- "source": [
+ ":::\n",
+ "\n",
":::{important}\n",
"While other data sources like `Pandas` or `Dask` have built-in support in HoloViews, as of version 1.17.1 this is not yet the case for `Polars`. You can track this [issue](https://github.com/holoviz/holoviews/issues/5939) to follow the evolution of this feature in HoloViews. Internally hvPlot simply selects the columns that contribute to the plot and casts them to a Pandas object using Polars' `.to_pandas()` method.\n",
":::"
@@ -327,6 +321,111 @@
"df_polars['A'].hvplot.line(height=150)"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "efc2f45e",
+ "metadata": {},
+ "source": [
+ "#### DuckDB"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "db91860c",
+ "metadata": {},
+ "source": [
+ ":::{note}\n",
+ "Added in version `0.11.0`.\n",
+ ":::"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0d6460d0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "df_pandas = pd.DataFrame(np.random.randn(1000, 4), columns=list('ABCD')).cumsum()\n",
+ "df_pandas.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "21638d45",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import hvplot.duckdb # noqa \n",
+ "import duckdb\n",
+ "\n",
+ "connection = duckdb.connect(':memory:')\n",
+ "relation = duckdb.from_df(df_pandas, connection=connection)\n",
+ "relation.to_view(\"example_view\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "40b56f16",
+ "metadata": {},
+ "source": [
+ "`.hvplot()` supports [DuckDB](https://duckdb.org/docs/api/python/overview.html) `DuckDBPyRelation` and `DuckDBPyConnection` objects."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f588e3fe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "relation.hvplot.line(y=['A', 'B', 'C', 'D'], height=150)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "68a47856",
+ "metadata": {},
+ "source": [
+ "`DuckDBPyRelation` is a bit more optimized because it handles column subsetting directly within DuckDB before the data is converted to a `pd.DataFrame`.\n",
+ "\n",
+ "So, it's a good idea to use the `connection.sql()` method when possible, which gives you a `DuckDBPyRelation`, instead of `connection.execute()`, which returns a `DuckDBPyConnection`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "214c60ee",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sql_expr = \"SELECT * FROM example_view WHERE A > 0 AND B > 0\"\n",
+ "connection.sql(sql_expr).hvplot.line(y=['A', 'B'], hover_cols=[\"C\"], height=150) # subsets A, B, C"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2a2f61d4",
+ "metadata": {},
+ "source": [
+ "Alternatively, you can directly subset the desired columns in the SQL expression."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5ce25c3d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sql_expr = \"SELECT A, B, C FROM example_view WHERE A > 0 AND B > 0\"\n",
+ "connection.execute(sql_expr).hvplot.line(y=['A', 'B'], hover_cols=[\"C\"], height=150)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "25a6e724-6a84-4bff-9108-ac71dcfa9116",
diff --git a/doc/user_guide/Introduction.ipynb b/doc/user_guide/Introduction.ipynb
index 0a25dec8b..a15e91a94 100644
--- a/doc/user_guide/Introduction.ipynb
+++ b/doc/user_guide/Introduction.ipynb
@@ -15,6 +15,7 @@
"\n",
"* [Pandas](https://pandas.pydata.org): DataFrame, Series (columnar/tabular data)\n",
"* [Rapids cuDF](https://docs.rapids.ai/api/cudf/stable/): GPU DataFrame, Series (columnar/tabular data)\n",
+ "* [DuckDB](https://www.duckdb.org/): DuckDB is a fast in-process analytical database\n",
"* [Polars](https://www.pola.rs/): Polars is a fast DataFrame library/in-memory query engine (columnar/tabular data)\n",
"* [Dask](https://www.dask.org): DataFrame, Series (distributed/out of core arrays and columnar data)\n",
"* [XArray](https://xarray.pydata.org): Dataset, DataArray (labelled multidimensional arrays)\n",
diff --git a/envs/py3.10-tests.yaml b/envs/py3.10-tests.yaml
index 34c9b6d0c..6d17eab82 100644
--- a/envs/py3.10-tests.yaml
+++ b/envs/py3.10-tests.yaml
@@ -21,6 +21,7 @@ dependencies:
- dask
- dask>=2021.3.0
- datashader>=0.6.5
+ - duckdb
- fiona
- fugue
- fugue-sql-antlr>=0.2.0
diff --git a/envs/py3.11-docs.yaml b/envs/py3.11-docs.yaml
index f8c29d248..8df704288 100644
--- a/envs/py3.11-docs.yaml
+++ b/envs/py3.11-docs.yaml
@@ -20,6 +20,7 @@ dependencies:
- colorcet>=2
- dask>=2021.3.0
- datashader>=0.6.5
+ - duckdb
- fiona
- fugue
- fugue-sql-antlr>=0.2.0
diff --git a/envs/py3.11-tests.yaml b/envs/py3.11-tests.yaml
index a13f4d400..292decce8 100644
--- a/envs/py3.11-tests.yaml
+++ b/envs/py3.11-tests.yaml
@@ -21,6 +21,7 @@ dependencies:
- dask
- dask>=2021.3.0
- datashader>=0.6.5
+ - duckdb
- fiona
- fugue
- fugue-sql-antlr>=0.2.0
diff --git a/envs/py3.12-tests.yaml b/envs/py3.12-tests.yaml
index 2f9e4d653..a429d17eb 100644
--- a/envs/py3.12-tests.yaml
+++ b/envs/py3.12-tests.yaml
@@ -21,6 +21,7 @@ dependencies:
- dask
- dask>=2021.3.0
- datashader>=0.6.5
+ - duckdb
- fiona
- fugue
- fugue-sql-antlr>=0.2.0
diff --git a/envs/py3.9-tests.yaml b/envs/py3.9-tests.yaml
index 45bfee438..5b354ff7f 100644
--- a/envs/py3.9-tests.yaml
+++ b/envs/py3.9-tests.yaml
@@ -20,6 +20,7 @@ dependencies:
- dask
- dask>=2021.3.0
- datashader>=0.6.5
+ - duckdb
- fiona
- fugue
- fugue-sql-antlr>=0.2.0
diff --git a/hvplot/converter.py b/hvplot/converter.py
index 29cc84566..0f90fc800 100644
--- a/hvplot/converter.py
+++ b/hvplot/converter.py
@@ -55,6 +55,7 @@
is_tabular,
is_series,
is_dask,
+ is_duckdb,
is_intake,
is_cudf,
is_streamz,
@@ -1094,6 +1095,9 @@ def _process_data(
elif is_dask(data):
datatype = 'dask'
self.data = data.persist() if persist else data
+ elif is_duckdb(data):
+ datatype = 'duckdb'
+ self.data = data
elif is_cudf(data):
datatype = 'cudf'
self.data = data
diff --git a/hvplot/duckdb.py b/hvplot/duckdb.py
new file mode 100644
index 000000000..3f53bb630
--- /dev/null
+++ b/hvplot/duckdb.py
@@ -0,0 +1,27 @@
+"""Adds the `.hvplot` method to duckdb.DuckDBPyRelation and duckdb.DuckDBPyConnection"""
+
+
+def patch(name='hvplot', interactive='interactive', extension='bokeh', logo=False):
+ """Install a property called ``name`` on the DuckDB relation and connection classes.
+
+ The property wraps the object it is accessed on in ``hvPlotTabularDuckDB``.
+ Patching is guarded by the 'hvplot.duckdb' entry of ``_module_extensions``,
+ which is added once the properties are set. ``extension`` and ``logo`` are
+ forwarded to ``post_patch``; ``interactive`` is accepted but not used in
+ this function.
+
+ Raises ImportError when ``duckdb`` cannot be imported.
+ """
+ from hvplot.plotting.core import hvPlotTabularDuckDB
+ from . import post_patch, _module_extensions
+
+ if 'hvplot.duckdb' not in _module_extensions:
+ try:
+ import duckdb
+ except ImportError:
+ raise ImportError(
+ 'Could not patch plotting API onto DuckDB. DuckDB could not be imported.'
+ )
+
+ # Patching for DuckDBPyRelation and DuckDBPyConnection
+ _patch_duckdb_plot = lambda self: hvPlotTabularDuckDB(self) # noqa: E731
+ # Give the accessor the same documentation as hvPlotTabularDuckDB.__call__.
+ _patch_duckdb_plot.__doc__ = hvPlotTabularDuckDB.__call__.__doc__
+ # A single property object is shared by both patched classes.
+ plot_prop_duckdb = property(_patch_duckdb_plot)
+ setattr(duckdb.DuckDBPyRelation, name, plot_prop_duckdb)
+ setattr(duckdb.DuckDBPyConnection, name, plot_prop_duckdb)
+ # Record that the patch ran so the guard above is false next time.
+ _module_extensions.add('hvplot.duckdb')
+
+ post_patch(extension, logo)
+
+
+patch()
diff --git a/hvplot/plotting/__init__.py b/hvplot/plotting/__init__.py
index d58ad1ace..e5038abd7 100644
--- a/hvplot/plotting/__init__.py
+++ b/hvplot/plotting/__init__.py
@@ -1,5 +1,5 @@
import holoviews as hv
-from ..util import with_hv_extension, is_polars
+from ..util import with_hv_extension, is_duckdb, is_polars
from .core import hvPlot, hvPlotTabular # noqa
@@ -34,6 +34,11 @@ def plot(data, kind, **kwargs):
from .core import hvPlotTabularPolars
return hvPlotTabularPolars(data)(kind=kind, **no_none_kwargs)
+
+ elif is_duckdb(data):
+ from .core import hvPlotTabularDuckDB
+
+ return hvPlotTabularDuckDB(data)(kind=kind, **no_none_kwargs)
return hvPlotTabular(data)(kind=kind, **no_none_kwargs)
diff --git a/hvplot/plotting/core.py b/hvplot/plotting/core.py
index c06abaa70..a28148d70 100644
--- a/hvplot/plotting/core.py
+++ b/hvplot/plotting/core.py
@@ -1864,6 +1864,89 @@ def labels(self, x=None, y=None, text=None, **kwds):
return self(x, y, text=text, kind='labels', **kwds)
+class hvPlotTabularDuckDB(hvPlotTabular):
+ """hvPlotTabular subclass whose converter accepts DuckDB relations and connections."""
+
+ def _get_converter(self, x=None, y=None, kind=None, **kwds):
+ """Build a HoloViewsConverter for the wrapped DuckDB object.
+
+ ``x``, ``y`` and ``kind`` fall back to the values found in the stored
+ metadata merged with ``kwds``. Unless ``hover_cols`` is 'all', the data
+ is reduced to the columns referenced by the options (plus the numeric
+ columns when ``y`` is None) before being handed to the converter.
+
+ Raises ValueError when the wrapped object is neither a
+ ``duckdb.DuckDBPyConnection`` nor a ``duckdb.DuckDBPyRelation``.
+ """
+ import duckdb
+ from duckdb.typing import (
+ BIGINT,
+ FLOAT,
+ DOUBLE,
+ INTEGER,
+ SMALLINT,
+ TINYINT,
+ UBIGINT,
+ UINTEGER,
+ USMALLINT,
+ UTINYINT,
+ HUGEINT,
+ )
+
+ # Stored metadata supplies defaults; explicit keyword arguments override it.
+ params = dict(self._metadata, **kwds)
+ x = x or params.pop('x', None)
+ y = y or params.pop('y', None)
+ kind = kind or params.pop('kind', None)
+
+ # Handle DuckDB Relation and Connection objects
+ if isinstance(self._data, (duckdb.DuckDBPyConnection, duckdb.DuckDBPyRelation)):
+ if isinstance(self._data, duckdb.DuckDBPyConnection):
+ # NOTE(review): presumably .df() materializes the connection's pending
+ # result as a pandas DataFrame (it is used as one below) - confirm.
+ data = self._data.df()
+ else:
+ data = self._data
+
+ # NOTE(review): when hover_cols is 'all' no subsetting is done here, and it
+ # looks like a relation would then reach HoloViewsConverter without being
+ # converted with .to_df() - confirm this is intended.
+ if params.get('hover_cols') != 'all':
+ data_columns = data.columns
+ # Every string option value (or list of values) is a candidate column name.
+ possible_columns = [
+ [v] if isinstance(v, str) else v
+ for v in params.values()
+ if isinstance(v, (str, list))
+ ]
+
+ # Keep the candidates that are actual columns; fall back to the first
+ # column when none of them match.
+ columns = (set(data_columns) & set(itertools.chain(*possible_columns))) or {
+ data_columns[0]
+ }
+ if y is None:
+ # When y is not specified HoloViewsConverter finds all the numeric
+ # columns and uses them as y values (see _process_chart_y). We need
+ # to include these columns too.
+
+ if isinstance(data, duckdb.DuckDBPyRelation):
+ numeric_columns = data.select_types(
+ [
+ BIGINT,
+ FLOAT,
+ DOUBLE,
+ INTEGER,
+ SMALLINT,
+ TINYINT,
+ UBIGINT,
+ UINTEGER,
+ USMALLINT,
+ UTINYINT,
+ HUGEINT,
+ ]
+ ).columns
+ else:
+ numeric_columns = data.select_dtypes(include='number').columns
+ columns |= set(numeric_columns)
+ xs = x if is_list_like(x) else (x,)
+ ys = y if is_list_like(y) else (y,)
+ columns |= {*xs, *ys}
+ # x or y may still be None at this point; None is not a column.
+ columns.discard(None)
+
+ # Put the selected columns back in their source order before subsetting.
+ if isinstance(data, duckdb.DuckDBPyRelation):
+ columns = sorted(columns, key=lambda c: data_columns.index(c))
+ data = data.select(*columns).to_df()
+ else:
+ columns = sorted(columns, key=lambda c: data.columns.get_loc(c))
+ data = data[list(columns)]
+ else:
+ raise ValueError(
+ 'Only duckdb.DuckDBPyConnection and duckdb.DuckDBPyRelation are supported'
+ )
+
+ return HoloViewsConverter(data, x, y, kind=kind, **params)
+
+
class hvPlotTabularPolars(hvPlotTabular):
def _get_converter(self, x=None, y=None, kind=None, **kwds):
import polars as pl
diff --git a/hvplot/tests/testpatch.py b/hvplot/tests/testpatch.py
index ae05e488d..9ed865ae6 100644
--- a/hvplot/tests/testpatch.py
+++ b/hvplot/tests/testpatch.py
@@ -128,3 +128,30 @@ def test_polars_lazyframe_patched(self):
pldf = pl.LazyFrame({'x': [1, 3, 5], 'y': [2, 4, 6]})
self.assertIsInstance(pldf.hvplot, hvPlotTabular)
+
+
+class TestPatchDuckDB(TestCase):
+ def setUp(self):
+ try:
+ import duckdb # noqa
+ except ImportError:
+ raise SkipTest('DuckDB not available')
+ import hvplot.duckdb # noqa
+
+ def test_duckdb_relation_patched(self):
+ import duckdb
+
+ df = pd.DataFrame({'x': [1, 2, 3], 'y': [1, 2, 3]})
+ connection = duckdb.connect(':memory:')
+ relation = duckdb.from_df(df, connection=connection)
+ self.assertIsInstance(relation.hvplot, hvPlotTabular)
+
+ def test_duckdb_connection_patched(self):
+ import duckdb
+
+ df = pd.DataFrame({'x': [1, 2, 3], 'y': [1, 2, 3]})
+ connection = duckdb.connect(':memory:')
+ duckdb.from_df(df, connection=connection).to_view('test_connection')
+ self.assertIsInstance(
+ connection.execute('SELECT * FROM test_connection').hvplot, hvPlotTabular
+ )
diff --git a/hvplot/tests/testplotting.py b/hvplot/tests/testplotting.py
index e7c747e0b..bdf54b139 100644
--- a/hvplot/tests/testplotting.py
+++ b/hvplot/tests/testplotting.py
@@ -4,10 +4,14 @@
from unittest import TestCase
+import pytest
import pandas as pd
from parameterized import parameterized
+import holoviews as hv
+from hvplot.plotting import plot
+from hvplot.tests.util import makeDataFrame
from hvplot.converter import HoloViewsConverter
no_args = ['line', 'area', 'hist', 'box', 'kde', 'density', 'bar', 'barh']
@@ -50,3 +54,20 @@ def test_pandas_dataframe_plot_does_not_implement_pie(self):
class TestPandasHvplotPlotting(TestPandasHoloviewsPlotting):
def setUp(self):
pd.options.plotting.backend = 'hvplot'
+
+
+def test_plot_supports_duckdb_relation():
+ duckdb = pytest.importorskip('duckdb')
+ connection = duckdb.connect(':memory:')
+ relation = duckdb.from_df(makeDataFrame(), connection=connection)
+ out = plot(relation, 'line')
+ assert isinstance(out, hv.NdOverlay)
+
+
+def test_plot_supports_duckdb_connection():
+ duckdb = pytest.importorskip('duckdb')
+ connection = duckdb.connect(':memory:')
+ relation = duckdb.from_df(makeDataFrame(), connection=connection)
+ relation.to_view('test')
+ out = plot(connection.execute('SELECT * FROM test'), 'line')
+ assert isinstance(out, hv.NdOverlay)
diff --git a/hvplot/util.py b/hvplot/util.py
index a0ffe3361..8a8c1079c 100644
--- a/hvplot/util.py
+++ b/hvplot/util.py
@@ -398,6 +398,14 @@ def is_dask(data):
return isinstance(data, (dd.DataFrame, dd.Series))
+def is_duckdb(data):
+ if not check_library(data, 'duckdb'):
+ return False
+ import duckdb
+
+ return isinstance(data, (duckdb.DuckDBPyRelation, duckdb.DuckDBPyConnection))
+
+
def is_polars(data):
if not check_library(data, 'polars'):
return False
diff --git a/pyproject.toml b/pyproject.toml
index fb8d936fd..5f8fe5a8c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,6 +80,7 @@ tests = [
"polars",
"dask",
"spatialpandas",
+ "duckdb",
]
# In 0.9 fugue added the sql extra but didn't add a fugue-sql package, removing the sql deps from fugue
# Adding them manually here
@@ -107,6 +108,7 @@ graphviz = [
examples = [
"dask[dataframe] >=2021.3.0",
"datashader >=0.6.5",
+ "duckdb",
"fugue[sql]",
"hvplot[fugue-sql]",
"ibis-framework[duckdb]", # ibis-duckdb on conda