diff --git a/docs/notebooks/84_read_parquet.ipynb b/docs/notebooks/84_read_parquet.ipynb new file mode 100644 index 0000000000..e73191187f --- /dev/null +++ b/docs/notebooks/84_read_parquet.ipynb @@ -0,0 +1,197 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![image](https://jupyterlite.rtfd.io/en/latest/_static/badge.svg)](https://demo.leafmap.org/lab/index.html?path=notebooks/84_read_parquet.ipynb)\n", + "[![image](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/opengeos/leafmap/blob/master/examples/notebooks/84_read_parquet.ipynb)\n", + "[![image](https://img.shields.io/badge/Open-Planetary%20Computer-black?style=flat&logo=microsoft)](https://pccompute.westeurope.cloudapp.azure.com/compute/hub/user-redirect/git-pull?repo=https://github.com/opengeos/leafmap&urlpath=lab/tree/leafmap/examples/notebooks/84_read_parquet.ipynb&branch=master)\n", + "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://githubtocolab.com/opengeos/leafmap/blob/master/examples/notebooks/84_read_parquet.ipynb)\n", + "[![image](https://mybinder.org/badge_logo.svg)](https://gishub.org/leafmap-binder)\n", + "\n", + "**Reading GeoParquet files and visualizing vector data interactively**\n", + "\n", + "Uncomment the following line to install [leafmap](https://leafmap.org) if needed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# %pip install -U leafmap lonboard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import leafmap" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Visualizing point data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "url = 'https://open.gishub.org/data/duckdb/cities.parquet'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read GeoParquet and return a GeoPandas GeoDataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gdf = leafmap.read_parquet(url, return_type='gdf', src_crs='EPSG:4326')\n", + "gdf.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "View the GeoDataFrame interactively using folium." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gdf.explore()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Visualize the GeoDataFrame using lonboard." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "leafmap.view_vector(gdf, get_radius=20000, get_fill_color='blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Visualizing polygon data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "url = 'https://data.source.coop/giswqs/nwi/wetlands/DC_Wetlands.parquet'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gdf = leafmap.read_parquet(url, return_type='gdf', src_crs='EPSG:5070', dst_crs='EPSG:4326')\n", + "gdf.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gdf.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "leafmap.view_vector(gdf, get_fill_color=[0, 0, 255, 128])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use DuckDB." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import duckdb\n", + "import leafmap.deckgl as leafmap\n", + "\n", + "con = duckdb.connect()\n", + "con.install_extension(\"spatial\")\n", + "con.load_extension(\"spatial\")\n", + "\n", + "state = \"DC\" # Change to the US State of your choice\n", + "url = f\"https://data.source.coop/giswqs/nwi/wetlands/{state}_Wetlands.parquet\"\n", + "df = con.sql(f\"SELECT * EXCLUDE geometry, ST_AsText(ST_GeomFromWKB(geometry)) AS geometry FROM '{url}'\").df()\n", + "gdf = leafmap.df_to_gdf(df, src_crs=\"EPSG:5070\", dst_crs=\"EPSG:4326\")\n", + "\n", + "m = leafmap.Map()\n", + "m.add_gdf(gdf)\n", + "m" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/tutorials.md b/docs/tutorials.md index 6557e22ab0..a9ce11dcb7 100644 --- a/docs/tutorials.md +++ b/docs/tutorials.md @@ -97,6 +97,7 @@ 81. Downloading Microsoft and Google Building Footprints ([notebook](https://leafmap.org/notebooks/81_buildings)) 82. Visualizing PMTiles with leafmap ([notebook](https://leafmap.org/notebooks/82_pmtiles)) 83. Visualizing large vector datasets with lonboard ([notebook](https://leafmap.org/notebooks/83_vector_viz)) +84. Reading GeoParquet files and visualizing vector data interactively ([notebook](https://leafmap.org/notebooks/84_read_parquet)) ## Demo diff --git a/examples/README.md b/examples/README.md index 891455fd2e..c3e4ce2e55 100644 --- a/examples/README.md +++ b/examples/README.md @@ -104,6 +104,7 @@ 81. 
Downloading Microsoft and Google Building Footprints ([notebook](https://leafmap.org/notebooks/81_buildings)) 82. Visualizing PMTiles with leafmap ([notebook](https://leafmap.org/notebooks/82_pmtiles)) 83. Visualizing large vector datasets with lonboard ([notebook](https://leafmap.org/notebooks/83_vector_viz)) +84. Reading GeoParquet files and visualizing vector data interactively ([notebook](https://leafmap.org/notebooks/84_read_parquet)) ## Demo diff --git a/examples/notebooks/84_read_parquet.ipynb b/examples/notebooks/84_read_parquet.ipynb new file mode 100644 index 0000000000..e73191187f --- /dev/null +++ b/examples/notebooks/84_read_parquet.ipynb @@ -0,0 +1,197 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![image](https://jupyterlite.rtfd.io/en/latest/_static/badge.svg)](https://demo.leafmap.org/lab/index.html?path=notebooks/84_read_parquet.ipynb)\n", + "[![image](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/opengeos/leafmap/blob/master/examples/notebooks/84_read_parquet.ipynb)\n", + "[![image](https://img.shields.io/badge/Open-Planetary%20Computer-black?style=flat&logo=microsoft)](https://pccompute.westeurope.cloudapp.azure.com/compute/hub/user-redirect/git-pull?repo=https://github.com/opengeos/leafmap&urlpath=lab/tree/leafmap/examples/notebooks/84_read_parquet.ipynb&branch=master)\n", + "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://githubtocolab.com/opengeos/leafmap/blob/master/examples/notebooks/84_read_parquet.ipynb)\n", + "[![image](https://mybinder.org/badge_logo.svg)](https://gishub.org/leafmap-binder)\n", + "\n", + "**Reading GeoParquet files and visualizing vector data interactively**\n", + "\n", + "Uncomment the following line to install [leafmap](https://leafmap.org) if needed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# %pip install -U leafmap lonboard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import leafmap" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Visualizing point data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "url = 'https://open.gishub.org/data/duckdb/cities.parquet'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read GeoParquet and return a GeoPandas GeoDataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gdf = leafmap.read_parquet(url, return_type='gdf', src_crs='EPSG:4326')\n", + "gdf.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "View the GeoDataFrame interactively using folium." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gdf.explore()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Visualize the GeoDataFrame using lonboard." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "leafmap.view_vector(gdf, get_radius=20000, get_fill_color='blue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Visualizing polygon data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "url = 'https://data.source.coop/giswqs/nwi/wetlands/DC_Wetlands.parquet'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gdf = leafmap.read_parquet(url, return_type='gdf', src_crs='EPSG:5070', dst_crs='EPSG:4326')\n", + "gdf.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gdf.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "leafmap.view_vector(gdf, get_fill_color=[0, 0, 255, 128])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use DuckDB." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import duckdb\n", + "import leafmap.deckgl as leafmap\n", + "\n", + "con = duckdb.connect()\n", + "con.install_extension(\"spatial\")\n", + "con.load_extension(\"spatial\")\n", + "\n", + "state = \"DC\" # Change to the US State of your choice\n", + "url = f\"https://data.source.coop/giswqs/nwi/wetlands/{state}_Wetlands.parquet\"\n", + "df = con.sql(f\"SELECT * EXCLUDE geometry, ST_AsText(ST_GeomFromWKB(geometry)) AS geometry FROM '{url}'\").df()\n", + "gdf = leafmap.df_to_gdf(df, src_crs=\"EPSG:5070\", dst_crs=\"EPSG:4326\")\n", + "\n", + "m = leafmap.Map()\n", + "m.add_gdf(gdf)\n", + "m" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/leafmap/__init__.py 
def view_vector(vector, zoom_to_layer=True, pickable=True, open_args=None, **kwargs):
    """Visualize a vector dataset on a new deck.gl-backed map.

    A fresh ``leafmap.deckgl.Map`` is created, the vector data is added to it
    through ``Map.add_vector`` and the map is returned so that it is displayed
    when it is the last expression of a notebook cell.

    Args:
        vector (Union[str, GeoDataFrame]): The file path or URL to the vector
            data, or a GeoDataFrame.
        zoom_to_layer (bool, optional): Flag to zoom to the added layer.
            Defaults to True.
        pickable (bool, optional): Flag to enable picking on the added layer.
            Defaults to True.
        open_args (dict, optional): Additional keyword arguments that will be
            passed to gpd.read_file() if vector is a file path or URL.
            Defaults to None, which is treated as an empty dict.
        **kwargs: Additional keyword arguments that will be passed to the
            vector layer.

    Returns:
        leafmap.deckgl.Map: The map object containing the vector layer.
    """
    # Imported lazily so that importing leafmap does not require the deck.gl
    # backend to be importable.
    from .deckgl import Map

    # A fresh dict per call: a ``{}`` default would be shared between calls
    # and could be mutated downstream by ``add_vector``.
    if open_args is None:
        open_args = {}

    m = Map()
    m.add_vector(vector, zoom_to_layer, pickable, open_args, **kwargs)
    return m
def _split_names(value):
    """Normalize a column spec (comma-separated string or list) to a list of names."""
    if isinstance(value, str):
        return [name.strip() for name in value.split(",") if name.strip()]
    return [name.strip() for name in value]


def _build_select(columns, excluded):
    """Render the SELECT list for ``columns`` with the ``excluded`` names removed."""
    if not excluded:
        # Nothing to remove: pass a string through verbatim so SQL expressions
        # containing commas keep working.
        return columns if isinstance(columns, str) else ", ".join(columns)

    names = _split_names(columns)
    if names == ["*"]:
        # DuckDB requires parentheses to exclude more than one column.
        return f"* EXCLUDE ({', '.join(excluded)})"

    # EXCLUDE is only valid after a star expression, so explicit column lists
    # are filtered in Python instead (identifiers compared case-insensitively).
    dropped = {name.lower() for name in excluded}
    return ", ".join(name for name in names if name.lower() not in dropped)


def read_parquet(
    source: str,
    geometry: Optional[str] = None,
    columns: Optional[Union[str, list]] = None,
    exclude: Optional[Union[str, list]] = None,
    db: Optional[str] = None,
    table_name: Optional[str] = None,
    sql: Optional[str] = None,
    limit: Optional[int] = None,
    src_crs: Optional[str] = None,
    dst_crs: Optional[str] = None,
    return_type: str = "gdf",
    **kwargs,
):
    """
    Read Parquet data from a source and return a GeoDataFrame or DataFrame.

    The data is queried through DuckDB (with the ``httpfs`` and ``spatial``
    extensions loaded). For ``return_type='gdf'`` the geometry column is
    decoded from WKB to WKT in SQL and then converted with ``df_to_gdf``.

    Args:
        source (str): The path or URL to the Parquet file or directory containing Parquet files.
        geometry (str, optional): The name of the geometry column. Only used when
            return_type is 'gdf'. Defaults to None, which means "geometry".
        columns (str or list, optional): The columns to select. Defaults to None (select all columns).
        exclude (str or list, optional): The columns to exclude from the selection, as a list
            or a comma-separated string. Defaults to None.
        db (str, optional): The DuckDB database path or alias. Defaults to None (in-memory database).
        table_name (str, optional): If given, the query result is also stored in a table of
            this name in the DuckDB database. Defaults to None.
        sql (str, optional): The SQL query to execute. When given, it is used instead of the
            query built from source/columns/exclude/geometry. Defaults to None.
        limit (int, optional): The maximum number of rows to return; appended to the query
            as a LIMIT clause. Defaults to None (return all rows).
        src_crs (str, optional): The source CRS (Coordinate Reference System) of the geometries. Defaults to None.
        dst_crs (str, optional): The target CRS to reproject the geometries. Defaults to None.
        return_type (str, optional): The type of object to return:
            - 'gdf': GeoDataFrame (default)
            - 'df': DataFrame
            - 'numpy': NumPy arrays (as returned by DuckDB's fetchnumpy())
            - 'arrow': Arrow Table
            - 'polars': Polars DataFrame
        **kwargs: Additional keyword arguments that are passed to ``con.sql()``.

    Returns:
        The loaded data, in the container selected by ``return_type``.

    Raises:
        ValueError: If the columns or exclude arguments are not of the correct type,
            if return_type is not one of the supported values, or if no column is
            left to select after applying exclude.

    """
    # Map each non-geo return type to the DuckDB relation method producing it.
    fetchers = {"df": "df", "numpy": "fetchnumpy", "arrow": "arrow", "polars": "pl"}

    # Validate everything up front so that bad arguments never open (and
    # leak) a database connection.
    if return_type != "gdf" and return_type not in fetchers:
        raise ValueError(
            "return_type must be one of 'gdf', 'df', 'numpy', 'arrow' or 'polars'."
        )

    if columns is None:
        columns = "*"
    elif not isinstance(columns, (list, str)):
        raise ValueError("columns must be a list or a string.")

    if exclude is None:
        excluded = []
    elif isinstance(exclude, (list, str)):
        excluded = _split_names(exclude)
    else:
        raise ValueError("exclude must be a list or a string.")

    if return_type == "gdf":
        if geometry is None:
            geometry = "geometry"
        # The raw WKB geometry column is always replaced by its WKT form.
        if geometry.lower() not in {name.lower() for name in excluded}:
            excluded = excluded + [geometry]

    if sql is None:
        select_list = _build_select(columns, excluded)
        if return_type == "gdf":
            # Reprojection is left to df_to_gdf (src_crs/dst_crs) rather than
            # being done in SQL.
            geom_sql = f"ST_AsText(ST_GeomFromWKB({geometry})) AS {geometry}"
            select_list = ", ".join(part for part in (select_list, geom_sql) if part)
        elif not select_list:
            raise ValueError("No columns left to select after applying exclude.")
        sql = f"SELECT {select_list} FROM '{source}'"
    if limit is not None:
        sql += f" LIMIT {int(limit)}"

    import duckdb

    con = duckdb.connect(db) if isinstance(db, str) else duckdb.connect()
    try:
        for extension in ("httpfs", "spatial"):
            con.install_extension(extension)
            con.load_extension(extension)

        if return_type == "gdf":
            # NOTE: the CREATE TABLE statement below refers to the local
            # variable ``df`` by name (DuckDB resolves Python variables in
            # queries), so it must stay a local of this function.
            df = con.sql(sql, **kwargs).df()
            if table_name is not None:
                con.sql(f"CREATE OR REPLACE TABLE {table_name} AS FROM df", **kwargs)
            result = df_to_gdf(df, geometry=geometry, src_crs=src_crs, dst_crs=dst_crs)
        else:
            # Same as above: ``result`` is referenced by name in the SQL.
            result = getattr(con.sql(sql, **kwargs), fetchers[return_type])()
            if table_name is not None:
                con.sql(
                    f"CREATE OR REPLACE TABLE {table_name} AS FROM result", **kwargs
                )
    finally:
        # Always release the connection, even when the query fails.
        con.close()

    return result