From 5695f17b50281b171fd4dd6c541a7576d34a8365 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 18 May 2021 13:17:27 +1200 Subject: [PATCH] Handle geopandas and shapely geometries via __geo_interface__ link (#1000) Initial stab at allowing PyGMT to accept Python objects that implement __geo_interface__, i.e. a GeoJSON-like format. Works by conversion to a temporary OGR_GMT file, which can then be read natively by GMT . This is currently tested via `pygmt.info` in test_geopandas.py on a geopandas.GeoDataFrame object only. Will handle raw GeoJSON dict-like objects properly via fiona in subsequent iterations. * Add intersphinx mapping to geopandas docs * Create tempfile_from_geojson function to handle __geo_interface__ * Add geopandas.GeoDataFrame to list of allowed table inputs to info * Handle generic __geo_interface__ objects via fiona and geopandas Hacky attempt to handle non-geopandas objects (e.g. shapely.geometry) that have a __geo_interface__ dictionary property associated with it. Still assumes that geopandas is installed (along with fiona), so not a perfect solution. Also added another test with different geometry types. * Install geopandas on the Python 3.9/NumPy 1.20 test build only * Mention optional geopandas dependency in install and maintenance docs * Tweak tempfile_from_geojson docstring and remove unused fiona code * Add geopandas.GeoDataFrame to standardized table-classes filler text Co-authored-by: Michael Grund --- .github/workflows/ci_tests.yaml | 4 ++ doc/conf.py | 1 + doc/install.rst | 5 ++- doc/maintenance.md | 9 +++-- pygmt/clib/session.py | 14 +++++-- pygmt/helpers/__init__.py | 2 +- pygmt/helpers/decorators.py | 14 ++++--- pygmt/helpers/tempfile.py | 44 ++++++++++++++++++++++ pygmt/helpers/utils.py | 2 + pygmt/tests/test_geopandas.py | 66 +++++++++++++++++++++++++++++++++ 10 files changed, 145 insertions(+), 16 deletions(-) create mode 100644 pygmt/tests/test_geopandas.py diff --git a/.github/workflows/ci_tests.yaml b/.github/workflows/ci_tests.yaml index dbacd83b6c8..8967ad675dc 100644 --- a/.github/workflows/ci_tests.yaml +++ b/.github/workflows/ci_tests.yaml @@ -45,11 +45,14 @@ jobs: # python-version: 3.7 # isDraft: true # Pair Python 3.7 with NumPy 1.17 and Python 3.9 with NumPy 1.20 + # Only install optional packages on Python 3.9/NumPy 1.20 include: - python-version: 3.7 numpy-version: '1.17' + optional-packages: '' - python-version: 3.9 numpy-version: '1.20' + optional-packages: 'geopandas' defaults: run: shell: bash -l {0} @@ -89,6 +92,7 @@ jobs: conda install -c conda-forge/label/dev gmt=6.2.0rc1 conda install numpy=${{ matrix.numpy-version }} \ pandas xarray netCDF4 packaging \ + ${{ matrix.optional-packages }} \ codecov coverage[toml] dvc ipython make \ pytest-cov pytest-mpl pytest>=6.0 \ sphinx-gallery diff --git a/doc/conf.py b/doc/conf.py index e43193f4060..d5f17603a5e 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -48,6 +48,7 @@ # intersphinx configuration intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), + "geopandas": ("https://geopandas.org/", None), "numpy": ("https://numpy.org/doc/stable/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), "xarray": ("https://xarray.pydata.org/en/stable/", None), diff --git a/doc/install.rst b/doc/install.rst index 75e9995331c..bc3ebee02f2 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -86,9 +86,10 @@ PyGMT requires the following libraries to be installed: * `netCDF4 `__ * `packaging `__ -The following are optional (but recommended) dependencies: +The following are optional dependencies: -* `IPython `__: For embedding the figures in Jupyter notebooks. +* `IPython `__: For embedding the figures in Jupyter notebooks (recommended). +* `GeoPandas `__: For using and plotting GeoDataFrame objects. Installing GMT and other dependencies ------------------------------------- diff --git a/doc/maintenance.md b/doc/maintenance.md index dabf808d76e..ea5f1892e4b 100644 --- a/doc/maintenance.md +++ b/doc/maintenance.md @@ -65,9 +65,12 @@ There are 9 configuration files located in `.github/workflows`: This is run on every commit to the *master* and Pull Request branches. It is also scheduled to run daily on the *master* branch. - In draft Pull Requests, only two jobs on Linux (minimum NEP29 Python/NumPy versions - and latest Python/NumPy versions) are triggered to save on Continuous Integration - resources. + In draft Pull Requests, only two jobs on Linux are triggered to save on + Continuous Integration resources: + + - Minimum [NEP29](https://numpy.org/neps/nep-0029-deprecation_policy) + Python/NumPy versions + - Latest Python/NumPy versions + optional packages (e.g. GeoPandas) 3. `ci_docs.yml` (Build documentation on Linux/macOS/Windows) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 9c442be7a04..81e3a27afa7 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -25,7 +25,7 @@ GMTInvalidInput, GMTVersionError, ) -from pygmt.helpers import data_kind, dummy_context, fmt_docstring +from pygmt.helpers import data_kind, dummy_context, fmt_docstring, tempfile_from_geojson FAMILIES = [ "GMT_IS_DATASET", @@ -1418,12 +1418,18 @@ def virtualfile_from_data( if check_kind == "raster" and kind not in ("file", "grid"): raise GMTInvalidInput(f"Unrecognized data type for grid: {type(data)}") - if check_kind == "vector" and kind not in ("file", "matrix", "vectors"): - raise GMTInvalidInput(f"Unrecognized data type: {type(data)}") + if check_kind == "vector" and kind not in ( + "file", + "matrix", + "vectors", + "geojson", + ): + raise GMTInvalidInput(f"Unrecognized data type for vector: {type(data)}") # Decide which virtualfile_from_ function to use _virtualfile_from = { "file": dummy_context, + "geojson": tempfile_from_geojson, "grid": self.virtualfile_from_grid, # Note: virtualfile_from_matrix is not used because a matrix can be # converted to vectors instead, and using vectors allows for better @@ -1433,7 +1439,7 @@ def virtualfile_from_data( }[kind] # Ensure the data is an iterable (Python list or tuple) - if kind in ("file", "grid"): + if kind in ("file", "geojson", "grid"): _data = (data,) elif kind == "vectors": _data = [np.atleast_1d(x), np.atleast_1d(y)] diff --git a/pygmt/helpers/__init__.py b/pygmt/helpers/__init__.py index 2b372c59aa1..4b9c4051e80 100644 --- a/pygmt/helpers/__init__.py +++ b/pygmt/helpers/__init__.py @@ -7,7 +7,7 @@ kwargs_to_strings, use_alias, ) -from pygmt.helpers.tempfile import GMTTempFile, unique_name +from pygmt.helpers.tempfile import GMTTempFile, tempfile_from_geojson, unique_name from pygmt.helpers.utils import ( args_in_kwargs, build_arg_string, diff --git a/pygmt/helpers/decorators.py b/pygmt/helpers/decorators.py index f3cc66876bb..d3d66a6cb98 100644 --- a/pygmt/helpers/decorators.py +++ b/pygmt/helpers/decorators.py @@ -203,11 +203,12 @@ def fmt_docstring(module_func): Parameters ---------- - data : str or numpy.ndarray or pandas.DataFrame or xarray.Dataset + data : str or numpy.ndarray or pandas.DataFrame or xarray.Dataset or geo... Pass in either a file name to an ASCII data table, a 2D - :class:`numpy.ndarray`, a :class:`pandas.DataFrame`, or an + :class:`numpy.ndarray`, a :class:`pandas.DataFrame`, an :class:`xarray.Dataset` made up of 1D :class:`xarray.DataArray` - data variables containing the tabular data. + data variables, or a :class:`geopandas.GeoDataFrame` containing the + tabular data. region : str or list *Required if this is the first plot command*. *xmin/xmax/ymin/ymax*\ [**+r**][**+u**\ *unit*]. @@ -237,13 +238,14 @@ def fmt_docstring(module_func): "numpy.ndarray", "pandas.DataFrame", "xarray.Dataset", - # "geopandas.GeoDataFrame", + "geopandas.GeoDataFrame", ] ) filler_text["table-classes"] = ( - ":class:`numpy.ndarray`, a :class:`pandas.DataFrame`, or an\n" + ":class:`numpy.ndarray`, a :class:`pandas.DataFrame`, an\n" " :class:`xarray.Dataset` made up of 1D :class:`xarray.DataArray`\n" - " data variables containing the tabular data" + " data variables, or a :class:`geopandas.GeoDataFrame` containing the\n" + " tabular data" ) for marker, text in COMMON_OPTIONS.items(): diff --git a/pygmt/helpers/tempfile.py b/pygmt/helpers/tempfile.py index 6b14d769015..4f243a100fa 100644 --- a/pygmt/helpers/tempfile.py +++ b/pygmt/helpers/tempfile.py @@ -3,6 +3,7 @@ """ import os import uuid +from contextlib import contextmanager from tempfile import NamedTemporaryFile import numpy as np @@ -104,3 +105,46 @@ def loadtxt(self, **kwargs): Data read from the text file. """ return np.loadtxt(self.name, **kwargs) + + +@contextmanager +def tempfile_from_geojson(geojson): + """ + Saves any geo-like Python object which implements ``__geo_interface__`` + (e.g. a geopandas.GeoDataFrame or shapely.geometry) to a temporary OGR_GMT + text file. + + Parameters + ---------- + geojson : geopandas.GeoDataFrame + A geopandas GeoDataFrame, or any geo-like Python object which + implements __geo_interface__, i.e. a GeoJSON. + + Yields + ------ + tmpfilename : str + A temporary OGR_GMT format file holding the geographical data. + E.g. '1a2b3c4d5e6.gmt'. + """ + with GMTTempFile(suffix=".gmt") as tmpfile: + os.remove(tmpfile.name) # ensure file is deleted first + ogrgmt_kwargs = dict(filename=tmpfile.name, driver="OGR_GMT", mode="w") + try: + # Using geopandas.to_file to directly export to OGR_GMT format + geojson.to_file(**ogrgmt_kwargs) + except AttributeError: + # pylint: disable=import-outside-toplevel + # Other 'geo' formats which implement __geo_interface__ + import json + + import fiona + import geopandas as gpd + + with fiona.Env(): + jsontext = json.dumps(geojson.__geo_interface__) + # Do Input/Output via Fiona virtual memory + with fiona.io.MemoryFile(file_or_bytes=jsontext.encode()) as memfile: + geoseries = gpd.GeoSeries.from_file(filename=memfile) + geoseries.to_file(**ogrgmt_kwargs) + + yield tmpfile.name diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index ec970df26a4..177e3d23f62 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -70,6 +70,8 @@ def data_kind(data, x=None, y=None, z=None): kind = "file" elif isinstance(data, xr.DataArray): kind = "grid" + elif hasattr(data, "__geo_interface__"): + kind = "geojson" elif data is not None: kind = "matrix" else: diff --git a/pygmt/tests/test_geopandas.py b/pygmt/tests/test_geopandas.py new file mode 100644 index 00000000000..d7f04e6cb45 --- /dev/null +++ b/pygmt/tests/test_geopandas.py @@ -0,0 +1,66 @@ +""" +Tests on integration with geopandas. +""" +import numpy.testing as npt +import pytest +from pygmt import info + +gpd = pytest.importorskip("geopandas") +shapely = pytest.importorskip("shapely") + + +@pytest.fixture(scope="module", name="gdf") +def fixture_gdf(): + """ + Create a sample geopandas GeoDataFrame object with shapely geometries of + different types. + """ + linestring = shapely.geometry.LineString([(20, 15), (30, 15)]) + polygon = shapely.geometry.Polygon([(20, 10), (23, 10), (23, 14), (20, 14)]) + multipolygon = shapely.geometry.shape( + { + "type": "MultiPolygon", + "coordinates": [ + [ + [[0, 0], [20, 0], [10, 20], [0, 0]], # Counter-clockwise + [[3, 2], [10, 16], [17, 2], [3, 2]], # Clockwise + ], + [[[6, 4], [14, 4], [10, 12], [6, 4]]], # Counter-clockwise + [[[25, 5], [30, 10], [35, 5], [25, 5]]], + ], + } + ) + # Multipolygon first so the OGR_GMT file has @GMULTIPOLYGON in the header + gdf = gpd.GeoDataFrame( + index=["multipolygon", "polygon", "linestring"], + geometry=[multipolygon, polygon, linestring], + ) + + return gdf + + +def test_geopandas_info_geodataframe(gdf): + """ + Check that info can return the bounding box region from a + geopandas.GeoDataFrame. + """ + output = info(table=gdf, per_column=True) + npt.assert_allclose(actual=output, desired=[0.0, 35.0, 0.0, 20.0]) + + +@pytest.mark.parametrize( + "geomtype,desired", + [ + ("multipolygon", [0.0, 35.0, 0.0, 20.0]), + ("polygon", [20.0, 23.0, 10.0, 14.0]), + ("linestring", [20.0, 30.0, 15.0, 15.0]), + ], +) +def test_geopandas_info_shapely(gdf, geomtype, desired): + """ + Check that info can return the bounding box region from a shapely.geometry + object that has a __geo_interface__ property. + """ + geom = gdf.loc[geomtype].geometry + output = info(table=geom, per_column=True) + npt.assert_allclose(actual=output, desired=desired)