From af645c28609f1ba150d11ffe0bcb5977c4c0482d Mon Sep 17 00:00:00 2001 From: Meghan Jones Date: Tue, 23 Mar 2021 15:36:13 -0400 Subject: [PATCH] Wrap blockmean (#1092) * Rename blockmedian.py to blockm.py * Refactor blockmedian to support mean and mode * Wrap method blockmean * Add tests for blockmean method Co-authored-by: Dongdong Tian Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com> --- doc/api/index.rst | 1 + pygmt/__init__.py | 1 + pygmt/src/__init__.py | 2 +- pygmt/src/{blockmedian.py => blockm.py} | 146 +++++++++++++++++++----- pygmt/tests/test_blockmean.py | 78 +++++++++++++ 5 files changed, 199 insertions(+), 29 deletions(-) rename pygmt/src/{blockmedian.py => blockm.py} (51%) create mode 100644 pygmt/tests/test_blockmean.py diff --git a/doc/api/index.rst b/doc/api/index.rst index 58a41dd8cec..14a8fd31708 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -67,6 +67,7 @@ Operations on tabular data: .. autosummary:: :toctree: generated + blockmean blockmedian surface diff --git a/pygmt/__init__.py b/pygmt/__init__.py index 17e6f295b94..2aeb76d10a6 100644 --- a/pygmt/__init__.py +++ b/pygmt/__init__.py @@ -29,6 +29,7 @@ from pygmt.session_management import begin as _begin from pygmt.session_management import end as _end from pygmt.src import ( + blockmean, blockmedian, config, grd2cpt, diff --git a/pygmt/src/__init__.py b/pygmt/src/__init__.py index 93930a3a55c..1ea68b899fc 100644 --- a/pygmt/src/__init__.py +++ b/pygmt/src/__init__.py @@ -3,7 +3,7 @@ """ # pylint: disable=import-outside-toplevel from pygmt.src.basemap import basemap -from pygmt.src.blockmedian import blockmedian +from pygmt.src.blockm import blockmean, blockmedian from pygmt.src.coast import coast from pygmt.src.colorbar import colorbar from pygmt.src.config import config diff --git a/pygmt/src/blockmedian.py b/pygmt/src/blockm.py similarity index 51% rename from pygmt/src/blockmedian.py rename to pygmt/src/blockm.py index 350f12ba5f0..a9cf45f4616 100644 --- a/pygmt/src/blockmedian.py +++ b/pygmt/src/blockm.py @@ -1,5 +1,5 @@ """ -blockmedian - Block average (x,y,z) data tables by median estimation. +blockm - Block average (x,y,z) data tables by mean or median estimation. """ import pandas as pd from pygmt.clib import Session @@ -15,6 +15,122 @@ ) +def _blockm(block_method, table, outfile, **kwargs): + r""" + Block average (x,y,z) data tables by mean or median estimation. + + Reads arbitrarily located (x,y,z) triples [or optionally weighted + quadruples (x,y,z,w)] from a table and writes to the output a mean or + median (depending on ``block_method``) position and value for every + non-empty block in a grid region defined by the ``region`` and ``spacing`` + parameters. + + Parameters + ---------- + block_method : str + Name of the GMT module to call. Must be "blockmean" or "blockmedian". + + Returns + ------- + output : pandas.DataFrame or None + Return type depends on whether the ``outfile`` parameter is set: + + - :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile`` + is not set + - None if ``outfile`` is set (filtered output will be stored in file + set by ``outfile``) + """ + + kind = data_kind(table) + with GMTTempFile(suffix=".csv") as tmpfile: + with Session() as lib: + if kind == "matrix": + if not hasattr(table, "values"): + raise GMTInvalidInput(f"Unrecognized data type: {type(table)}") + file_context = lib.virtualfile_from_matrix(table.values) + elif kind == "file": + if outfile is None: + raise GMTInvalidInput("Please pass in a str to 'outfile'") + file_context = dummy_context(table) + else: + raise GMTInvalidInput(f"Unrecognized data type: {type(table)}") + + with file_context as infile: + if outfile is None: + outfile = tmpfile.name + arg_str = " ".join([infile, build_arg_string(kwargs), "->" + outfile]) + lib.call_module(module=block_method, args=arg_str) + + # Read temporary csv output to a pandas table + if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame + result = pd.read_csv(tmpfile.name, sep="\t", names=table.columns) + elif outfile != tmpfile.name: # return None if outfile set, output in outfile + result = None + + return result + + +@fmt_docstring +@use_alias( + I="spacing", + R="region", + V="verbose", + a="aspatial", + f="coltypes", + r="registration", +) +@kwargs_to_strings(R="sequence") +def blockmean(table, outfile=None, **kwargs): + r""" + Block average (x,y,z) data tables by mean estimation. + + Reads arbitrarily located (x,y,z) triples [or optionally weighted + quadruples (x,y,z,w)] from a table and writes to the output a mean + position and value for every non-empty block in a grid region defined by + the ``region`` and ``spacing`` parameters. + + Full option list at :gmt-docs:`blockmean.html` + + {aliases} + + Parameters + ---------- + table : pandas.DataFrame or str + Either a pandas dataframe with (x, y, z) or (longitude, latitude, + elevation) values in the first three columns, or a file name to an + ASCII data table. + + spacing : str + *xinc*\[\ *unit*\][**+e**\|\ **n**] + [/*yinc*\ [*unit*][**+e**\|\ **n**]]. + *xinc* [and optionally *yinc*] is the grid spacing. + + region : str or list + *xmin/xmax/ymin/ymax*\[\ **+r**\][**+u**\ *unit*]. + Specify the region of interest. + + outfile : str + Required if ``table`` is a file. The file name for the output ASCII + file. + + {V} + {a} + {f} + {r} + + Returns + ------- + output : pandas.DataFrame or None + Return type depends on whether the ``outfile`` parameter is set: + + - :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile`` + is not set + - None if ``outfile`` is set (filtered output will be stored in file + set by ``outfile``) + """ + return _blockm(block_method="blockmean", table=table, outfile=outfile, **kwargs) + + @fmt_docstring @use_alias( I="spacing", @@ -73,30 +189,4 @@ def blockmedian(table, outfile=None, **kwargs): - None if ``outfile`` is set (filtered output will be stored in file set by ``outfile``) """ - kind = data_kind(table) - with GMTTempFile(suffix=".csv") as tmpfile: - with Session() as lib: - if kind == "matrix": - if not hasattr(table, "values"): - raise GMTInvalidInput(f"Unrecognized data type: {type(table)}") - file_context = lib.virtualfile_from_matrix(table.values) - elif kind == "file": - if outfile is None: - raise GMTInvalidInput("Please pass in a str to 'outfile'") - file_context = dummy_context(table) - else: - raise GMTInvalidInput(f"Unrecognized data type: {type(table)}") - - with file_context as infile: - if outfile is None: - outfile = tmpfile.name - arg_str = " ".join([infile, build_arg_string(kwargs), "->" + outfile]) - lib.call_module(module="blockmedian", args=arg_str) - - # Read temporary csv output to a pandas table - if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame - result = pd.read_csv(tmpfile.name, sep="\t", names=table.columns) - elif outfile != tmpfile.name: # return None if outfile set, output in outfile - result = None - - return result + return _blockm(block_method="blockmedian", table=table, outfile=outfile, **kwargs) diff --git a/pygmt/tests/test_blockmean.py b/pygmt/tests/test_blockmean.py new file mode 100644 index 00000000000..76adb922f6b --- /dev/null +++ b/pygmt/tests/test_blockmean.py @@ -0,0 +1,78 @@ +""" +Tests for blockmean. +""" +import os + +import numpy.testing as npt +import pandas as pd +import pytest +from pygmt import blockmean +from pygmt.datasets import load_sample_bathymetry +from pygmt.exceptions import GMTInvalidInput +from pygmt.helpers import GMTTempFile, data_kind + + +def test_blockmean_input_dataframe(): + """ + Run blockmean by passing in a pandas.DataFrame as input. + """ + dataframe = load_sample_bathymetry() + output = blockmean(table=dataframe, spacing="5m", region=[245, 255, 20, 30]) + assert isinstance(output, pd.DataFrame) + assert all(dataframe.columns == output.columns) + assert output.shape == (5849, 3) + npt.assert_allclose(output.iloc[0], [245.888877, 29.978707, -384.0]) + + return output + + +def test_blockmean_wrong_kind_of_input_table_matrix(): + """ + Run blockmean using table input that is not a pandas.DataFrame but still a + matrix. + """ + dataframe = load_sample_bathymetry() + invalid_table = dataframe.values + assert data_kind(invalid_table) == "matrix" + with pytest.raises(GMTInvalidInput): + blockmean(table=invalid_table, spacing="5m", region=[245, 255, 20, 30]) + + +def test_blockmean_wrong_kind_of_input_table_grid(): + """ + Run blockmean using table input that is not a pandas.DataFrame or file but + a grid. + """ + dataframe = load_sample_bathymetry() + invalid_table = dataframe.bathymetry.to_xarray() + assert data_kind(invalid_table) == "grid" + with pytest.raises(GMTInvalidInput): + blockmean(table=invalid_table, spacing="5m", region=[245, 255, 20, 30]) + + +def test_blockmean_input_filename(): + """ + Run blockmean by passing in an ASCII text file as input. + """ + with GMTTempFile() as tmpfile: + output = blockmean( + table="@tut_ship.xyz", + spacing="5m", + region=[245, 255, 20, 30], + outfile=tmpfile.name, + ) + assert output is None # check that output is None since outfile is set + assert os.path.exists(path=tmpfile.name) # check that outfile exists at path + output = pd.read_csv(tmpfile.name, sep="\t", header=None) + assert output.shape == (5849, 3) + npt.assert_allclose(output.iloc[0], [245.888877, 29.978707, -384.0]) + + return output + + +def test_blockmean_without_outfile_setting(): + """ + Run blockmean by not passing in outfile parameter setting. + """ + with pytest.raises(GMTInvalidInput): + blockmean(table="@tut_ship.xyz", spacing="5m", region=[245, 255, 20, 30])