@fmt_docstring
@use_alias(I="spacing", R="region")
@kwargs_to_strings(R="sequence")
def blockmedian(table, outfile=None, **kwargs):
    """
    Block average (x,y,z) data tables by L1 norm.

    Reads arbitrarily located (x,y,z) triples [or optionally weighted quadruples
    (x,y,z,w)] from a table and writes to the output a median position and value for
    every non-empty block in a grid region defined by the region and spacing arguments.

    Full option list at :gmt-docs:`blockmedian.html`

    Parameters
    ----------
    table : pandas.DataFrame or str
        Either a pandas dataframe with (x, y, z) or (longitude, latitude, elevation)
        values in the first three columns, or a file name to an ASCII data table.

    spacing (I) : str
        ``'xinc[unit][+e|n][/yinc[unit][+e|n]]'``.
        x_inc [and optionally y_inc] is the grid spacing.

    region (R) : str or list
        ``'xmin/xmax/ymin/ymax[+r][+uunit]'``.
        Specify the region of interest.

    outfile : str
        Required if 'table' is a file. The file name for the output ASCII file.

    Returns
    -------
    output : pandas.DataFrame or None
        Return type depends on whether the outfile parameter is set:

        - pandas.DataFrame table with (x, y, z) columns if outfile is not set
        - None if outfile is set (filtered output will be stored in outfile)

    Raises
    ------
    GMTInvalidInput
        If ``table`` is not recognised as a file name or as matrix-like data with a
        ``values`` attribute, or if ``table`` is a file name and ``outfile`` is not
        given.
    """
    kind = data_kind(table)

    # Validate the inputs up front so that an invalid call fails before a GMT
    # session or a temporary file is created.
    if kind == "matrix":
        if not hasattr(table, "values"):
            raise GMTInvalidInput(f"Unrecognized data type: {type(table)}")
    elif kind == "file":
        if outfile is None:
            raise GMTInvalidInput("Please pass in a str to 'outfile'")
    else:
        raise GMTInvalidInput(f"Unrecognized data type: {type(table)}")

    # When no outfile is given, the module output goes to a temporary file that is
    # read back into a DataFrame; the caller's argument is never rebound.
    to_dataframe = outfile is None

    with GMTTempFile(suffix=".csv") as tmpfile:
        outpath = tmpfile.name if to_dataframe else outfile

        with Session() as lib:
            if kind == "matrix":
                file_context = lib.virtualfile_from_matrix(table.values)
            else:  # kind == "file", guaranteed by the validation above
                file_context = dummy_context(table)

            with file_context as infile:
                arg_str = " ".join([infile, build_arg_string(kwargs), "->" + outpath])
                lib.call_module(module="blockmedian", args=arg_str)

        # The temporary file must be read while its context is still open.
        if to_dataframe:
            result = pd.read_csv(tmpfile.name, sep="\t", names=table.columns)
        else:
            result = None

    return result
def test_blockmedian_input_dataframe():
    """
    Run blockmedian by passing in a pandas.DataFrame as input
    """
    dataframe = load_sample_bathymetry()
    output = blockmedian(table=dataframe, spacing="5m", region=[245, 255, 20, 30])
    assert isinstance(output, pd.DataFrame)
    assert all(dataframe.columns == output.columns)
    assert output.shape == (5849, 3)
    npt.assert_allclose(output.iloc[0], [245.88819, 29.97895, -385.0])
    # No return value: pytest deprecates test functions that return non-None.


def test_blockmedian_wrong_kind_of_input_table_matrix():
    """
    Run blockmedian using table input that is not a pandas.DataFrame but still a matrix
    """
    dataframe = load_sample_bathymetry()
    invalid_table = dataframe.values
    assert data_kind(invalid_table) == "matrix"
    with pytest.raises(GMTInvalidInput):
        blockmedian(table=invalid_table, spacing="5m", region=[245, 255, 20, 30])


def test_blockmedian_wrong_kind_of_input_table_grid():
    """
    Run blockmedian using table input that is not a pandas.DataFrame or file but a grid
    """
    dataframe = load_sample_bathymetry()
    invalid_table = dataframe.bathymetry.to_xarray()
    assert data_kind(invalid_table) == "grid"
    with pytest.raises(GMTInvalidInput):
        blockmedian(table=invalid_table, spacing="5m", region=[245, 255, 20, 30])


def test_blockmedian_input_filename():
    """
    Run blockmedian by passing in an ASCII text file as input
    """
    with GMTTempFile() as tmpfile:
        output = blockmedian(
            table="@tut_ship.xyz",
            spacing="5m",
            region=[245, 255, 20, 30],
            outfile=tmpfile.name,
        )
        assert output is None  # check that output is None since outfile is set
        assert os.path.exists(path=tmpfile.name)  # check that outfile exists at path
        # Read the written table back in to check its contents
        output = pd.read_csv(tmpfile.name, sep="\t", header=None)
        assert output.shape == (5849, 3)
        npt.assert_allclose(output.iloc[0], [245.88819, 29.97895, -385.0])
    # No return value: pytest deprecates test functions that return non-None.


def test_blockmedian_without_outfile_setting():
    """
    Run blockmedian by not passing in outfile parameter setting
    """
    with pytest.raises(GMTInvalidInput):
        blockmedian(table="@tut_ship.xyz", spacing="5m", region=[245, 255, 20, 30])