Skip to content

Commit

Permalink
Wrap blockmedian
Browse files Browse the repository at this point in the history
Initial commit for wrapping the blockmedian function for #348, to be implemented under filtering.py. Original GMT `blockmedian` documentation can be found at https://docs.generic-mapping-tools.org/latest/blockmedian.html.

Sample test cases are stored in test_blockmedian.py. The current implementation accepts only pandas.DataFrame or ASCII file name inputs, and produces output of the corresponding type (a pandas.DataFrame or an ASCII output file). The tests use the load_sample_bathymetry dataset and check, across a variety of cases, that the blockmedian-filtered output is valid. The required arguments spacing (I) and region (R) are also aliased.
  • Loading branch information
weiji14 committed Nov 7, 2019
1 parent 59dbcbe commit 1ee3f98
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ Operations on tabular data:
.. autosummary::
:toctree: generated

blockmedian
info
surface

Expand Down
1 change: 1 addition & 0 deletions pygmt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# Import modules to make the high-level GMT Python API
from .session_management import begin as _begin, end as _end
from .figure import Figure
from .filtering import blockmedian
from .gridding import surface
from .mathops import makecpt
from .modules import info, grdinfo, which
Expand Down
83 changes: 83 additions & 0 deletions pygmt/filtering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
GMT modules for Filtering of 1-D and 2-D Data
"""
import pandas as pd

from .clib import Session
from .exceptions import GMTInvalidInput
from .helpers import (
build_arg_string,
data_kind,
dummy_context,
fmt_docstring,
GMTTempFile,
kwargs_to_strings,
use_alias,
)


@fmt_docstring
@use_alias(I="spacing", R="region")
@kwargs_to_strings(R="sequence")
def blockmedian(table, outfile=None, **kwargs):
    """
    Block average (x,y,z) data tables by L1 norm.

    Takes arbitrarily located (x,y,z) triples [or optionally weighted quadruples
    (x,y,z,w)] from a table and outputs a median position and value for every
    non-empty block of the grid defined by the region and spacing arguments.

    Full option list at :gmt-docs:`blockmedian.html`

    Parameters
    ----------
    table : pandas.DataFrame or str
        Either a pandas dataframe holding (x, y, z) or (longitude, latitude,
        elevation) values in its first three columns, or the file name of an
        ASCII data table.

    spacing (I) : str
        ``'xinc[unit][+e|n][/yinc[unit][+e|n]]'``.
        x_inc [and optionally y_inc] is the grid spacing.

    region (R) : str or list
        ``'xmin/xmax/ymin/ymax[+r][+uunit]'``.
        Specify the region of interest.

    outfile : str
        Name of the ASCII file that receives the output. Required when 'table'
        is a file name.

    Returns
    -------
    output : pandas.DataFrame or None
        Depends on whether the outfile parameter is set:

        - pandas.DataFrame with the same column names as 'table' if outfile is
          not set
        - None if outfile is set (the filtered output is stored in outfile)

    Raises
    ------
    GMTInvalidInput
        If 'table' is a file name and no outfile is given, or if 'table' is
        neither a file name nor a matrix-like object with a ``values`` attribute.
    """
    kind = data_kind(table)
    with GMTTempFile(suffix=".csv") as tmpfile:
        with Session() as lib:
            if kind == "file":
                if outfile is None:
                    raise GMTInvalidInput("Please pass in a str to 'outfile'")
                file_context = dummy_context(table)
            elif kind == "matrix" and hasattr(table, "values"):
                file_context = lib.virtualfile_from_matrix(table.values)
            else:
                # Covers matrix-like inputs lacking ``.values`` and every other kind
                raise GMTInvalidInput(f"Unrecognized data type: {type(table)}")

            # With no user-supplied outfile, direct the output to the temporary file
            want_dataframe = outfile is None
            destination = tmpfile.name if want_dataframe else outfile

            with file_context as infile:
                module_args = " ".join(
                    [infile, build_arg_string(kwargs), "->" + destination]
                )
                lib.call_module(module="blockmedian", args=module_args)

        # Parse the temporary output before leaving the GMTTempFile context
        # (presumably the file is removed on exit -- confirm in helpers).
        if want_dataframe:
            result = pd.read_csv(tmpfile.name, sep="\t", names=table.columns)
        else:
            result = None

    return result
77 changes: 77 additions & 0 deletions pygmt/tests/test_blockmedian.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
Tests for blockmedian
"""
import os

import numpy.testing as npt
import pandas as pd
import pytest

from .. import blockmedian
from ..datasets import load_sample_bathymetry
from ..exceptions import GMTInvalidInput
from ..helpers import data_kind, GMTTempFile


def test_blockmedian_input_dataframe():
    """
    Run blockmedian by passing in a pandas.DataFrame as input.

    With no outfile given, the result must come back as a pandas.DataFrame that
    keeps the column names of the input table.
    """
    dataframe = load_sample_bathymetry()
    output = blockmedian(table=dataframe, spacing="5m", region=[245, 255, 20, 30])
    assert isinstance(output, pd.DataFrame)
    assert all(dataframe.columns == output.columns)
    assert output.shape == (5849, 3)
    npt.assert_allclose(output.iloc[0], [245.88819, 29.97895, -385.0])
    # Deliberately no return value: pytest warns when a test returns non-None.


def test_blockmedian_wrong_kind_of_input_table_matrix():
    """
    Check that a matrix-kind table which is not a pandas.DataFrame is rejected.

    The raw ``.values`` of the sample dataframe is classified as "matrix" by
    data_kind, yet blockmedian must raise GMTInvalidInput for it.
    """
    raw_values = load_sample_bathymetry().values
    assert data_kind(raw_values) == "matrix"
    with pytest.raises(GMTInvalidInput):
        blockmedian(table=raw_values, spacing="5m", region=[245, 255, 20, 30])


def test_blockmedian_wrong_kind_of_input_table_grid():
    """
    Check that a grid-kind table (neither pandas.DataFrame nor file) is rejected.

    The bathymetry column converted with ``to_xarray()`` is classified as "grid"
    by data_kind, and blockmedian must raise GMTInvalidInput for it.
    """
    bathymetry_grid = load_sample_bathymetry().bathymetry.to_xarray()
    assert data_kind(bathymetry_grid) == "grid"
    with pytest.raises(GMTInvalidInput):
        blockmedian(table=bathymetry_grid, spacing="5m", region=[245, 255, 20, 30])


def test_blockmedian_input_filename():
    """
    Run blockmedian by passing in an ASCII text file as input.

    With outfile set, blockmedian must return None and write the filtered table
    to the given path, which is then read back and checked.
    """
    with GMTTempFile() as tmpfile:
        output = blockmedian(
            table="@tut_ship.xyz",
            spacing="5m",
            region=[245, 255, 20, 30],
            outfile=tmpfile.name,
        )
        assert output is None  # check that output is None since outfile is set
        assert os.path.exists(tmpfile.name)  # check that outfile exists at path

        # Read the file back while still inside the GMTTempFile context
        filtered = pd.read_csv(tmpfile.name, sep="\t", header=None)
        assert filtered.shape == (5849, 3)
        npt.assert_allclose(filtered.iloc[0], [245.88819, 29.97895, -385.0])
    # Deliberately no return value: pytest warns when a test returns non-None.


def test_blockmedian_without_outfile_setting():
    """
    Check that file input without the outfile parameter raises GMTInvalidInput.
    """
    call_arguments = dict(table="@tut_ship.xyz", spacing="5m", region=[245, 255, 20, 30])
    with pytest.raises(GMTInvalidInput):
        blockmedian(**call_arguments)

0 comments on commit 1ee3f98

Please sign in to comment.