Skip to content

Commit

Permalink
Merge branch 'main' into pass-arg-list-to-module
Browse files Browse the repository at this point in the history
  • Loading branch information
seisman committed Apr 7, 2024
2 parents 72e684b + a32049d commit faa4a4c
Show file tree
Hide file tree
Showing 13 changed files with 380 additions and 149 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:

# Install Miniconda with conda-forge dependencies
- name: Setup Miniconda
uses: conda-incubator/[email protected].1
uses: conda-incubator/[email protected].3
with:
auto-activate-base: true
activate-environment: "" # base environment
Expand Down Expand Up @@ -86,7 +86,7 @@ jobs:

# Run the benchmark tests
- name: Run benchmarks
uses: CodSpeedHQ/action@v2.2.1
uses: CodSpeedHQ/action@v2.3.0
with:
run: |
python -c "import pygmt; pygmt.show_versions()"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
- python-version: '3.10'
numpy-version: '1.23'
pandas-version: '=1.5'
xarray-version: '=2022.03'
xarray-version: '=2022.06'
optional-packages: ''
- python-version: '3.12'
numpy-version: '1.26'
Expand Down
2 changes: 1 addition & 1 deletion doc/minversions.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ after their initial release.

| PyGMT Version | GMT | Python | NumPy | Pandas | Xarray |
|---|---|---|---|---|---|
| [Dev][]* [[Docs][Docs Dev]] | >=6.3.0 | >=3.10 | >=1.23 | >=1.5 | >=2022.03 |
| [Dev][]* [[Docs][Docs Dev]] | >=6.3.0 | >=3.10 | >=1.23 | >=1.5 | >=2022.06 |
| [v0.11.0][] [[Docs][Docs v0.11.0]] | >=6.3.0 | >=3.9 | >=1.23 | | |
| [v0.10.0][] [[Docs][Docs v0.10.0]] | >=6.3.0 | >=3.9 | >=1.22 | | |
| [v0.9.0][] [[Docs][Docs v0.9.0]] | >=6.3.0 | >=3.8 | >=1.21 | | |
Expand Down
4 changes: 2 additions & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dependencies:
- ghostscript=10.03.0
- numpy>=1.23
- pandas>=1.5
- xarray>=2022.03
- xarray>=2022.06
- netCDF4
- packaging
# Optional dependencies
Expand All @@ -27,7 +27,7 @@ dependencies:
- codespell
- ruff>=0.3.0
# Dev dependencies (unit testing)
- matplotlib
- matplotlib-base
- pytest-cov
- pytest-doctestplus
- pytest-mpl
Expand Down
58 changes: 52 additions & 6 deletions pygmt/clib/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1709,6 +1709,35 @@ def virtualfile_out(
with self.open_virtualfile(family, geometry, "GMT_OUT", None) as vfile:
yield vfile

def inquire_virtualfile(self, vfname: str) -> int:
    """
    Get the family of a virtual file.

    Parameters
    ----------
    vfname
        Name of the virtual file to inquire.

    Returns
    -------
    family
        The integer value for the family of the virtual file.

    Examples
    --------
    >>> from pygmt.clib import Session
    >>> with Session() as lib:
    ...     with lib.virtualfile_out(kind="dataset") as vfile:
    ...         family = lib.inquire_virtualfile(vfile)
    ...         assert family == lib["GMT_IS_DATASET"]
    """
    func = self.get_libgmt_func(
        "GMT_Inquire_VirtualFile",
        argtypes=[ctp.c_void_p, ctp.c_char_p],
        restype=ctp.c_uint,
    )
    # The C API expects the virtual file name as a NUL-terminated byte string.
    return func(self.session_pointer, vfname.encode())

def read_virtualfile(
self, vfname: str, kind: Literal["dataset", "grid", None] = None
):
Expand Down Expand Up @@ -1775,7 +1804,7 @@ def read_virtualfile(
def virtualfile_to_dataset(
self,
vfname: str,
output_type: Literal["pandas", "numpy", "file"] = "pandas",
output_type: Literal["pandas", "numpy", "file", "strings"] = "pandas",
column_names: list[str] | None = None,
dtype: type | dict[str, type] | None = None,
index_col: str | int | None = None,
Expand All @@ -1796,6 +1825,7 @@ def virtualfile_to_dataset(
- ``"pandas"`` will return a :class:`pandas.DataFrame` object.
- ``"numpy"`` will return a :class:`numpy.ndarray` object.
- ``"file"`` means the result was saved to a file and will return ``None``.
- ``"strings"`` will return the trailing text only as an array of strings.
column_names
The column names for the :class:`pandas.DataFrame` output.
dtype
Expand Down Expand Up @@ -1841,6 +1871,16 @@ def virtualfile_to_dataset(
... assert result is None
... assert Path(outtmp.name).stat().st_size > 0
...
... # strings output
... with Session() as lib:
... with lib.virtualfile_out(kind="dataset") as vouttbl:
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
... outstr = lib.virtualfile_to_dataset(
... vfname=vouttbl, output_type="strings"
... )
... assert isinstance(outstr, np.ndarray)
... assert outstr.dtype.kind in ("S", "U")
...
... # numpy output
... with Session() as lib:
... with lib.virtualfile_out(kind="dataset") as vouttbl:
Expand Down Expand Up @@ -1869,6 +1909,9 @@ def virtualfile_to_dataset(
... column_names=["col1", "col2", "col3", "coltext"],
... )
... assert isinstance(outpd2, pd.DataFrame)
>>> outstr
array(['TEXT1 TEXT23', 'TEXT4 TEXT567', 'TEXT8 TEXT90',
'TEXT123 TEXT456789'], dtype='<U18')
>>> outnp
array([[1.0, 2.0, 3.0, 'TEXT1 TEXT23'],
[4.0, 5.0, 6.0, 'TEXT4 TEXT567'],
Expand All @@ -1890,11 +1933,14 @@ def virtualfile_to_dataset(
if output_type == "file": # Already written to file, so return None
return None

# Read the virtual file as a GMT dataset and convert to pandas.DataFrame
result = self.read_virtualfile(vfname, kind="dataset").contents.to_dataframe(
column_names=column_names,
dtype=dtype,
index_col=index_col,
# Read the virtual file as a _GMT_DATASET object
result = self.read_virtualfile(vfname, kind="dataset").contents

if output_type == "strings": # strings output
return result.to_strings()

result = result.to_dataframe(
column_names=column_names, dtype=dtype, index_col=index_col
)
if output_type == "numpy": # numpy.ndarray output
return result.to_numpy()
Expand Down
44 changes: 25 additions & 19 deletions pygmt/datatypes/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,17 @@ class _GMT_DATASEGMENT(ctp.Structure): # noqa: N801
("hidden", ctp.c_void_p),
]

def to_strings(self) -> np.ndarray[Any, np.dtype[np.str_]]:
    """
    Convert the trailing text column to an array of strings.
    """
    # Collect the raw byte strings from every segment of every table.
    texts: list[bytes] = []
    for tbl in self.table[: self.n_tables]:
        for seg in tbl.contents.segment[: tbl.contents.n_segments]:
            if seg.contents.text:
                texts.extend(seg.contents.text[: seg.contents.n_rows])
    if not texts:
        # No trailing text column: return an empty string array.
        return np.array([], dtype=str)
    # Decode the byte strings into a unicode string array.
    return np.char.decode(texts)

def to_dataframe(
self,
column_names: pd.Index | None = None,
Expand Down Expand Up @@ -194,7 +205,11 @@ def to_dataframe(
... with lib.virtualfile_out(kind="dataset") as vouttbl:
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
... ds = lib.read_virtualfile(vouttbl, kind="dataset")
... text = ds.contents.to_strings()
... df = ds.contents.to_dataframe()
>>> text
array(['TEXT1 TEXT23', 'TEXT4 TEXT567', 'TEXT8 TEXT90',
'TEXT123 TEXT456789'], dtype='<U18')
>>> df
0 1 2 3
0 1.0 2.0 3.0 TEXT1 TEXT23
Expand All @@ -207,28 +222,19 @@ def to_dataframe(
vectors = []
# Deal with numeric columns
for icol in range(self.n_columns):
colvector = []
for itbl in range(self.n_tables):
dtbl = self.table[itbl].contents
for iseg in range(dtbl.n_segments):
dseg = dtbl.segment[iseg].contents
colvector.append(
np.ctypeslib.as_array(dseg.data[icol], shape=(dseg.n_rows,))
)
colvector = [
np.ctypeslib.as_array(
seg.contents.data[icol], shape=(seg.contents.n_rows,)
)
for tbl in self.table[: self.n_tables]
for seg in tbl.contents.segment[: tbl.contents.n_segments]
]
vectors.append(pd.Series(data=np.concatenate(colvector)))

# Deal with trailing text column
textvector = []
for itbl in range(self.n_tables):
dtbl = self.table[itbl].contents
for iseg in range(dtbl.n_segments):
dseg = dtbl.segment[iseg].contents
if dseg.text:
textvector.extend(dseg.text[: dseg.n_rows])
if textvector:
vectors.append(
pd.Series(data=np.char.decode(textvector), dtype=pd.StringDtype())
)
textvector = self.to_strings()
if len(textvector) != 0:
vectors.append(pd.Series(data=textvector, dtype=pd.StringDtype()))

if len(vectors) == 0:
# Return an empty DataFrame if no columns are found.
Expand Down
98 changes: 1 addition & 97 deletions pygmt/datatypes/grid.py
Original file line number Diff line number Diff line change
@@ -1,104 +1,8 @@
"""
Wrapper for the GMT_GRID data type and the GMT_GRID_HEADER data structure.
Wrapper for the GMT_GRID data type.
"""

import ctypes as ctp
from typing import ClassVar

# Constants for lengths of fixed-size string fields in the grid header.
# These values must match the corresponding definitions in GMT's
# gmt_resources.h header file.
#
# Note: Ideally we should be able to get these constants from the GMT shared library
# using the ``lib["GMT_GRID_UNIT_LEN80"]`` syntax, but it causes cyclic import error.
# So we have to hardcode the values here.
GMT_GRID_UNIT_LEN80 = 80
GMT_GRID_TITLE_LEN80 = 80
GMT_GRID_COMMAND_LEN320 = 320
GMT_GRID_REMARK_LEN160 = 160

# GMT uses single-precision for grids by default, but can be built to use
# double-precision. Currently, only single-precision is supported.
gmt_grdfloat = ctp.c_float


class _GMT_GRID_HEADER(ctp.Structure):  # noqa: N801
    """
    GMT grid header structure for metadata about the grid.

    The class is used in the `GMT_GRID`/`GMT_IMAGE`/`GMT_CUBE` data structure. See the
    GMT source code gmt_resources.h for the original C structure definitions.

    The field order and types below mirror the C struct exactly and must not be
    changed, or the ctypes layout will no longer match the shared library's ABI.
    """

    _fields_: ClassVar = [
        # Number of columns
        ("n_columns", ctp.c_uint32),
        # Number of rows
        ("n_rows", ctp.c_uint32),
        # Grid registration, 0 for gridline and 1 for pixel
        ("registration", ctp.c_uint32),
        # Minimum/maximum x and y coordinates
        ("wesn", ctp.c_double * 4),
        # Minimum z value
        ("z_min", ctp.c_double),
        # Maximum z value
        ("z_max", ctp.c_double),
        # x and y increments
        ("inc", ctp.c_double * 2),
        # Grid values must be multiplied by this factor
        ("z_scale_factor", ctp.c_double),
        # After scaling, add this offset
        ("z_add_offset", ctp.c_double),
        # Units in x-direction, in the form "long_name [units]"
        ("x_units", ctp.c_char * GMT_GRID_UNIT_LEN80),
        # Units in y-direction, in the form "long_name [units]"
        ("y_units", ctp.c_char * GMT_GRID_UNIT_LEN80),
        # Grid value units, in the form "long_name [units]"
        ("z_units", ctp.c_char * GMT_GRID_UNIT_LEN80),
        # Name of data set
        ("title", ctp.c_char * GMT_GRID_TITLE_LEN80),
        # Name of generating command
        ("command", ctp.c_char * GMT_GRID_COMMAND_LEN320),
        # Comments for this data set
        ("remark", ctp.c_char * GMT_GRID_REMARK_LEN160),
        # Below are items used internally by GMT
        # Number of data points (n_columns * n_rows) [paddings are excluded]
        ("nm", ctp.c_size_t),
        # Actual number of items (not bytes) required to hold this grid (mx * my),
        # per band (for images)
        ("size", ctp.c_size_t),
        # Bits per data value (e.g., 32 for ints/floats; 8 for bytes).
        # Only used for ESRI ArcInfo ASCII Exchange grids.
        ("bits", ctp.c_uint),
        # For complex grids:
        # 0 for normal
        # GMT_GRID_IS_COMPLEX_REAL = real part of complex grid
        # GMT_GRID_IS_COMPLEX_IMAG = imag part of complex grid
        ("complex_mode", ctp.c_uint),
        # Grid format
        ("type", ctp.c_uint),
        # Number of bands [1]. Used with GMT_IMAGE containers
        ("n_bands", ctp.c_uint),
        # Actual x-dimension in memory. mx = n_columns + pad[0] + pad[1]
        ("mx", ctp.c_uint),
        # Actual y-dimension in memory. my = n_rows + pad[2] + pad[3]
        ("my", ctp.c_uint),
        # Paddings on west, east, south, north sides [2,2,2,2]
        ("pad", ctp.c_uint * 4),
        # Three or four char codes T|B R|C S|R|S (grd) or B|L|P + A|a (img)
        # describing array layout in mem and interleaving
        ("mem_layout", ctp.c_char * 4),
        # Missing value as stored in grid file
        ("nan_value", gmt_grdfloat),
        # 0.0 for gridline grids and 0.5 for pixel grids
        ("xy_off", ctp.c_double),
        # Referencing system string in PROJ.4 format
        ("ProjRefPROJ4", ctp.c_char_p),
        # Referencing system string in WKT format
        ("ProjRefWKT", ctp.c_char_p),
        # Referencing system EPSG code
        ("ProjRefEPSG", ctp.c_int),
        # Lower-level information for GMT use only
        ("hidden", ctp.c_void_p),
    ]


class _GMT_GRID(ctp.Structure): # noqa: N801
Expand Down
Loading

0 comments on commit faa4a4c

Please sign in to comment.