From 5a56ba8ad2c314df7a00b597a3684b1710fc0318 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 8 Jul 2024 13:30:14 +0800 Subject: [PATCH 01/21] Wrap the GMT API function GMT_Read_Data to read data into GMT data containers --- pygmt/clib/conversion.py | 4 ++- pygmt/clib/session.py | 72 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 95f2f08aa51..a296601e5a1 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -247,7 +247,9 @@ def as_c_contiguous(array): return array -def sequence_to_ctypes_array(sequence: Sequence, ctype, size: int) -> ctp.Array | None: +def sequence_to_ctypes_array( + sequence: Sequence | None, ctype, size: int +) -> ctp.Array | None: """ Convert a sequence of numbers into a ctypes array variable. diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 7abe9b77e91..4c81d0b70d6 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -10,6 +10,7 @@ import pathlib import sys import warnings +from collections.abc import Sequence from typing import Literal import numpy as np @@ -1068,6 +1069,77 @@ def put_matrix(self, dataset, matrix, pad=0): if status != 0: raise GMTCLibError(f"Failed to put matrix of type {matrix.dtype}.") + def read_data( + self, + family: str, + geometry: str, + mode: str, + wesn: Sequence[float] | None, + infile: str, + data=None, + ): + """ + Read a data file into a GMT data container. + + Wraps ``GMT_Read_Data`` but only allows reading from a file, so the ``method`` + ``method`` argument is omitted. + + Parameters + ---------- + family + A valid GMT data family name (e.g., ``"GMT_IS_DATASET"``). See the + ``FAMILIES`` attribute for valid names. + geometry + A valid GMT data geometry name (e.g., ``"GMT_IS_POINT"``). See the + ``GEOMETRIES`` attribute for valid names. + mode + How the data is to be read from the file. This option varies depending on + the given family. See the GMT API documentation for details. + wesn + Subregion of the data, in the form of [xmin, xmax, ymin, ymax, zmin, zmax]. + If ``None``, the whole data is read. + input + The input file name. + data + ``None`` or the pointer returned by this function after a first call. It's + useful when reading grids in two steps (get a grid structure with a header, + then read the data). + + Returns + ------- + Pointer to the data container, or ``None`` if there were errors. + """ + c_read_data = self.get_libgmt_func( + "GMT_Read_Data", + argtypes=[ + ctp.c_void_p, + ctp.c_uint, + ctp.c_uint, + ctp.c_uint, + ctp.c_uint, + ctp.POINTER(ctp.c_double), + ctp.c_char_p, + ctp.c_void_p, + ], + restype=ctp.c_void_p, + ) + + family_int = self._parse_constant(family, valid=FAMILIES, valid_modifiers=VIAS) + geometry_int = self._parse_constant(geometry, valid=GEOMETRIES) + method = self["GMT_IS_FILE"] # Reading from a file. + + data_ptr = c_read_data( + self.session_pointer, + family_int, + method, + geometry_int, + self[mode], + sequence_to_ctypes_array(wesn, ctp.c_double, 6), + infile.encode(), + data, + ) + return data_ptr + def write_data(self, family, geometry, mode, wesn, output, data): """ Write a GMT data container to a file. From 7ccf641299b4900457257741bf2571ebe4d387ca Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 8 Jul 2024 21:56:43 +0800 Subject: [PATCH 02/21] Add tests for reading datasets and grids --- pygmt/tests/test_clib_read_data.py | 145 +++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 pygmt/tests/test_clib_read_data.py diff --git a/pygmt/tests/test_clib_read_data.py b/pygmt/tests/test_clib_read_data.py new file mode 100644 index 00000000000..934e22bf536 --- /dev/null +++ b/pygmt/tests/test_clib_read_data.py @@ -0,0 +1,145 @@ +""" +Test the Session.read_data method. +""" + +import ctypes as ctp +from pathlib import Path + +import numpy as np +from pygmt.clib import Session +from pygmt.datatypes import _GMT_DATASET, _GMT_GRID +from pygmt.helpers import GMTTempFile + + +def test_clib_read_data_dataset(): + """ + Test the Session.read_data method for datasets. + + The test is adapted from the doctest in the _GMT_DATASET class. + """ + with GMTTempFile(suffix=".txt") as tmpfile: + # Prepare the sample data file + with Path(tmpfile.name).open(mode="w", encoding="utf-8") as fp: + print("# x y z name", file=fp) + print(">", file=fp) + print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp) + print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp) + print(">", file=fp) + print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp) + print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) + + with Session() as lib: + data_ptr = lib.read_data( + "GMT_IS_DATASET", + "GMT_IS_PLP", + "GMT_READ_NORMAL", + None, + tmpfile.name, + None, + ) + ds = ctp.cast(data_ptr, ctp.POINTER(_GMT_DATASET)).contents + + assert ds.n_tables == 1 + assert ds.n_segments == 2 + assert ds.n_columns == 3 + + tbl = ds.table[0].contents + assert tbl.min[: tbl.n_columns] == [1.0, 2.0, 3.0] + assert tbl.max[: tbl.n_columns] == [10.0, 11.0, 12.0] + + +def test_clib_read_data_grid(): + """ + Test the Session.read_data method for grids. + + The test is adapted from the doctest in the _GMT_GRID class. + """ + with Session() as lib: + data_ptr = lib.read_data( + "GMT_IS_GRID", + "GMT_IS_SURFACE", + "GMT_CONTAINER_AND_DATA", + None, + "@static_earth_relief.nc", + None, + ) + grid = ctp.cast(data_ptr, ctp.POINTER(_GMT_GRID)).contents + header = grid.header.contents + assert header.n_rows == 14 + assert header.n_columns == 8 + assert header.n_bands == 1 + assert header.wesn[:] == [-55.0, -47.0, -24.0, -10.0] + assert header.z_min == 190.0 + assert header.z_max == 981.0 + + assert grid.data # The data is read + data = np.reshape(grid.data[: header.mx * header.my], (header.my, header.mx)) + data = data[ + header.pad[2] : header.my - header.pad[3], + header.pad[0] : header.mx - header.pad[1], + ] + assert data[3][4] == 250.0 + + +def test_clib_read_data_grid_two_steps(): + """ + Test the Session.read_data method for grids in two steps, first reading the header + and then the data. + + The test is adapted from the doctest in the _GMT_GRID class. + """ + family, geometry = "GMT_IS_GRID", "GMT_IS_SURFACE" + infile = "@static_earth_relief.nc" + with Session() as lib: + # Read the header first + data_ptr = lib.read_data( + family, geometry, "GMT_CONTAINER_ONLY", None, infile, None + ) + grid = ctp.cast(data_ptr, ctp.POINTER(_GMT_GRID)).contents + header = grid.header.contents + assert header.n_rows == 14 + assert header.n_columns == 8 + assert header.n_bands == 1 + assert header.wesn[:] == [-55.0, -47.0, -24.0, -10.0] + assert header.z_min == 190.0 + assert header.z_max == 981.0 + + assert not grid.data # The data is not read yet + + # Read the data + data_ptr = lib.read_data( + family, geometry, "GMT_DATA_ONLY", None, infile, data_ptr + ) + grid = ctp.cast(data_ptr, ctp.POINTER(_GMT_GRID)).contents + + assert grid.data # The data is read + header = grid.header.contents + data = np.reshape(grid.data[: header.mx * header.my], (header.my, header.mx)) + data = data[ + header.pad[2] : header.my - header.pad[3], + header.pad[0] : header.mx - header.pad[1], + ] + assert data[3][4] == 250.0 + + +def test_clib_read_data_image_as_grid(): + """ + Test the Session.read_data method for images. + """ + with Session() as lib: + data_ptr = lib.read_data( + "GMT_IS_GRID", + "GMT_IS_SURFACE", + "GMT_CONTAINER_AND_DATA", + None, + "@earth_day_01d_p", + None, + ) + image = ctp.cast(data_ptr, ctp.POINTER(_GMT_GRID)).contents + header = image.header.contents + assert header.n_rows == 180 + assert header.n_columns == 360 + assert header.n_bands == 1 + assert header.wesn[:] == [-180.0, 180.0, -90.0, 90.0] + + assert image.data From 024728d4680833d2a430938009da2631e7312dbc Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 10 Jul 2024 15:05:51 +0800 Subject: [PATCH 03/21] Add the new API function to docs --- doc/api/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/api/index.rst b/doc/api/index.rst index 5cf28bb3ebb..cff460ce2ff 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -309,6 +309,7 @@ Low level access (these are mostly used by the :mod:`pygmt.clib` package): clib.Session.put_matrix clib.Session.put_strings clib.Session.put_vector + clib.Session.read_data clib.Session.write_data clib.Session.open_virtualfile clib.Session.read_virtualfile From afd8dc729ffa1fdb5ecc19a1d5c8c393a97cbb52 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 10 Jul 2024 15:33:41 +0800 Subject: [PATCH 04/21] Refactor to make the function definition more Pythonic --- pygmt/clib/session.py | 55 ++++++++++++++++-------------- pygmt/tests/test_clib_read_data.py | 30 ++++------------ 2 files changed, 35 insertions(+), 50 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 4c81d0b70d6..aac3ad6256d 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1071,35 +1071,31 @@ def put_matrix(self, dataset, matrix, pad=0): def read_data( self, - family: str, - geometry: str, - mode: str, - wesn: Sequence[float] | None, infile: str, + kind: Literal["dataset", "grid"], + mode: str | None = None, + wesn: Sequence[float] | None = None, data=None, ): """ Read a data file into a GMT data container. - Wraps ``GMT_Read_Data`` but only allows reading from a file, so the ``method`` - ``method`` argument is omitted. + Wraps ``GMT_Read_Data`` but only allows reading from a file. The function + definition is different from the original C API function. Parameters ---------- - family - A valid GMT data family name (e.g., ``"GMT_IS_DATASET"``). See the - ``FAMILIES`` attribute for valid names. - geometry - A valid GMT data geometry name (e.g., ``"GMT_IS_POINT"``). See the - ``GEOMETRIES`` attribute for valid names. + input + The input file name. + kind + The data kind of the input file. Currently, valid values are ``"dataset"`` + and ``"grid"``. mode How the data is to be read from the file. This option varies depending on the given family. See the GMT API documentation for details. wesn Subregion of the data, in the form of [xmin, xmax, ymin, ymax, zmin, zmax]. If ``None``, the whole data is read. - input - The input file name. data ``None`` or the pointer returned by this function after a first call. It's useful when reading grids in two steps (get a grid structure with a header, @@ -1112,28 +1108,35 @@ def read_data( c_read_data = self.get_libgmt_func( "GMT_Read_Data", argtypes=[ - ctp.c_void_p, - ctp.c_uint, - ctp.c_uint, - ctp.c_uint, - ctp.c_uint, - ctp.POINTER(ctp.c_double), - ctp.c_char_p, - ctp.c_void_p, + ctp.c_void_p, # V_API + ctp.c_uint, # family + ctp.c_uint, # method + ctp.c_uint, # geometry + ctp.c_uint, # mode + ctp.POINTER(ctp.c_double), # wesn + ctp.c_char_p, # infile + ctp.c_void_p, # data ], - restype=ctp.c_void_p, + restype=ctp.c_void_p, # data_ptr ) + # Determine the family and geometry from kind + family, geometry = { + "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP"), + "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE"), + }[kind] + family_int = self._parse_constant(family, valid=FAMILIES, valid_modifiers=VIAS) geometry_int = self._parse_constant(geometry, valid=GEOMETRIES) - method = self["GMT_IS_FILE"] # Reading from a file. + method_int = self["GMT_IS_FILE"] # Reading from a file + mode_int = self["GMT_READ_NORMAL"] if mode is None else self[mode] data_ptr = c_read_data( self.session_pointer, family_int, - method, + method_int, geometry_int, - self[mode], + mode_int, sequence_to_ctypes_array(wesn, ctp.c_double, 6), infile.encode(), data, diff --git a/pygmt/tests/test_clib_read_data.py b/pygmt/tests/test_clib_read_data.py index 934e22bf536..66d221c1e6c 100644 --- a/pygmt/tests/test_clib_read_data.py +++ b/pygmt/tests/test_clib_read_data.py @@ -29,14 +29,7 @@ def test_clib_read_data_dataset(): print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) with Session() as lib: - data_ptr = lib.read_data( - "GMT_IS_DATASET", - "GMT_IS_PLP", - "GMT_READ_NORMAL", - None, - tmpfile.name, - None, - ) + data_ptr = lib.read_data(tmpfile.name, kind="dataset") ds = ctp.cast(data_ptr, ctp.POINTER(_GMT_DATASET)).contents assert ds.n_tables == 1 @@ -56,12 +49,9 @@ def test_clib_read_data_grid(): """ with Session() as lib: data_ptr = lib.read_data( - "GMT_IS_GRID", - "GMT_IS_SURFACE", - "GMT_CONTAINER_AND_DATA", - None, "@static_earth_relief.nc", - None, + kind="grid", + mode="GMT_CONTAINER_AND_DATA", ) grid = ctp.cast(data_ptr, ctp.POINTER(_GMT_GRID)).contents header = grid.header.contents @@ -88,13 +78,10 @@ def test_clib_read_data_grid_two_steps(): The test is adapted from the doctest in the _GMT_GRID class. """ - family, geometry = "GMT_IS_GRID", "GMT_IS_SURFACE" infile = "@static_earth_relief.nc" with Session() as lib: # Read the header first - data_ptr = lib.read_data( - family, geometry, "GMT_CONTAINER_ONLY", None, infile, None - ) + data_ptr = lib.read_data(infile, kind="grid", mode="GMT_CONTAINER_ONLY") grid = ctp.cast(data_ptr, ctp.POINTER(_GMT_GRID)).contents header = grid.header.contents assert header.n_rows == 14 @@ -108,7 +95,7 @@ def test_clib_read_data_grid_two_steps(): # Read the data data_ptr = lib.read_data( - family, geometry, "GMT_DATA_ONLY", None, infile, data_ptr + infile, kind="grid", mode="GMT_DATA_ONLY", data=data_ptr ) grid = ctp.cast(data_ptr, ctp.POINTER(_GMT_GRID)).contents @@ -128,12 +115,7 @@ def test_clib_read_data_image_as_grid(): """ with Session() as lib: data_ptr = lib.read_data( - "GMT_IS_GRID", - "GMT_IS_SURFACE", - "GMT_CONTAINER_AND_DATA", - None, - "@earth_day_01d_p", - None, + "@earth_day_01d_p", kind="grid", mode="GMT_CONTAINER_AND_DATA" ) image = ctp.cast(data_ptr, ctp.POINTER(_GMT_GRID)).contents header = image.header.contents From 8dfb24ae8a0d342a5bfa1cd9aa21511fc6224de3 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 10 Jul 2024 15:39:40 +0800 Subject: [PATCH 05/21] Let the function return the pointer to the actual data containers, so need cast is needed outside the function --- pygmt/clib/session.py | 8 ++++---- pygmt/tests/test_clib_read_data.py | 12 +++++------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index aac3ad6256d..41ee0fbe75d 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1121,9 +1121,9 @@ def read_data( ) # Determine the family and geometry from kind - family, geometry = { - "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP"), - "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE"), + family, geometry, dtype = { + "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP", _GMT_DATASET), + "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE", _GMT_GRID), }[kind] family_int = self._parse_constant(family, valid=FAMILIES, valid_modifiers=VIAS) @@ -1141,7 +1141,7 @@ def read_data( infile.encode(), data, ) - return data_ptr + return ctp.cast(data_ptr, ctp.POINTER(dtype)) def write_data(self, family, geometry, mode, wesn, output, data): """ diff --git a/pygmt/tests/test_clib_read_data.py b/pygmt/tests/test_clib_read_data.py index 66d221c1e6c..f37d4088cea 100644 --- a/pygmt/tests/test_clib_read_data.py +++ b/pygmt/tests/test_clib_read_data.py @@ -2,12 +2,10 @@ Test the Session.read_data method. """ -import ctypes as ctp from pathlib import Path import numpy as np from pygmt.clib import Session -from pygmt.datatypes import _GMT_DATASET, _GMT_GRID from pygmt.helpers import GMTTempFile @@ -30,7 +28,7 @@ def test_clib_read_data_dataset(): with Session() as lib: data_ptr = lib.read_data(tmpfile.name, kind="dataset") - ds = ctp.cast(data_ptr, ctp.POINTER(_GMT_DATASET)).contents + ds = data_ptr.contents assert ds.n_tables == 1 assert ds.n_segments == 2 @@ -53,7 +51,7 @@ def test_clib_read_data_grid(): kind="grid", mode="GMT_CONTAINER_AND_DATA", ) - grid = ctp.cast(data_ptr, ctp.POINTER(_GMT_GRID)).contents + grid = data_ptr.contents header = grid.header.contents assert header.n_rows == 14 assert header.n_columns == 8 @@ -82,7 +80,7 @@ def test_clib_read_data_grid_two_steps(): with Session() as lib: # Read the header first data_ptr = lib.read_data(infile, kind="grid", mode="GMT_CONTAINER_ONLY") - grid = ctp.cast(data_ptr, ctp.POINTER(_GMT_GRID)).contents + grid = data_ptr.contents header = grid.header.contents assert header.n_rows == 14 assert header.n_columns == 8 @@ -97,7 +95,7 @@ def test_clib_read_data_grid_two_steps(): data_ptr = lib.read_data( infile, kind="grid", mode="GMT_DATA_ONLY", data=data_ptr ) - grid = ctp.cast(data_ptr, ctp.POINTER(_GMT_GRID)).contents + grid = data_ptr.contents assert grid.data # The data is read header = grid.header.contents @@ -117,7 +115,7 @@ def test_clib_read_data_image_as_grid(): data_ptr = lib.read_data( "@earth_day_01d_p", kind="grid", mode="GMT_CONTAINER_AND_DATA" ) - image = ctp.cast(data_ptr, ctp.POINTER(_GMT_GRID)).contents + image = data_ptr.contents header = image.header.contents assert header.n_rows == 180 assert header.n_columns == 360 From 76a511eb976de6ade721e68c74748822ede1a218 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 10 Jul 2024 15:59:51 +0800 Subject: [PATCH 06/21] No need to call _parse_constant --- pygmt/clib/session.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 41ee0fbe75d..7c266fb0c74 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1126,17 +1126,12 @@ def read_data( "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE", _GMT_GRID), }[kind] - family_int = self._parse_constant(family, valid=FAMILIES, valid_modifiers=VIAS) - geometry_int = self._parse_constant(geometry, valid=GEOMETRIES) - method_int = self["GMT_IS_FILE"] # Reading from a file - mode_int = self["GMT_READ_NORMAL"] if mode is None else self[mode] - data_ptr = c_read_data( self.session_pointer, - family_int, - method_int, - geometry_int, - mode_int, + self[family], + self["GMT_IS_FILE"], # Reading from a file + self[geometry], + self["GMT_READ_NORMAL"] if mode is None else self[mode], sequence_to_ctypes_array(wesn, ctp.c_double, 6), infile.encode(), data, From 591172b1a33aea2954f1ab7341959b29a7b7a64f Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 10 Jul 2024 17:13:18 +0800 Subject: [PATCH 07/21] Simplify the tests by converting GMT data containers into dataframe or dataarray --- pygmt/tests/test_clib_read_data.py | 95 +++++++++++++++--------------- 1 file changed, 46 insertions(+), 49 deletions(-) diff --git a/pygmt/tests/test_clib_read_data.py b/pygmt/tests/test_clib_read_data.py index f37d4088cea..2150b901d66 100644 --- a/pygmt/tests/test_clib_read_data.py +++ b/pygmt/tests/test_clib_read_data.py @@ -4,9 +4,21 @@ from pathlib import Path -import numpy as np +import pandas as pd +import pytest +import xarray as xr from pygmt.clib import Session from pygmt.helpers import GMTTempFile +from pygmt.io import load_dataarray +from pygmt.src import which + + +@pytest.fixture(scope="module", name="expected_xrgrid") +def fixture_expected_xrgrid(): + """ + The expected xr.DataArray object for the static_earth_relief.nc file. + """ + return load_dataarray(which("@static_earth_relief.nc")) def test_clib_read_data_dataset(): @@ -27,49 +39,42 @@ def test_clib_read_data_dataset(): print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) with Session() as lib: - data_ptr = lib.read_data(tmpfile.name, kind="dataset") - ds = data_ptr.contents - - assert ds.n_tables == 1 - assert ds.n_segments == 2 - assert ds.n_columns == 3 - - tbl = ds.table[0].contents - assert tbl.min[: tbl.n_columns] == [1.0, 2.0, 3.0] - assert tbl.max[: tbl.n_columns] == [10.0, 11.0, 12.0] - - -def test_clib_read_data_grid(): + ds = lib.read_data(tmpfile.name, kind="dataset").contents + df = ds.to_dataframe(header=0) + expected_df = pd.DataFrame( + data={ + "x": [1.0, 4.0, 7.0, 10.0], + "y": [2.0, 5.0, 8.0, 11.0], + "z": [3.0, 6.0, 9.0, 12.0], + "name": pd.Series( + [ + "TEXT1 TEXT23", + "TEXT4 TEXT567", + "TEXT8 TEXT90", + "TEXT123 TEXT456789", + ], + dtype=pd.StringDtype(), + ), + } + ) + pd.testing.assert_frame_equal(df, expected_df) + + +def test_clib_read_data_grid(expected_xrgrid): """ Test the Session.read_data method for grids. The test is adapted from the doctest in the _GMT_GRID class. """ with Session() as lib: - data_ptr = lib.read_data( - "@static_earth_relief.nc", - kind="grid", - mode="GMT_CONTAINER_AND_DATA", - ) - grid = data_ptr.contents - header = grid.header.contents - assert header.n_rows == 14 - assert header.n_columns == 8 - assert header.n_bands == 1 - assert header.wesn[:] == [-55.0, -47.0, -24.0, -10.0] - assert header.z_min == 190.0 - assert header.z_max == 981.0 + grid = lib.read_data("@static_earth_relief.nc", kind="grid").contents + xrgrid = grid.to_dataarray() + xr.testing.assert_equal(xrgrid, expected_xrgrid) + # Explicitely check n_bands + assert grid.header.contents.n_bands == 1 - assert grid.data # The data is read - data = np.reshape(grid.data[: header.mx * header.my], (header.my, header.mx)) - data = data[ - header.pad[2] : header.my - header.pad[3], - header.pad[0] : header.mx - header.pad[1], - ] - assert data[3][4] == 250.0 - -def test_clib_read_data_grid_two_steps(): +def test_clib_read_data_grid_two_steps(expected_xrgrid): """ Test the Session.read_data method for grids in two steps, first reading the header and then the data. @@ -92,19 +97,11 @@ def test_clib_read_data_grid_two_steps(): assert not grid.data # The data is not read yet # Read the data - data_ptr = lib.read_data( - infile, kind="grid", mode="GMT_DATA_ONLY", data=data_ptr - ) - grid = data_ptr.contents - - assert grid.data # The data is read - header = grid.header.contents - data = np.reshape(grid.data[: header.mx * header.my], (header.my, header.mx)) - data = data[ - header.pad[2] : header.my - header.pad[3], - header.pad[0] : header.mx - header.pad[1], - ] - assert data[3][4] == 250.0 + lib.read_data(infile, kind="grid", mode="GMT_DATA_ONLY", data=data_ptr) + xrgrid = data_ptr.contents.to_dataarray() + xr.testing.assert_equal(xrgrid, expected_xrgrid) + # Explicitely check n_bands + assert grid.header.contents.n_bands == 1 def test_clib_read_data_image_as_grid(): From 3d8ccb6b57bd90b6be3fbd19e3b3509dac890529 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 10 Jul 2024 17:58:18 +0800 Subject: [PATCH 08/21] Further simplify the tests --- pygmt/tests/test_clib_read_data.py | 40 +++++++++++++----------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/pygmt/tests/test_clib_read_data.py b/pygmt/tests/test_clib_read_data.py index 2150b901d66..b4e85b2e9c4 100644 --- a/pygmt/tests/test_clib_read_data.py +++ b/pygmt/tests/test_clib_read_data.py @@ -24,8 +24,6 @@ def fixture_expected_xrgrid(): def test_clib_read_data_dataset(): """ Test the Session.read_data method for datasets. - - The test is adapted from the doctest in the _GMT_DATASET class. """ with GMTTempFile(suffix=".txt") as tmpfile: # Prepare the sample data file @@ -63,23 +61,18 @@ def test_clib_read_data_dataset(): def test_clib_read_data_grid(expected_xrgrid): """ Test the Session.read_data method for grids. - - The test is adapted from the doctest in the _GMT_GRID class. """ with Session() as lib: grid = lib.read_data("@static_earth_relief.nc", kind="grid").contents xrgrid = grid.to_dataarray() xr.testing.assert_equal(xrgrid, expected_xrgrid) - # Explicitely check n_bands - assert grid.header.contents.n_bands == 1 + assert grid.header.contents.n_bands == 1 # Explicitely check n_bands def test_clib_read_data_grid_two_steps(expected_xrgrid): """ Test the Session.read_data method for grids in two steps, first reading the header and then the data. - - The test is adapted from the doctest in the _GMT_GRID class. """ infile = "@static_earth_relief.nc" with Session() as lib: @@ -89,34 +82,35 @@ def test_clib_read_data_grid_two_steps(expected_xrgrid): header = grid.header.contents assert header.n_rows == 14 assert header.n_columns == 8 - assert header.n_bands == 1 assert header.wesn[:] == [-55.0, -47.0, -24.0, -10.0] assert header.z_min == 190.0 assert header.z_max == 981.0 - + assert header.n_bands == 1 # Explicitely check n_bands assert not grid.data # The data is not read yet # Read the data lib.read_data(infile, kind="grid", mode="GMT_DATA_ONLY", data=data_ptr) xrgrid = data_ptr.contents.to_dataarray() xr.testing.assert_equal(xrgrid, expected_xrgrid) - # Explicitely check n_bands - assert grid.header.contents.n_bands == 1 -def test_clib_read_data_image_as_grid(): +def test_clib_read_data_grid_actual_image(): """ - Test the Session.read_data method for images. + Test the Session.read_data method for grid, but actually the file is an image. """ with Session() as lib: - data_ptr = lib.read_data( + image = lib.read_data( "@earth_day_01d_p", kind="grid", mode="GMT_CONTAINER_AND_DATA" + ).contents + # Explicitely check n_bands. + assert image.header.contents.n_bands == 1 # Only one band is read. + xrimage = image.to_dataarray() + + expected_xrimage = xr.open_dataarray(which("@earth_day_01d_p")) + assert expected_xrimage.band.size == 3 # 3-band image. + xr.testing.assert_equal( + xrimage, + expected_xrimage.isel(band=0) + .drop_vars(["band", "spatial_ref"]) + .sortby("y"), ) - image = data_ptr.contents - header = image.header.contents - assert header.n_rows == 180 - assert header.n_columns == 360 - assert header.n_bands == 1 - assert header.wesn[:] == [-180.0, 180.0, -90.0, 90.0] - - assert image.data From d1c6249832358bf091c374420dbe642af67ad923 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 10 Jul 2024 18:07:00 +0800 Subject: [PATCH 09/21] Simple checks if rioxarray is not installed --- pygmt/tests/test_clib_read_data.py | 39 ++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/pygmt/tests/test_clib_read_data.py b/pygmt/tests/test_clib_read_data.py index b4e85b2e9c4..5be5ae05859 100644 --- a/pygmt/tests/test_clib_read_data.py +++ b/pygmt/tests/test_clib_read_data.py @@ -12,6 +12,13 @@ from pygmt.io import load_dataarray from pygmt.src import which +try: + import rioxarray # noqa: F401 + + _HAS_RIOXARRAY = True +except ImportError: + _HAS_RIOXARRAY = False + @pytest.fixture(scope="module", name="expected_xrgrid") def fixture_expected_xrgrid(): @@ -99,18 +106,24 @@ def test_clib_read_data_grid_actual_image(): Test the Session.read_data method for grid, but actually the file is an image. """ with Session() as lib: - image = lib.read_data( + data_ptr = lib.read_data( "@earth_day_01d_p", kind="grid", mode="GMT_CONTAINER_AND_DATA" - ).contents - # Explicitely check n_bands. - assert image.header.contents.n_bands == 1 # Only one band is read. - xrimage = image.to_dataarray() - - expected_xrimage = xr.open_dataarray(which("@earth_day_01d_p")) - assert expected_xrimage.band.size == 3 # 3-band image. - xr.testing.assert_equal( - xrimage, - expected_xrimage.isel(band=0) - .drop_vars(["band", "spatial_ref"]) - .sortby("y"), ) + image = data_ptr.contents + header = image.header.contents + assert header.n_rows == 180 + assert header.n_columns == 360 + assert header.wesn[:] == [-180.0, 180.0, -90.0, 90.0] + # Explicitely check n_bands. Only one band is read for 3-band images. + assert header.n_bands == 1 + + if _HAS_RIOXARRAY: # Full check if rioxarray is installed. + xrimage = image.to_dataarray() + expected_xrimage = xr.open_dataarray(which("@earth_day_01d_p")) + assert expected_xrimage.band.size == 3 # 3-band image. + xr.testing.assert_equal( + xrimage, + expected_xrimage.isel(band=0) + .drop_vars(["band", "spatial_ref"]) + .sortby("y"), + ) From b4b034ca5bb807017265cf6db43dbd8dab5c7aea Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 11 Jul 2024 11:43:17 +0800 Subject: [PATCH 10/21] Let mode default to 'GMT_READ_NORMAL' --- pygmt/clib/session.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 7c266fb0c74..f16b380cb6c 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1073,7 +1073,7 @@ def read_data( self, infile: str, kind: Literal["dataset", "grid"], - mode: str | None = None, + mode: str = "GMT_READ_NORMAL", wesn: Sequence[float] | None = None, data=None, ): @@ -1088,8 +1088,8 @@ def read_data( input The input file name. kind - The data kind of the input file. Currently, valid values are ``"dataset"`` - and ``"grid"``. + The data kind of the input file. Valid values are ``"dataset"`` and + ``"grid"``. mode How the data is to be read from the file. This option varies depending on the given family. See the GMT API documentation for details. @@ -1131,7 +1131,7 @@ def read_data( self[family], self["GMT_IS_FILE"], # Reading from a file self[geometry], - self["GMT_READ_NORMAL"] if mode is None else self[mode], + self[mode], sequence_to_ctypes_array(wesn, ctp.c_double, 6), infile.encode(), data, From 5c036fab011ad64aa4fa5577676094b4cb108ab1 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 11 Jul 2024 11:45:44 +0800 Subject: [PATCH 11/21] Rename the parameter from wesn to region --- pygmt/clib/session.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index f16b380cb6c..23dc3127650 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1074,7 +1074,7 @@ def read_data( infile: str, kind: Literal["dataset", "grid"], mode: str = "GMT_READ_NORMAL", - wesn: Sequence[float] | None = None, + region: Sequence[float] | None = None, data=None, ): """ @@ -1093,7 +1093,7 @@ def read_data( mode How the data is to be read from the file. This option varies depending on the given family. See the GMT API documentation for details. - wesn + region Subregion of the data, in the form of [xmin, xmax, ymin, ymax, zmin, zmax]. If ``None``, the whole data is read. data @@ -1132,7 +1132,7 @@ def read_data( self["GMT_IS_FILE"], # Reading from a file self[geometry], self[mode], - sequence_to_ctypes_array(wesn, ctp.c_double, 6), + sequence_to_ctypes_array(region, ctp.c_double, 6), infile.encode(), data, ) From 8f8494c5d7431a2450bf4e30a70b44b09c613749 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 11 Jul 2024 11:52:40 +0800 Subject: [PATCH 12/21] Improve docstrings --- pygmt/clib/session.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 23dc3127650..a0e51f60b07 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1098,12 +1098,17 @@ def read_data( If ``None``, the whole data is read. data ``None`` or the pointer returned by this function after a first call. It's - useful when reading grids in two steps (get a grid structure with a header, - then read the data). + useful when reading grids/images/cubes in two steps (get a grid/image/cube + structure with a header, then read the data). Returns ------- Pointer to the data container, or ``None`` if there were errors. + + Raises + ------ + GMTCLibError + If the GMT API function fails to read the data. """ c_read_data = self.get_libgmt_func( "GMT_Read_Data", @@ -1136,6 +1141,8 @@ def read_data( infile.encode(), data, ) + if data_ptr is None: + raise GMTCLibError(f"Failed to read dataset from '{infile}'.") return ctp.cast(data_ptr, ctp.POINTER(dtype)) def write_data(self, family, geometry, mode, wesn, output, data): From 6206cdbf3af172a20e88c541869988b537cd171d Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 11 Jul 2024 11:59:24 +0800 Subject: [PATCH 13/21] Add one more test to catch exceptions --- pygmt/tests/test_clib_read_data.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pygmt/tests/test_clib_read_data.py b/pygmt/tests/test_clib_read_data.py index 5be5ae05859..3bbfe81c5c2 100644 --- a/pygmt/tests/test_clib_read_data.py +++ b/pygmt/tests/test_clib_read_data.py @@ -8,6 +8,7 @@ import pytest import xarray as xr from pygmt.clib import Session +from pygmt.exceptions import GMTCLibError from pygmt.helpers import GMTTempFile from pygmt.io import load_dataarray from pygmt.src import which @@ -127,3 +128,12 @@ def test_clib_read_data_grid_actual_image(): .drop_vars(["band", "spatial_ref"]) .sortby("y"), ) + + +def test_clib_read_data_fails(): + """ + Test that the Session.read_data method raises an exception if there are errors. + """ + with Session() as lib: + with pytest.raises(GMTCLibError): + lib.read_data("not-exsits.txt", kind="dataset") From fad4ccef2b7a53a4bce2ae53214a0bc8e6ba1384 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 11 Jul 2024 17:31:53 +0800 Subject: [PATCH 14/21] Fix a comment --- pygmt/clib/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index a0e51f60b07..7afea49ed23 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1125,7 +1125,7 @@ def read_data( restype=ctp.c_void_p, # data_ptr ) - # Determine the family and geometry from kind + # Determine the family, geometry and data container from kind family, geometry, dtype = { "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP", _GMT_DATASET), "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE", _GMT_GRID), From bc3494e0f78c9b78b382e39c141eea4353042f4f Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 17 Jul 2024 14:03:18 +0800 Subject: [PATCH 15/21] Allow setting family and geometry manually --- pygmt/clib/session.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 5c5e5902020..96696b2be85 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1071,6 +1071,8 @@ def read_data( self, infile: str, kind: Literal["dataset", "grid"], + family: str | None = None, + geometry: str | None = None, mode: str = "GMT_READ_NORMAL", region: Sequence[float] | None = None, data=None, @@ -1088,6 +1090,14 @@ def read_data( kind The data kind of the input file. Valid values are ``"dataset"`` and ``"grid"``. + family + A valid GMT data family name (e.g., ``"GMT_IS_DATASET"``). See the + ``FAMILIES`` attribute for valid names. If ``None``, will determine the data + family from the ``kind`` parameter. + geometry + A valid GMT data geometry name (e.g., ``"GMT_IS_POINT"``). See the + ``GEOMETRIES`` attribute for valid names. If ``None``, will determine the + data geometry from the ``kind`` parameter. mode How the data is to be read from the file. This option varies depending on the given family. See the GMT API documentation for details. @@ -1124,10 +1134,14 @@ def read_data( ) # Determine the family, geometry and data container from kind - family, geometry, dtype = { + _family, _geometry, dtype = { "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP", _GMT_DATASET), "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE", _GMT_GRID), }[kind] + if family is None: + family = _family + if geometry is None: + geometry = _geometry data_ptr = c_read_data( self.session_pointer, From 9a6628d53739ec774de42756f306c2de844de672 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 19 Jul 2024 09:03:48 +0800 Subject: [PATCH 16/21] Fix.typos Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com> --- pygmt/tests/test_clib_read_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pygmt/tests/test_clib_read_data.py b/pygmt/tests/test_clib_read_data.py index 3bbfe81c5c2..d25b11e3738 100644 --- a/pygmt/tests/test_clib_read_data.py +++ b/pygmt/tests/test_clib_read_data.py @@ -74,7 +74,7 @@ def test_clib_read_data_grid(expected_xrgrid): grid = lib.read_data("@static_earth_relief.nc", kind="grid").contents xrgrid = grid.to_dataarray() xr.testing.assert_equal(xrgrid, expected_xrgrid) - assert grid.header.contents.n_bands == 1 # Explicitely check n_bands + assert grid.header.contents.n_bands == 1 # Explicitly check n_bands def test_clib_read_data_grid_two_steps(expected_xrgrid): @@ -93,7 +93,7 @@ def test_clib_read_data_grid_two_steps(expected_xrgrid): assert header.wesn[:] == [-55.0, -47.0, -24.0, -10.0] assert header.z_min == 190.0 assert header.z_max == 981.0 - assert header.n_bands == 1 # Explicitely check n_bands + assert header.n_bands == 1 # Explicitly check n_bands assert not grid.data # The data is not read yet # Read the data @@ -115,7 +115,7 @@ def test_clib_read_data_grid_actual_image(): assert header.n_rows == 180 assert header.n_columns == 360 assert header.wesn[:] == [-180.0, 180.0, -90.0, 90.0] - # Explicitely check n_bands. Only one band is read for 3-band images. + # Explicitly check n_bands. Only one band is read for 3-band images. assert header.n_bands == 1 if _HAS_RIOXARRAY: # Full check if rioxarray is installed. From 5bf40f787ffdcdf84d1acc8a8d3c57dd59b933b1 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 19 Jul 2024 09:04:15 +0800 Subject: [PATCH 17/21] Fix a typo Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com> --- pygmt/clib/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 96696b2be85..6db1202a785 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1085,7 +1085,7 @@ def read_data( Parameters ---------- - input + infile The input file name. kind The data kind of the input file. Valid values are ``"dataset"`` and From 85fad50fd731487752b38d954780f8d64a394640 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 19 Jul 2024 11:32:06 +0800 Subject: [PATCH 18/21] Add link to the upstream documentation --- pygmt/clib/session.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 6db1202a785..6ae47490e3d 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1100,7 +1100,9 @@ def read_data( data geometry from the ``kind`` parameter. mode How the data is to be read from the file. This option varies depending on - the given family. See the GMT API documentation for details. + the given family. See the + :gmt-docs:`GMT API documentation ` + for details. region Subregion of the data, in the form of [xmin, xmax, ymin, ymax, zmin, zmax]. If ``None``, the whole data is read. From 3a63040d9395d4ca83278e4f3d4cb46b5f090629 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 19 Jul 2024 11:43:45 +0800 Subject: [PATCH 19/21] Explicitly set engine to rasterio Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com> --- pygmt/tests/test_clib_read_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/tests/test_clib_read_data.py b/pygmt/tests/test_clib_read_data.py index d25b11e3738..13a64ab019f 100644 --- a/pygmt/tests/test_clib_read_data.py +++ b/pygmt/tests/test_clib_read_data.py @@ -120,7 +120,7 @@ def test_clib_read_data_grid_actual_image(): if _HAS_RIOXARRAY: # Full check if rioxarray is installed. xrimage = image.to_dataarray() - expected_xrimage = xr.open_dataarray(which("@earth_day_01d_p")) + expected_xrimage = xr.open_dataarray(which("@earth_day_01d_p"), engine="rasterio") assert expected_xrimage.band.size == 3 # 3-band image. xr.testing.assert_equal( xrimage, From 8009581869ff9db6dcf9b7c1f74df0aad9956daf Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 19 Jul 2024 14:16:44 +0800 Subject: [PATCH 20/21] Fix styling issues --- pygmt/clib/session.py | 2 +- pygmt/tests/test_clib_read_data.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 6ae47490e3d..b09a23f59aa 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1119,7 +1119,7 @@ def read_data( ------ GMTCLibError If the GMT API function fails to read the data. - """ + """ # noqa: W505 c_read_data = self.get_libgmt_func( "GMT_Read_Data", argtypes=[ diff --git a/pygmt/tests/test_clib_read_data.py b/pygmt/tests/test_clib_read_data.py index 13a64ab019f..43978b291c2 100644 --- a/pygmt/tests/test_clib_read_data.py +++ b/pygmt/tests/test_clib_read_data.py @@ -120,7 +120,9 @@ def test_clib_read_data_grid_actual_image(): if _HAS_RIOXARRAY: # Full check if rioxarray is installed. xrimage = image.to_dataarray() - expected_xrimage = xr.open_dataarray(which("@earth_day_01d_p"), engine="rasterio") + expected_xrimage = xr.open_dataarray( + which("@earth_day_01d_p"), engine="rasterio" + ) assert expected_xrimage.band.size == 3 # 3-band image. xr.testing.assert_equal( xrimage, From cc8ed2b8e583d7e74ee1b4ce9cb74ff212e22846 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 19 Jul 2024 14:18:13 +0800 Subject: [PATCH 21/21] Explain GMT_READ_NORMAL in docstrings --- pygmt/clib/session.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index b09a23f59aa..1b8b5483a28 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1102,7 +1102,8 @@ def read_data( How the data is to be read from the file. This option varies depending on the given family. See the :gmt-docs:`GMT API documentation ` - for details. + for details. Default is ``GMT_READ_NORMAL`` which corresponds to the default + read mode value of 0 in the ``GMT_enum_read`` enum. region Subregion of the data, in the form of [xmin, xmax, ymin, ymax, zmin, zmax]. If ``None``, the whole data is read.