Skip to content

Commit

Permalink
Merge branch 'main' into pass-arg-list-to-module
Browse files Browse the repository at this point in the history
  • Loading branch information
seisman committed Apr 7, 2024
2 parents 72e684b + a32049d commit faa4a4c
Show file tree
Hide file tree
Showing 13 changed files with 380 additions and 149 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:

# Install Miniconda with conda-forge dependencies
- name: Setup Miniconda
uses: conda-incubator/[email protected].1
uses: conda-incubator/[email protected].3
with:
auto-activate-base: true
activate-environment: "" # base environment
Expand Down Expand Up @@ -86,7 +86,7 @@ jobs:

# Run the benchmark tests
- name: Run benchmarks
uses: CodSpeedHQ/action@v2.2.1
uses: CodSpeedHQ/action@v2.3.0
with:
run: |
python -c "import pygmt; pygmt.show_versions()"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
- python-version: '3.10'
numpy-version: '1.23'
pandas-version: '=1.5'
xarray-version: '=2022.03'
xarray-version: '=2022.06'
optional-packages: ''
- python-version: '3.12'
numpy-version: '1.26'
Expand Down
2 changes: 1 addition & 1 deletion doc/minversions.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ after their initial release.

| PyGMT Version | GMT | Python | NumPy | Pandas | Xarray |
|---|---|---|---|---|---|
| [Dev][]* [[Docs][Docs Dev]] | >=6.3.0 | >=3.10 | >=1.23 | >=1.5 | >=2022.03 |
| [Dev][]* [[Docs][Docs Dev]] | >=6.3.0 | >=3.10 | >=1.23 | >=1.5 | >=2022.06 |
| [v0.11.0][] [[Docs][Docs v0.11.0]] | >=6.3.0 | >=3.9 | >=1.23 | | |
| [v0.10.0][] [[Docs][Docs v0.10.0]] | >=6.3.0 | >=3.9 | >=1.22 | | |
| [v0.9.0][] [[Docs][Docs v0.9.0]] | >=6.3.0 | >=3.8 | >=1.21 | | |
Expand Down
4 changes: 2 additions & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dependencies:
- ghostscript=10.03.0
- numpy>=1.23
- pandas>=1.5
- xarray>=2022.03
- xarray>=2022.06
- netCDF4
- packaging
# Optional dependencies
Expand All @@ -27,7 +27,7 @@ dependencies:
- codespell
- ruff>=0.3.0
# Dev dependencies (unit testing)
- matplotlib
- matplotlib-base
- pytest-cov
- pytest-doctestplus
- pytest-mpl
Expand Down
58 changes: 52 additions & 6 deletions pygmt/clib/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1709,6 +1709,35 @@ def virtualfile_out(
with self.open_virtualfile(family, geometry, "GMT_OUT", None) as vfile:
yield vfile

def inquire_virtualfile(self, vfname: str) -> int:
    """
    Get the family of a virtual file.

    Parameters
    ----------
    vfname
        Name of the virtual file to inquire.

    Returns
    -------
    family
        The integer value for the family of the virtual file.

    Examples
    --------
    >>> from pygmt.clib import Session
    >>> with Session() as lib:
    ...     with lib.virtualfile_out(kind="dataset") as vfile:
    ...         family = lib.inquire_virtualfile(vfile)
    ...         assert family == lib["GMT_IS_DATASET"]
    """
    func = self.get_libgmt_func(
        "GMT_Inquire_VirtualFile",
        argtypes=[ctp.c_void_p, ctp.c_char_p],
        restype=ctp.c_uint,
    )
    # The C API expects the virtual file name as a NUL-terminated byte string.
    return func(self.session_pointer, vfname.encode())

def read_virtualfile(
self, vfname: str, kind: Literal["dataset", "grid", None] = None
):
Expand Down Expand Up @@ -1775,7 +1804,7 @@ def read_virtualfile(
def virtualfile_to_dataset(
self,
vfname: str,
output_type: Literal["pandas", "numpy", "file"] = "pandas",
output_type: Literal["pandas", "numpy", "file", "strings"] = "pandas",
column_names: list[str] | None = None,
dtype: type | dict[str, type] | None = None,
index_col: str | int | None = None,
Expand All @@ -1796,6 +1825,7 @@ def virtualfile_to_dataset(
- ``"pandas"`` will return a :class:`pandas.DataFrame` object.
- ``"numpy"`` will return a :class:`numpy.ndarray` object.
- ``"file"`` means the result was saved to a file and will return ``None``.
- ``"strings"`` will return the trailing text only as an array of strings.
column_names
The column names for the :class:`pandas.DataFrame` output.
dtype
Expand Down Expand Up @@ -1841,6 +1871,16 @@ def virtualfile_to_dataset(
... assert result is None
... assert Path(outtmp.name).stat().st_size > 0
...
... # strings output
... with Session() as lib:
... with lib.virtualfile_out(kind="dataset") as vouttbl:
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
... outstr = lib.virtualfile_to_dataset(
... vfname=vouttbl, output_type="strings"
... )
... assert isinstance(outstr, np.ndarray)
... assert outstr.dtype.kind in ("S", "U")
...
... # numpy output
... with Session() as lib:
... with lib.virtualfile_out(kind="dataset") as vouttbl:
Expand Down Expand Up @@ -1869,6 +1909,9 @@ def virtualfile_to_dataset(
... column_names=["col1", "col2", "col3", "coltext"],
... )
... assert isinstance(outpd2, pd.DataFrame)
>>> outstr
array(['TEXT1 TEXT23', 'TEXT4 TEXT567', 'TEXT8 TEXT90',
'TEXT123 TEXT456789'], dtype='<U18')
>>> outnp
array([[1.0, 2.0, 3.0, 'TEXT1 TEXT23'],
[4.0, 5.0, 6.0, 'TEXT4 TEXT567'],
Expand All @@ -1890,11 +1933,14 @@ def virtualfile_to_dataset(
if output_type == "file": # Already written to file, so return None
return None

# Read the virtual file as a GMT dataset and convert to pandas.DataFrame
result = self.read_virtualfile(vfname, kind="dataset").contents.to_dataframe(
column_names=column_names,
dtype=dtype,
index_col=index_col,
# Read the virtual file as a _GMT_DATASET object
result = self.read_virtualfile(vfname, kind="dataset").contents

if output_type == "strings": # strings output
return result.to_strings()

result = result.to_dataframe(
column_names=column_names, dtype=dtype, index_col=index_col
)
if output_type == "numpy": # numpy.ndarray output
return result.to_numpy()
Expand Down
44 changes: 25 additions & 19 deletions pygmt/datatypes/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,17 @@ class _GMT_DATASEGMENT(ctp.Structure): # noqa: N801
("hidden", ctp.c_void_p),
]

def to_strings(self) -> np.ndarray[Any, np.dtype[np.str_]]:
    """
    Convert the trailing text column to an array of strings.
    """
    # Collect the raw byte strings from every segment of every table.
    texts: list[bytes] = []
    for tbl in self.table[: self.n_tables]:
        for seg in tbl.contents.segment[: tbl.contents.n_segments]:
            if seg.contents.text:
                texts.extend(seg.contents.text[: seg.contents.n_rows])
    if not texts:
        # No trailing text column: return an empty string array.
        return np.array([], dtype=str)
    # Decode the byte strings into a unicode string array.
    return np.char.decode(texts)

def to_dataframe(
self,
column_names: pd.Index | None = None,
Expand Down Expand Up @@ -194,7 +205,11 @@ def to_dataframe(
... with lib.virtualfile_out(kind="dataset") as vouttbl:
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
... ds = lib.read_virtualfile(vouttbl, kind="dataset")
... text = ds.contents.to_strings()
... df = ds.contents.to_dataframe()
>>> text
array(['TEXT1 TEXT23', 'TEXT4 TEXT567', 'TEXT8 TEXT90',
'TEXT123 TEXT456789'], dtype='<U18')
>>> df
0 1 2 3
0 1.0 2.0 3.0 TEXT1 TEXT23
Expand All @@ -207,28 +222,19 @@ def to_dataframe(
vectors = []
# Deal with numeric columns
for icol in range(self.n_columns):
colvector = []
for itbl in range(self.n_tables):
dtbl = self.table[itbl].contents
for iseg in range(dtbl.n_segments):
dseg = dtbl.segment[iseg].contents
colvector.append(
np.ctypeslib.as_array(dseg.data[icol], shape=(dseg.n_rows,))
)
colvector = [
np.ctypeslib.as_array(
seg.contents.data[icol], shape=(seg.contents.n_rows,)
)
for tbl in self.table[: self.n_tables]
for seg in tbl.contents.segment[: tbl.contents.n_segments]
]
vectors.append(pd.Series(data=np.concatenate(colvector)))

# Deal with trailing text column
textvector = []
for itbl in range(self.n_tables):
dtbl = self.table[itbl].contents
for iseg in range(dtbl.n_segments):
dseg = dtbl.segment[iseg].contents
if dseg.text:
textvector.extend(dseg.text[: dseg.n_rows])
if textvector:
vectors.append(
pd.Series(data=np.char.decode(textvector), dtype=pd.StringDtype())
)
textvector = self.to_strings()
if len(textvector) != 0:
vectors.append(pd.Series(data=textvector, dtype=pd.StringDtype()))

if len(vectors) == 0:
# Return an empty DataFrame if no columns are found.
Expand Down
98 changes: 1 addition & 97 deletions pygmt/datatypes/grid.py
Original file line number Diff line number Diff line change
@@ -1,104 +1,8 @@
"""
Wrapper for the GMT_GRID data type and the GMT_GRID_HEADER data structure.
Wrapper for the GMT_GRID data type.
"""

import ctypes as ctp
from typing import ClassVar

# Constants for lengths of fixed-size string fields in the grid header.
# These values must match the corresponding definitions in GMT's
# gmt_resources.h header file.
#
# Note: Ideally we should be able to get these constants from the GMT shared library
# using the ``lib["GMT_GRID_UNIT_LEN80"]`` syntax, but it causes cyclic import error.
# So we have to hardcode the values here.
GMT_GRID_UNIT_LEN80 = 80
GMT_GRID_TITLE_LEN80 = 80
GMT_GRID_COMMAND_LEN320 = 320
GMT_GRID_REMARK_LEN160 = 160

# GMT uses single-precision for grids by default, but can be built to use
# double-precision. Currently, only single-precision is supported.
gmt_grdfloat = ctp.c_float


class _GMT_GRID_HEADER(ctp.Structure):  # noqa: N801
    """
    GMT grid header structure for metadata about the grid.

    The class is used in the `GMT_GRID`/`GMT_IMAGE`/`GMT_CUBE` data structure. See the
    GMT source code gmt_resources.h for the original C structure definitions.

    The field order and types below mirror the C struct exactly and must not be
    changed, or the ctypes layout will no longer match the shared library's ABI.
    """

    _fields_: ClassVar = [
        # Number of columns
        ("n_columns", ctp.c_uint32),
        # Number of rows
        ("n_rows", ctp.c_uint32),
        # Grid registration, 0 for gridline and 1 for pixel
        ("registration", ctp.c_uint32),
        # Minimum/maximum x and y coordinates
        ("wesn", ctp.c_double * 4),
        # Minimum z value
        ("z_min", ctp.c_double),
        # Maximum z value
        ("z_max", ctp.c_double),
        # x and y increments
        ("inc", ctp.c_double * 2),
        # Grid values must be multiplied by this factor
        ("z_scale_factor", ctp.c_double),
        # After scaling, add this offset
        ("z_add_offset", ctp.c_double),
        # Units in x-direction, in the form "long_name [units]"
        ("x_units", ctp.c_char * GMT_GRID_UNIT_LEN80),
        # Units in y-direction, in the form "long_name [units]"
        ("y_units", ctp.c_char * GMT_GRID_UNIT_LEN80),
        # Grid value units, in the form "long_name [units]"
        ("z_units", ctp.c_char * GMT_GRID_UNIT_LEN80),
        # Name of data set
        ("title", ctp.c_char * GMT_GRID_TITLE_LEN80),
        # Name of generating command
        ("command", ctp.c_char * GMT_GRID_COMMAND_LEN320),
        # Comments for this data set
        ("remark", ctp.c_char * GMT_GRID_REMARK_LEN160),
        # Below are items used internally by GMT
        # Number of data points (n_columns * n_rows) [paddings are excluded]
        ("nm", ctp.c_size_t),
        # Actual number of items (not bytes) required to hold this grid (mx * my),
        # per band (for images)
        ("size", ctp.c_size_t),
        # Bits per data value (e.g., 32 for ints/floats; 8 for bytes).
        # Only used for ESRI ArcInfo ASCII Exchange grids.
        ("bits", ctp.c_uint),
        # For complex grids:
        # 0 for normal
        # GMT_GRID_IS_COMPLEX_REAL = real part of complex grid
        # GMT_GRID_IS_COMPLEX_IMAG = imag part of complex grid
        ("complex_mode", ctp.c_uint),
        # Grid format
        ("type", ctp.c_uint),
        # Number of bands [1]. Used with GMT_IMAGE containers
        ("n_bands", ctp.c_uint),
        # Actual x-dimension in memory. mx = n_columns + pad[0] + pad[1]
        ("mx", ctp.c_uint),
        # Actual y-dimension in memory. my = n_rows + pad[2] + pad[3]
        ("my", ctp.c_uint),
        # Paddings on west, east, south, north sides [2,2,2,2]
        ("pad", ctp.c_uint * 4),
        # Three or four char codes T|B R|C S|R|S (grd) or B|L|P + A|a (img)
        # describing array layout in mem and interleaving
        ("mem_layout", ctp.c_char * 4),
        # Missing value as stored in grid file
        ("nan_value", gmt_grdfloat),
        # 0.0 for gridline grids and 0.5 for pixel grids
        ("xy_off", ctp.c_double),
        # Referencing system string in PROJ.4 format
        ("ProjRefPROJ4", ctp.c_char_p),
        # Referencing system string in WKT format
        ("ProjRefWKT", ctp.c_char_p),
        # Referencing system EPSG code
        ("ProjRefEPSG", ctp.c_int),
        # Lower-level information for GMT use only
        ("hidden", ctp.c_void_p),
    ]


class _GMT_GRID(ctp.Structure): # noqa: N801
Expand Down
Loading

0 comments on commit faa4a4c

Please sign in to comment.