**Breaking**: data_kind: Now 'matrix' represents a 2-D numpy array and unrecognized data types fall back to 'vectors' (#3351)

Co-authored-by: Wei Ji <[email protected]>
GenericMappingTools · Oct 16, 2024 · a5c0aa2 · a5c0aa2
1 parent 8a30c8e
commit a5c0aa2
Show file tree

Hide file tree

Showing 5 changed files with 70 additions and 36 deletions.
diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py
@@ -1787,10 +1787,7 @@ def virtualfile_in(  # noqa: PLR0912
             "grid": self.virtualfile_from_grid,
             "image": tempfile_from_image,
             "stringio": self.virtualfile_from_stringio,
-            # Note: virtualfile_from_matrix is not used because a matrix can be
-            # converted to vectors instead, and using vectors allows for better
-            # handling of string type inputs (e.g. for datetime data types)
-            "matrix": self.virtualfile_from_vectors,
+            "matrix": self.virtualfile_from_matrix,
             "vectors": self.virtualfile_from_vectors,
         }[kind]
 
@@ -1807,29 +1804,33 @@ def virtualfile_in(  # noqa: PLR0912
                 warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
             _data = (data,) if not isinstance(data, pathlib.PurePath) else (str(data),)
         elif kind == "vectors":
-            _data = [x, y]
-            if z is not None:
-                _data.append(z)
-            if extra_arrays:
-                _data.extend(extra_arrays)
-        elif kind == "matrix":  # turn 2-D arrays into list of vectors
-            if hasattr(data, "items") and not hasattr(data, "to_frame"):
+            if data is None:
+                # data is None, so data must be given via x/y/z.
+                _data = [x, y]
+                if z is not None:
+                    _data.append(z)
+                if extra_arrays:
+                    _data.extend(extra_arrays)
+            elif hasattr(data, "items") and not hasattr(data, "to_frame"):
                 # pandas.DataFrame or xarray.Dataset types.
                 # pandas.Series will be handled below like a 1-D numpy.ndarray.
                 _data = [array for _, array in data.items()]
-            elif hasattr(data, "ndim") and data.ndim == 2 and data.dtype.kind in "iuf":
-                # Just use virtualfile_from_matrix for 2-D numpy.ndarray
-                # which are signed integer (i), unsigned integer (u) or
-                # floating point (f) types
-                _virtualfile_from = self.virtualfile_from_matrix
-                _data = (data,)
             else:
                 # Python list, tuple, numpy.ndarray, and pandas.Series types
                 _data = np.atleast_2d(np.asanyarray(data).T)
+        elif kind == "matrix":
+            # GMT can only accept a 2-D matrix whose dtype is signed integer (i),
+            # unsigned integer (u) or floating point (f). For other data types, we need
+            # to use virtualfile_from_vectors instead, which turns the matrix into a
+            # list of vectors and allows for better handling of non-integer/float type
+            # inputs (e.g. for string or datetime data types).
+            _data = (data,)
+            if data.dtype.kind not in "iuf":
+                _virtualfile_from = self.virtualfile_from_vectors
+                _data = data.T
 
         # Finally create the virtualfile from the data, to be passed into GMT
         file_context = _virtualfile_from(*_data)
-
         return file_context
 
     def virtualfile_from_data(

diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py
@@ -207,8 +207,12 @@ def data_kind(
     - ``"grid"``: a :class:`xarray.DataArray` object that is not 3-D
     - ``"image"``: a 3-D :class:`xarray.DataArray` object
     - ``"stringio"``: a :class:`io.StringIO` object
-    - ``"matrix"``: anything else that is not ``None``
-    - ``"vectors"``: ``data`` is ``None`` and ``required=True``
+    - ``"matrix"``: a 2-D array-like object that implements ``__array_interface__``
+      (e.g., :class:`numpy.ndarray`)
+    - ``"vectors"``: ``data`` is ``None`` and ``required=True``, or any unrecognized
+      data. Common data types include a :class:`pandas.DataFrame` object, a dictionary
+      with array-like values, a 1-D/3-D :class:`numpy.ndarray` object, or other
+      array-like objects.
 
     Parameters
     ----------
@@ -268,27 +272,27 @@ def data_kind(
 
     The "matrix"`` kind:
 
-    >>> data_kind(data=np.arange(10))  # 1-D numpy.ndarray
-    'matrix'
     >>> data_kind(data=np.arange(10).reshape((5, 2)))  # 2-D numpy.ndarray
     'matrix'
+
+    The "vectors" kind:
+
+    >>> data_kind(data=np.arange(10))  # 1-D numpy.ndarray
+    'vectors'
     >>> data_kind(data=np.arange(60).reshape((3, 4, 5)))  # 3-D numpy.ndarray
-    'matrix'
+    'vectors'
     >>> data_kind(xr.DataArray(np.arange(12), name="x").to_dataset())  # xarray.Dataset
-    'matrix'
+    'vectors'
     >>> data_kind(data=[1, 2, 3])  # 1-D sequence
-    'matrix'
+    'vectors'
     >>> data_kind(data=[[1, 2, 3], [4, 5, 6]])  # sequence of sequences
-    'matrix'
+    'vectors'
     >>> data_kind(data={"x": [1, 2, 3], "y": [4, 5, 6]})  # dictionary
-    'matrix'
+    'vectors'
     >>> data_kind(data=pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}))  # pd.DataFrame
-    'matrix'
+    'vectors'
     >>> data_kind(data=pd.Series([1, 2, 3], name="x"))  # pd.Series
-    'matrix'
-
-    The "vectors" kind:
-
+    'vectors'
     >>> data_kind(data=None)
     'vectors'
     """
@@ -312,7 +316,10 @@ def data_kind(
             # geopandas.GeoDataFrame or shapely.geometry).
             # Reference: https://gist.github.com/sgillies/2217756
             kind = "geojson"
-        case x if x is not None:  # Any not-None is considered as a matrix.
+        case x if hasattr(x, "__array_interface__") and data.ndim == 2:
+            # 2-D array-like objects that implement ``__array_interface__`` (e.g.,
+            # numpy.ndarray).
+            # Reference: https://numpy.org/doc/stable/reference/arrays.interface.html
             kind = "matrix"
         case _:  # Fall back to "vectors" if data is None and required=True.
             kind = "vectors"

diff --git a/pygmt/src/legend.py b/pygmt/src/legend.py
@@ -91,7 +91,7 @@ def legend(
             kwargs["F"] = box
 
     kind = data_kind(spec)
-    if kind not in {"vectors", "file", "stringio"}:  # kind="vectors" means spec is None
+    if spec is not None and kind not in {"file", "stringio"}:
         raise GMTInvalidInput(f"Unrecognized data type: {type(spec)}")
     if kind == "file" and is_nonstr_iter(spec):
         raise GMTInvalidInput("Only one legend specification file is allowed.")

diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py
@@ -195,7 +195,7 @@ def x2sys_cross(
         match data_kind(track):
             case "file":
                 file_contexts.append(contextlib.nullcontext(track))
-            case "matrix":
+            case "vectors":
                 # find suffix (-E) of trackfiles used (e.g. xyz, csv, etc) from
                 # $X2SYS_HOME/TAGNAME/TAGNAME.tag file
                 tagfile = Path(

diff --git a/pygmt/tests/test_clib_virtualfile_in.py b/pygmt/tests/test_clib_virtualfile_in.py
@@ -9,9 +9,11 @@
 import pandas as pd
 import pytest
 import xarray as xr
+from packaging.version import Version
 from pygmt import clib
+from pygmt.clib import __gmt_version__
 from pygmt.exceptions import GMTInvalidInput
-from pygmt.helpers import GMTTempFile
+from pygmt.helpers import GMTTempFile, data_kind
 
 POINTS_DATA = Path(__file__).parent / "data" / "points.txt"
 
@@ -101,3 +103,27 @@ def test_virtualfile_in_fail_non_valid_data(data):
                 z=data[:, 2],
                 data=data,
             )
+
+
+@pytest.mark.xfail(
+    condition=Version(__gmt_version__) <= Version("6.5.0"),
+    reason="Upstream bug fixed in https://github.com/GenericMappingTools/gmt/pull/8600",
+)
+def test_virtualfile_in_matrix_string_dtype():
+    """
+    Passing a string dtype matrix should work, and the matrix should be passed via a
+    series of vectors.
+    """
+    data = np.array([["11:30W", "30:30S"], ["12:30W", "30:00S"]])
+    assert data_kind(data) == "matrix"  # data is recognized as "matrix" kind
+    assert data.dtype.type == np.str_
+    assert data.dtype.kind not in "iuf"  # dtype is not in numeric dtypes
+
+    with clib.Session() as lib:
+        with lib.virtualfile_in(data=data) as vintbl:
+            with GMTTempFile() as outfile:
+                lib.call_module("info", [vintbl, "-C", f"->{outfile.name}"])
+                output = outfile.read(keep_tabs=False)
+                assert output == "347.5 348.5 -30.5 -30\n"
+                # Should check that lib.virtualfile_from_vectors is called once,
+                # not lib.virtualfile_from_matrix, but it's technically complicated.