diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 0e6996c4e13..e7efe125c25 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1787,10 +1787,7 @@ def virtualfile_in( # noqa: PLR0912 "grid": self.virtualfile_from_grid, "image": tempfile_from_image, "stringio": self.virtualfile_from_stringio, - # Note: virtualfile_from_matrix is not used because a matrix can be - # converted to vectors instead, and using vectors allows for better - # handling of string type inputs (e.g. for datetime data types) - "matrix": self.virtualfile_from_vectors, + "matrix": self.virtualfile_from_matrix, "vectors": self.virtualfile_from_vectors, }[kind] @@ -1807,29 +1804,33 @@ def virtualfile_in( # noqa: PLR0912 warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) _data = (data,) if not isinstance(data, pathlib.PurePath) else (str(data),) elif kind == "vectors": - _data = [x, y] - if z is not None: - _data.append(z) - if extra_arrays: - _data.extend(extra_arrays) - elif kind == "matrix": # turn 2-D arrays into list of vectors - if hasattr(data, "items") and not hasattr(data, "to_frame"): + if data is None: + # data is None, so data must be given via x/y/z. + _data = [x, y] + if z is not None: + _data.append(z) + if extra_arrays: + _data.extend(extra_arrays) + elif hasattr(data, "items") and not hasattr(data, "to_frame"): # pandas.DataFrame or xarray.Dataset types. # pandas.Series will be handled below like a 1-D numpy.ndarray. _data = [array for _, array in data.items()] - elif hasattr(data, "ndim") and data.ndim == 2 and data.dtype.kind in "iuf": - # Just use virtualfile_from_matrix for 2-D numpy.ndarray - # which are signed integer (i), unsigned integer (u) or - # floating point (f) types - _virtualfile_from = self.virtualfile_from_matrix - _data = (data,) else: # Python list, tuple, numpy.ndarray, and pandas.Series types _data = np.atleast_2d(np.asanyarray(data).T) + elif kind == "matrix": + # GMT can only accept a 2-D matrix which are signed integer (i), unsigned + # integer (u) or floating point (f) types. For other data types, we need to + # use virtualfile_from_vectors instead, which turns the matrix into a list + # of vectors and allows for better handling of non-integer/float type inputs + # (e.g. for string or datetime data types). + _data = (data,) + if data.dtype.kind not in "iuf": + _virtualfile_from = self.virtualfile_from_vectors + _data = data.T # Finally create the virtualfile from the data, to be passed into GMT file_context = _virtualfile_from(*_data) - return file_context def virtualfile_from_data( diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 2d1d1f409a8..07c965ae277 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -207,8 +207,12 @@ def data_kind( - ``"grid"``: a :class:`xarray.DataArray` object that is not 3-D - ``"image"``: a 3-D :class:`xarray.DataArray` object - ``"stringio"``: a :class:`io.StringIO` object - - ``"matrix"``: anything else that is not ``None`` - - ``"vectors"``: ``data`` is ``None`` and ``required=True`` + - ``"matrix"``: a 2-D array-like object that implements ``__array_interface__`` + (e.g., :class:`numpy.ndarray`) + - ``"vectors"``: ``data`` is ``None`` and ``required=True``, or any unrecognized + data. Common data types include, a :class:`pandas.DataFrame` object, a dictionary + with array-like values, a 1-D/3-D :class:`numpy.ndarray` object, or array-like + objects. Parameters ---------- @@ -268,27 +272,27 @@ def data_kind( The "matrix"`` kind: - >>> data_kind(data=np.arange(10)) # 1-D numpy.ndarray - 'matrix' >>> data_kind(data=np.arange(10).reshape((5, 2))) # 2-D numpy.ndarray 'matrix' + + The "vectors" kind: + + >>> data_kind(data=np.arange(10)) # 1-D numpy.ndarray + 'vectors' >>> data_kind(data=np.arange(60).reshape((3, 4, 5))) # 3-D numpy.ndarray - 'matrix' + 'vectors' >>> data_kind(xr.DataArray(np.arange(12), name="x").to_dataset()) # xarray.Dataset - 'matrix' + 'vectors' >>> data_kind(data=[1, 2, 3]) # 1-D sequence - 'matrix' + 'vectors' >>> data_kind(data=[[1, 2, 3], [4, 5, 6]]) # sequence of sequences - 'matrix' + 'vectors' >>> data_kind(data={"x": [1, 2, 3], "y": [4, 5, 6]}) # dictionary - 'matrix' + 'vectors' >>> data_kind(data=pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})) # pd.DataFrame - 'matrix' + 'vectors' >>> data_kind(data=pd.Series([1, 2, 3], name="x")) # pd.Series - 'matrix' - - The "vectors" kind: - + 'vectors' >>> data_kind(data=None) 'vectors' """ @@ -312,7 +316,10 @@ def data_kind( # geopandas.GeoDataFrame or shapely.geometry). # Reference: https://gist.github.com/sgillies/2217756 kind = "geojson" - case x if x is not None: # Any not-None is considered as a matrix. + case x if hasattr(x, "__array_interface__") and data.ndim == 2: + # 2-D Array-like objects that implements ``__array_interface__`` (e.g., + # numpy.ndarray). + # Reference: https://numpy.org/doc/stable/reference/arrays.interface.html kind = "matrix" case _: # Fall back to "vectors" if data is None and required=True. kind = "vectors" diff --git a/pygmt/src/legend.py b/pygmt/src/legend.py index ed34bc0d797..ddc26cbd2eb 100644 --- a/pygmt/src/legend.py +++ b/pygmt/src/legend.py @@ -91,7 +91,7 @@ def legend( kwargs["F"] = box kind = data_kind(spec) - if kind not in {"vectors", "file", "stringio"}: # kind="vectors" means spec is None + if spec is not None and kind not in {"file", "stringio"}: raise GMTInvalidInput(f"Unrecognized data type: {type(spec)}") if kind == "file" and is_nonstr_iter(spec): raise GMTInvalidInput("Only one legend specification file is allowed.") diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 79daf523fec..382f560f6f7 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -195,7 +195,7 @@ def x2sys_cross( match data_kind(track): case "file": file_contexts.append(contextlib.nullcontext(track)) - case "matrix": + case "vectors": # find suffix (-E) of trackfiles used (e.g. xyz, csv, etc) from # $X2SYS_HOME/TAGNAME/TAGNAME.tag file tagfile = Path( diff --git a/pygmt/tests/test_clib_virtualfile_in.py b/pygmt/tests/test_clib_virtualfile_in.py index 95adcaff8ae..d854d556050 100644 --- a/pygmt/tests/test_clib_virtualfile_in.py +++ b/pygmt/tests/test_clib_virtualfile_in.py @@ -9,9 +9,11 @@ import pandas as pd import pytest import xarray as xr +from packaging.version import Version from pygmt import clib +from pygmt.clib import __gmt_version__ from pygmt.exceptions import GMTInvalidInput -from pygmt.helpers import GMTTempFile +from pygmt.helpers import GMTTempFile, data_kind POINTS_DATA = Path(__file__).parent / "data" / "points.txt" @@ -101,3 +103,27 @@ def test_virtualfile_in_fail_non_valid_data(data): z=data[:, 2], data=data, ) + + +@pytest.mark.xfail( + condition=Version(__gmt_version__) <= Version("6.5.0"), + reason="Upstream bug fixed in https://github.com/GenericMappingTools/gmt/pull/8600", +) +def test_virtualfile_in_matrix_string_dtype(): + """ + Pass a string dtype matrix should work and the matrix should be passed via a series + of vectors. + """ + data = np.array([["11:30W", "30:30S"], ["12:30W", "30:00S"]]) + assert data_kind(data) == "matrix" # data is recognized as "matrix" kind + assert data.dtype.type == np.str_ + assert data.dtype.kind not in "iuf" # dtype is not in numeric dtypes + + with clib.Session() as lib: + with lib.virtualfile_in(data=data) as vintbl: + with GMTTempFile() as outfile: + lib.call_module("info", [vintbl, "-C", f"->{outfile.name}"]) + output = outfile.read(keep_tabs=False) + assert output == "347.5 348.5 -30.5 -30\n" + # Should check that lib.virtualfile_from_vectors is called once, + # not lib.virtualfile_from_matrix, but it's technically complicated.