From 689b0ca4cdb969fcd27c779e0e86f5f12bf9401b Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Wed, 6 Sep 2023 15:49:02 +0200 Subject: [PATCH 01/33] [skip-ci] --- docs/whats-new.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/whats-new.rst b/docs/whats-new.rst index 228b8c25..97b13938 100644 --- a/docs/whats-new.rst +++ b/docs/whats-new.rst @@ -8,6 +8,13 @@ What's New |pypi dwn| |conda dwn| +Coming up next +-------------- + +**Internals** + +- Utilities refactoring. Each class/function have been refactored in more appropriate locations like ``argopy.utils`` or ``argopy.relatec``. + v0.1.14rc2 (27 Jul. 2023) ------------------------- From 3e490333a2da5138f66f5d9daed1f3fd0b67942d Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Wed, 6 Sep 2023 15:56:28 +0200 Subject: [PATCH 02/33] Move TopoFetcher to argopy.related --- argopy/__init__.py | 3 +- argopy/related/__init__.py | 7 ++ argopy/related/topo.py | 203 +++++++++++++++++++++++++++++++++++++ argopy/utilities.py | 200 ------------------------------------ 4 files changed, 212 insertions(+), 201 deletions(-) create mode 100644 argopy/related/__init__.py create mode 100644 argopy/related/topo.py diff --git a/argopy/__init__.py b/argopy/__init__.py index e0020728..7a90d217 100644 --- a/argopy/__init__.py +++ b/argopy/__init__.py @@ -35,12 +35,13 @@ from . import plot # noqa: E402 from .plot import dashboard, ArgoColors # noqa: E402 from .utilities import show_versions, show_options, clear_cache, lscache # noqa: E402 -from .utilities import TopoFetcher, ArgoNVSReferenceTables, OceanOPSDeployments, ArgoDocs # noqa: E402 +from .utilities import ArgoNVSReferenceTables, OceanOPSDeployments, ArgoDocs # noqa: E402 from .utilities import monitor_status as status # noqa: E402 from .options import set_options, reset_options # noqa: E402 from .data_fetchers import CTDRefDataFetcher # noqa: E402 from .stores import ArgoIndex # noqa: E402 from .utils import compute # noqa: E402, F401 +from .related import TopoFetcher # noqa: E402 # __all__ = ( diff --git a/argopy/related/__init__.py b/argopy/related/__init__.py new file mode 100644 index 00000000..58c3812e --- /dev/null +++ b/argopy/related/__init__.py @@ -0,0 +1,7 @@ +from .topo import TopoFetcher + +# +__all__ = ( + # Classes: + "TopoFetcher", +) \ No newline at end of file diff --git a/argopy/related/topo.py b/argopy/related/topo.py new file mode 100644 index 00000000..39b7f4b1 --- /dev/null +++ b/argopy/related/topo.py @@ -0,0 +1,203 @@ +from typing import Union +from ..options import OPTIONS +from ..stores import httpstore +from ..utilities import format_oneline + + +class TopoFetcher: + """ Fetch topographic data through an ERDDAP server for an ocean rectangle + + Example: + >>> from argopy import TopoFetcher + >>> box = [-75, -45, 20, 30] # Lon_min, lon_max, lat_min, lat_max + >>> ds = TopoFetcher(box).to_xarray() + >>> ds = TopoFetcher(box, ds='gebco', stride=[10, 10], cache=True).to_xarray() + + """ + + class ERDDAP: + def __init__(self, server: str, protocol: str = "tabledap"): + self.server = server + self.protocol = protocol + self.response = "nc" + self.dataset_id = "" + self.constraints = "" + + def __init__( + self, + box: list, + ds: str = "gebco", + cache: bool = False, + cachedir: str = "", + api_timeout: int = 0, + stride: list = [1, 1], + server: Union[str] = None, + **kwargs, + ): + """ Instantiate an ERDDAP topo data fetcher + + Parameters + ---------- + ds: str (optional), default: 'gebco' + Dataset to load: + + - 'gebco' will load the GEBCO_2020 Grid, a 
continuous terrain model for oceans and land at 15 arc-second intervals + stride: list, default [1, 1] + Strides along longitude and latitude. This allows to change the output resolution + cache: bool (optional) + Cache data or not (default: False) + cachedir: str (optional) + Path to cache folder + api_timeout: int (optional) + Erddap request time out in seconds. Set to OPTIONS['api_timeout'] by default. + """ + timeout = OPTIONS["api_timeout"] if api_timeout == 0 else api_timeout + self.fs = httpstore( + cache=cache, cachedir=cachedir, timeout=timeout, size_policy="head" + ) + self.definition = "Erddap topographic data fetcher" + + self.BOX = box + self.stride = stride + if ds == "gebco": + self.definition = "NOAA erddap gebco data fetcher for a space region" + self.server = server if server is not None else "https://coastwatch.pfeg.noaa.gov/erddap" + self.server_name = "NOAA" + self.dataset_id = "gebco" + + self._init_erddap() + + def _init_erddap(self): + # Init erddap + self.erddap = self.ERDDAP(server=self.server, protocol="griddap") + self.erddap.response = "nc" + + if self.dataset_id == "gebco": + self.erddap.dataset_id = "GEBCO_2020" + else: + raise ValueError( + "Invalid database short name for %s erddap" % self.server_name + ) + return self + + def _cname(self) -> str: + """ Fetcher one line string definition helper """ + cname = "?" + + if hasattr(self, "BOX"): + BOX = self.BOX + cname = ("[x=%0.2f/%0.2f; y=%0.2f/%0.2f]") % ( + BOX[0], + BOX[1], + BOX[2], + BOX[3], + ) + return cname + + def __repr__(self): + summary = [""] + summary.append("Name: %s" % self.definition) + summary.append("API: %s" % self.server) + summary.append("Domain: %s" % format_oneline(self.cname())) + return "\n".join(summary) + + def cname(self): + """ Return a unique string defining the constraints """ + return self._cname() + + @property + def cachepath(self): + """ Return path to cached file(s) for this request + + Returns + ------- + list(str) + """ + return [self.fs.cachepath(uri) for uri in self.uri] + + def define_constraints(self): + """ Define request constraints """ + # Eg: https://coastwatch.pfeg.noaa.gov/erddap/griddap/GEBCO_2020.nc?elevation%5B(34):5:(42)%5D%5B(-21):7:(-12)%5D + self.erddap.constraints = "%s(%0.2f):%i:(%0.2f)%s%s(%0.2f):%i:(%0.2f)%s" % ( + "%5B", + self.BOX[2], + self.stride[1], + self.BOX[3], + "%5D", + "%5B", + self.BOX[0], + self.stride[0], + self.BOX[1], + "%5D", + ) + return None + + # @property + # def _minimal_vlist(self): + # """ Return the minimal list of variables to retrieve """ + # vlist = list() + # vlist.append("latitude") + # vlist.append("longitude") + # vlist.append("elevation") + # return vlist + + def url_encode(self, url): + """ Return safely encoded list of urls + + This is necessary because fsspec cannot handle in cache paths/urls with a '[' character + """ + + # return urls + def safe_for_fsspec_cache(url): + url = url.replace("[", "%5B") # This is the one really necessary + url = url.replace("]", "%5D") # For consistency + return url + + return safe_for_fsspec_cache(url) + + def get_url(self): + """ Return the URL to download data requested + + Returns + ------- + str + """ + # First part of the URL: + protocol = self.erddap.protocol + dataset_id = self.erddap.dataset_id + response = self.erddap.response + url = f"{self.erddap.server}/{protocol}/{dataset_id}.{response}?" 
+ + # Add variables to retrieve: + variables = ["elevation"] + variables = ",".join(variables) + url += f"{variables}" + + # Add constraints: + self.define_constraints() # Define constraint to select this box of data (affect self.erddap.constraints) + url += f"{self.erddap.constraints}" + + return self.url_encode(url) + + @property + def uri(self): + """ List of files to load for a request + + Returns + ------- + list(str) + """ + return [self.get_url()] + + def to_xarray(self, errors: str = "ignore"): + """ Load Topographic data and return a xarray.DataSet """ + + # Download data + if len(self.uri) == 1: + ds = self.fs.open_dataset(self.uri[0]) + + return ds + + def load(self, errors: str = "ignore"): + """ Load Topographic data and return a xarray.DataSet """ + return self.to_xarray(errors=errors) diff --git a/argopy/utilities.py b/argopy/utilities.py index 777a8dab..9ff569f9 100644 --- a/argopy/utilities.py +++ b/argopy/utilities.py @@ -2042,206 +2042,6 @@ def _subsample_bins(x, y, target_values): return remapped -class TopoFetcher: - """ Fetch topographic data through an ERDDAP server for an ocean rectangle - - Example: - >>> from argopy import TopoFetcher - >>> box = [-75, -45, 20, 30] # Lon_min, lon_max, lat_min, lat_max - >>> ds = TopoFetcher(box).to_xarray() - >>> ds = TopoFetcher(box, ds='gebco', stride=[10, 10], cache=True).to_xarray() - - """ - - class ERDDAP: - def __init__(self, server: str, protocol: str = "tabledap"): - self.server = server - self.protocol = protocol - self.response = "nc" - self.dataset_id = "" - self.constraints = "" - - def __init__( - self, - box: list, - ds: str = "gebco", - cache: bool = False, - cachedir: str = "", - api_timeout: int = 0, - stride: list = [1, 1], - server: Union[str] = None, - **kwargs, - ): - """ Instantiate an ERDDAP topo data fetcher - - Parameters - ---------- - ds: str (optional), default: 'gebco' - Dataset to load: - - - 'gebco' will load the GEBCO_2020 Grid, a continuous terrain model for oceans and land at 15 arc-second intervals - stride: list, default [1, 1] - Strides along longitude and latitude. This allows to change the output resolution - cache: bool (optional) - Cache data or not (default: False) - cachedir: str (optional) - Path to cache folder - api_timeout: int (optional) - Erddap request time out in seconds. Set to OPTIONS['api_timeout'] by default. - """ - from .stores import httpstore - timeout = OPTIONS["api_timeout"] if api_timeout == 0 else api_timeout - self.fs = httpstore( - cache=cache, cachedir=cachedir, timeout=timeout, size_policy="head" - ) - self.definition = "Erddap topographic data fetcher" - - self.BOX = box - self.stride = stride - if ds == "gebco": - self.definition = "NOAA erddap gebco data fetcher for a space region" - self.server = server if server is not None else "https://coastwatch.pfeg.noaa.gov/erddap" - self.server_name = "NOAA" - self.dataset_id = "gebco" - - self._init_erddap() - - def _init_erddap(self): - # Init erddap - self.erddap = self.ERDDAP(server=self.server, protocol="griddap") - self.erddap.response = "nc" - - if self.dataset_id == "gebco": - self.erddap.dataset_id = "GEBCO_2020" - else: - raise ValueError( - "Invalid database short name for %s erddap" % self.server_name - ) - return self - - def _cname(self) -> str: - """ Fetcher one line string definition helper """ - cname = "?" 
- - if hasattr(self, "BOX"): - BOX = self.BOX - cname = ("[x=%0.2f/%0.2f; y=%0.2f/%0.2f]") % ( - BOX[0], - BOX[1], - BOX[2], - BOX[3], - ) - return cname - - def __repr__(self): - summary = [""] - summary.append("Name: %s" % self.definition) - summary.append("API: %s" % self.server) - summary.append("Domain: %s" % format_oneline(self.cname())) - return "\n".join(summary) - - def cname(self): - """ Return a unique string defining the constraints """ - return self._cname() - - @property - def cachepath(self): - """ Return path to cached file(s) for this request - - Returns - ------- - list(str) - """ - return [self.fs.cachepath(uri) for uri in self.uri] - - def define_constraints(self): - """ Define request constraints """ - # Eg: https://coastwatch.pfeg.noaa.gov/erddap/griddap/GEBCO_2020.nc?elevation%5B(34):5:(42)%5D%5B(-21):7:(-12)%5D - self.erddap.constraints = "%s(%0.2f):%i:(%0.2f)%s%s(%0.2f):%i:(%0.2f)%s" % ( - "%5B", - self.BOX[2], - self.stride[1], - self.BOX[3], - "%5D", - "%5B", - self.BOX[0], - self.stride[0], - self.BOX[1], - "%5D", - ) - return None - - # @property - # def _minimal_vlist(self): - # """ Return the minimal list of variables to retrieve """ - # vlist = list() - # vlist.append("latitude") - # vlist.append("longitude") - # vlist.append("elevation") - # return vlist - - def url_encode(self, url): - """ Return safely encoded list of urls - - This is necessary because fsspec cannot handle in cache paths/urls with a '[' character - """ - - # return urls - def safe_for_fsspec_cache(url): - url = url.replace("[", "%5B") # This is the one really necessary - url = url.replace("]", "%5D") # For consistency - return url - - return safe_for_fsspec_cache(url) - - def get_url(self): - """ Return the URL to download data requested - - Returns - ------- - str - """ - # First part of the URL: - protocol = self.erddap.protocol - dataset_id = self.erddap.dataset_id - response = self.erddap.response - url = f"{self.erddap.server}/{protocol}/{dataset_id}.{response}?" 
- - # Add variables to retrieve: - variables = ["elevation"] - variables = ",".join(variables) - url += f"{variables}" - - # Add constraints: - self.define_constraints() # Define constraint to select this box of data (affect self.erddap.constraints) - url += f"{self.erddap.constraints}" - - return self.url_encode(url) - - @property - def uri(self): - """ List of files to load for a request - - Returns - ------- - list(str) - """ - return [self.get_url()] - - def to_xarray(self, errors: str = "ignore"): - """ Load Topographic data and return a xarray.DataSet """ - - # Download data - if len(self.uri) == 1: - ds = self.fs.open_dataset(self.uri[0]) - - return ds - - def load(self, errors: str = "ignore"): - """ Load Topographic data and return a xarray.DataSet """ - return self.to_xarray(errors=errors) - - def argo_split_path(this_path): # noqa C901 """ Split path from a GDAC ftp style Argo netcdf file and return information From beae1af4c5e63e5e8c22a91088d9573059ca67eb Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Wed, 6 Sep 2023 16:04:33 +0200 Subject: [PATCH 03/33] [skip-ci] --- argopy/__init__.py | 4 +- argopy/related/__init__.py | 4 +- argopy/related/ocean_ops_deployments.py | 377 ++++++++++++++++++++++ argopy/related/{topo.py => topography.py} | 0 argopy/utilities.py | 372 --------------------- 5 files changed, 382 insertions(+), 375 deletions(-) create mode 100644 argopy/related/ocean_ops_deployments.py rename argopy/related/{topo.py => topography.py} (100%) diff --git a/argopy/__init__.py b/argopy/__init__.py index 7a90d217..8c8f6cf7 100644 --- a/argopy/__init__.py +++ b/argopy/__init__.py @@ -35,13 +35,13 @@ from . import plot # noqa: E402 from .plot import dashboard, ArgoColors # noqa: E402 from .utilities import show_versions, show_options, clear_cache, lscache # noqa: E402 -from .utilities import ArgoNVSReferenceTables, OceanOPSDeployments, ArgoDocs # noqa: E402 +from .utilities import ArgoNVSReferenceTables, ArgoDocs # noqa: E402 from .utilities import monitor_status as status # noqa: E402 from .options import set_options, reset_options # noqa: E402 from .data_fetchers import CTDRefDataFetcher # noqa: E402 from .stores import ArgoIndex # noqa: E402 from .utils import compute # noqa: E402, F401 -from .related import TopoFetcher # noqa: E402 +from .related import TopoFetcher, OceanOPSDeployments # noqa: E402 # __all__ = ( diff --git a/argopy/related/__init__.py b/argopy/related/__init__.py index 58c3812e..f2c85631 100644 --- a/argopy/related/__init__.py +++ b/argopy/related/__init__.py @@ -1,7 +1,9 @@ -from .topo import TopoFetcher +from .topography import TopoFetcher +from .ocean_ops_deployments import OceanOPSDeployments # __all__ = ( # Classes: "TopoFetcher", + "OceanOPSDeployments", ) \ No newline at end of file diff --git a/argopy/related/ocean_ops_deployments.py b/argopy/related/ocean_ops_deployments.py new file mode 100644 index 00000000..a1a2dea6 --- /dev/null +++ b/argopy/related/ocean_ops_deployments.py @@ -0,0 +1,377 @@ +import pandas as pd +import numpy as np +from ..stores import httpstore +from ..errors import DataNotFound +from ..plot import scatter_map + + +class OceanOPSDeployments: + """Use the OceanOPS API for metadata access to retrieve Argo floats deployment information. 
+ + The API is documented here: https://www.ocean-ops.org/api/swagger/?url=https://www.ocean-ops.org/api/1/oceanops-api.yaml + + Description of deployment status name: + + =========== == ==== + Status Id Description + =========== == ==== + PROBABLE 0 Starting status for some platforms, when there is only a few metadata available, like rough deployment location and date. The platform may be deployed + CONFIRMED 1 Automatically set when a ship is attached to the deployment information. The platform is ready to be deployed, deployment is planned + REGISTERED 2 Starting status for most of the networks, when deployment planning is not done. The deployment is certain, and a notification has been sent via the OceanOPS system + OPERATIONAL 6 Automatically set when the platform is emitting a pulse and observations are distributed within a certain time interval + INACTIVE 4 The platform is not emitting a pulse since a certain time + CLOSED 5 The platform is not emitting a pulse since a long time, it is considered as dead + =========== == ==== + + Examples + -------- + + Import the class: + + >>> from argopy.related import OceanOPSDeployments + >>> from argopy import OceanOPSDeployments + + Possibly define the space/time box to work with: + + >>> box = [-20, 0, 42, 51] + >>> box = [-20, 0, 42, 51, '2020-01', '2021-01'] + >>> box = [-180, 180, -90, 90, '2020-01', None] + + Instantiate the metadata fetcher: + + >>> deployment = OceanOPSDeployments() + >>> deployment = OceanOPSDeployments(box) + >>> deployment = OceanOPSDeployments(box, deployed_only=True) # Remove planification + + Load information: + + >>> df = deployment.to_dataframe() + >>> data = deployment.to_json() + + Useful attributes and methods: + + >>> deployment.uri + >>> deployment.uri_decoded + >>> deployment.status_code + >>> fig, ax = deployment.plot_status() + >>> plan_virtualfleet = deployment.plan + + """ + api = "https://www.ocean-ops.org" + """URL to the API""" + + model = "api/1/data/platform" + """This model represents a Platform entity and is used to retrieve a platform information (schema model + named 'Ptf').""" + + api_server_check = 'https://www.ocean-ops.org/api/1/oceanops-api.yaml' + """URL to check if the API is alive""" + + def __init__(self, box: list = None, deployed_only: bool = False): + """ + + Parameters + ---------- + box: list, optional, default=None + Define the domain to load the Argo deployment plan for. By default, **box** is set to None to work with the + global deployment plan starting from the current date. + The list expects one of the following format: + + - [lon_min, lon_max, lat_min, lat_max] + - [lon_min, lon_max, lat_min, lat_max, date_min] + - [lon_min, lon_max, lat_min, lat_max, date_min, date_max] + + Longitude and latitude values must be floats. Dates are strings. + If **box** is provided with a regional domain definition (only 4 values given), then ``date_min`` will be + set to the current date. + + deployed_only: bool, optional, default=False + Return only floats already deployed. If set to False (default), will return the full + deployment plan (floats with all possible status). If set to True, will return only floats with one of the + following status: ``OPERATIONAL``, ``INACTIVE``, and ``CLOSED``. 
+ """ + if box is None: + box = [None, None, None, None, pd.to_datetime('now', utc=True).strftime("%Y-%m-%d"), None] + elif len(box) == 4: + box.append(pd.to_datetime('now', utc=True).strftime("%Y-%m-%d")) + box.append(None) + elif len(box) == 5: + box.append(None) + + if len(box) != 6: + raise ValueError("The 'box' argument must be: None or of lengths 4 or 5 or 6\n%s" % str(box)) + + self.box = box + self.deployed_only = deployed_only + self.data = None + + self.fs = httpstore(cache=False) + + def __format(self, x, typ: str) -> str: + """ string formatting helper """ + if typ == "lon": + return str(x) if x is not None else "-" + elif typ == "lat": + return str(x) if x is not None else "-" + elif typ == "tim": + return pd.to_datetime(x).strftime("%Y-%m-%d") if x is not None else "-" + else: + return str(x) + + def __repr__(self): + summary = [""] + summary.append("API: %s/%s" % (self.api, self.model)) + summary.append("Domain: %s" % self.box_name) + summary.append("Deployed only: %s" % self.deployed_only) + if self.data is not None: + summary.append("Nb of floats in the deployment plan: %s" % self.size) + else: + summary.append("Nb of floats in the deployment plan: - [Data not retrieved yet]") + return '\n'.join(summary) + + def __encode_inc(self, inc): + """Return encoded uri expression for 'include' parameter + + Parameters + ---------- + inc: str + + Returns + ------- + str + """ + return inc.replace("\"", "%22").replace("[", "%5B").replace("]", "%5D") + + def __encode_exp(self, exp): + """Return encoded uri expression for 'exp' parameter + + Parameters + ---------- + exp: str + + Returns + ------- + str + """ + return exp.replace("\"", "%22").replace("'", "%27").replace(" ", "%20").replace(">", "%3E").replace("<", "%3C") + + def __get_uri(self, encoded=False): + uri = "exp=%s&include=%s" % (self.exp(encoded=encoded), self.include(encoded=encoded)) + url = "%s/%s?%s" % (self.api, self.model, uri) + return url + + def include(self, encoded=False): + """Return an Ocean-Ops API 'include' expression + + This is used to determine which variables the API call should return + + Parameters + ---------- + encoded: bool, default=False + + Returns + ------- + str + """ + # inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus", "wmos"] + # inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus.id", "ptfStatus.name", "wmos"] + # inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus.id", "ptfStatus.name"] + inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus.id", "ptfStatus.name", + "ptfStatus.description", + "program.nameShort", "program.country.nameShort", "ptfModel.nameShort", "ptfDepl.noSite"] + inc = "[%s]" % ",".join(["\"%s\"" % v for v in inc]) + return inc if not encoded else self.__encode_inc(inc) + + def exp(self, encoded=False): + """Return an Ocean-Ops API deployment search expression for an argopy region box definition + + Parameters + ---------- + encoded: bool, default=False + + Returns + ------- + str + """ + exp, arg = "networkPtfs.network.name='Argo'", [] + if self.box[0] is not None: + exp += " and ptfDepl.lon>=$var%i" % (len(arg) + 1) + arg.append(str(self.box[0])) + if self.box[1] is not None: + exp += " and ptfDepl.lon<=$var%i" % (len(arg) + 1) + arg.append(str(self.box[1])) + if self.box[2] is not None: + exp += " and ptfDepl.lat>=$var%i" % (len(arg) + 1) + arg.append(str(self.box[2])) + if self.box[3] is not None: + exp += " and ptfDepl.lat<=$var%i" % (len(arg) + 1) + 
arg.append(str(self.box[3])) + if len(self.box) > 4: + if self.box[4] is not None: + exp += " and ptfDepl.deplDate>=$var%i" % (len(arg) + 1) + arg.append("\"%s\"" % pd.to_datetime(self.box[4]).strftime("%Y-%m-%d %H:%M:%S")) + if self.box[5] is not None: + exp += " and ptfDepl.deplDate<=$var%i" % (len(arg) + 1) + arg.append("\"%s\"" % pd.to_datetime(self.box[5]).strftime("%Y-%m-%d %H:%M:%S")) + + if self.deployed_only: + exp += " and ptfStatus>=$var%i" % (len(arg) + 1) + arg.append(str(4)) # Allow for: 4, 5 or 6 + + exp = "[\"%s\", %s]" % (exp, ", ".join(arg)) + return exp if not encoded else self.__encode_exp(exp) + + @property + def size(self): + return len(self.data['data']) if self.data is not None else None + + @property + def status_code(self): + """Return a :class:`pandas.DataFrame` with the definition of status""" + status = {'status_code': [0, 1, 2, 6, 4, 5], + 'status_name': ['PROBABLE', 'CONFIRMED', 'REGISTERED', 'OPERATIONAL', 'INACTIVE', 'CLOSED'], + 'description': [ + 'Starting status for some platforms, when there is only a few metadata available, like rough deployment location and date. The platform may be deployed', + 'Automatically set when a ship is attached to the deployment information. The platform is ready to be deployed, deployment is planned', + 'Starting status for most of the networks, when deployment planning is not done. The deployment is certain, and a notification has been sent via the OceanOPS system', + 'Automatically set when the platform is emitting a pulse and observations are distributed within a certain time interval', + 'The platform is not emitting a pulse since a certain time', + 'The platform is not emitting a pulse since a long time, it is considered as dead', + ], + } + return pd.DataFrame(status).set_index('status_code') + + @property + def box_name(self): + """Return a string to print the box property""" + BOX = self.box + cname = ("[lon=%s/%s; lat=%s/%s]") % ( + self.__format(BOX[0], "lon"), + self.__format(BOX[1], "lon"), + self.__format(BOX[2], "lat"), + self.__format(BOX[3], "lat"), + ) + if len(BOX) == 6: + cname = ("[lon=%s/%s; lat=%s/%s; t=%s/%s]") % ( + self.__format(BOX[0], "lon"), + self.__format(BOX[1], "lon"), + self.__format(BOX[2], "lat"), + self.__format(BOX[3], "lat"), + self.__format(BOX[4], "tim"), + self.__format(BOX[5], "tim"), + ) + return cname + + @property + def uri(self): + """Return encoded URL to post an Ocean-Ops API request + + Returns + ------- + str + """ + return self.__get_uri(encoded=True) + + @property + def uri_decoded(self): + """Return decoded URL to post an Ocean-Ops API request + + Returns + ------- + str + """ + return self.__get_uri(encoded=False) + + @property + def plan(self): + """Return a dictionary to be used as argument in a :class:`virtualargofleet.VirtualFleet` + + This method is for dev, but will be moved to the VirtualFleet software utilities + """ + df = self.to_dataframe() + plan = df[['lon', 'lat', 'date']].rename(columns={"date": "time"}).to_dict('series') + for key in plan.keys(): + plan[key] = plan[key].to_list() + plan['time'] = np.array(plan['time'], dtype='datetime64') + return plan + + def to_json(self): + """Return OceanOPS API request response as a json object""" + if self.data is None: + self.data = self.fs.open_json(self.uri) + return self.data + + def to_dataframe(self): + """Return the deployment plan as :class:`pandas.DataFrame` + + Returns + ------- + :class:`pandas.DataFrame` + """ + data = self.to_json() + if data['total'] == 0: + raise DataNotFound('Your search matches no 
results') + + # res = {'date': [], 'lat': [], 'lon': [], 'wmo': [], 'status_name': [], 'status_code': []} + # res = {'date': [], 'lat': [], 'lon': [], 'wmo': [], 'status_name': [], 'status_code': [], 'ship_name': []} + res = {'date': [], 'lat': [], 'lon': [], 'wmo': [], 'status_name': [], 'status_code': [], 'program': [], + 'country': [], 'model': []} + # status = {'REGISTERED': None, 'OPERATIONAL': None, 'INACTIVE': None, 'CLOSED': None, + # 'CONFIRMED': None, 'OPERATIONAL': None, 'PROBABLE': None, 'REGISTERED': None} + + for irow, ptf in enumerate(data['data']): + # if irow == 0: + # print(ptf) + res['lat'].append(ptf['ptfDepl']['lat']) + res['lon'].append(ptf['ptfDepl']['lon']) + res['date'].append(ptf['ptfDepl']['deplDate']) + res['wmo'].append(ptf['ref']) + # res['wmo'].append(ptf['wmos'][-1]['wmo']) + # res['wmo'].append(float_wmo(ptf['ref'])) # will not work for some CONFIRMED, PROBABLE or REGISTERED floats + # res['wmo'].append(float_wmo(ptf['wmos'][-1]['wmo'])) + res['status_code'].append(ptf['ptfStatus']['id']) + res['status_name'].append(ptf['ptfStatus']['name']) + + # res['ship_name'].append(ptf['ptfDepl']['shipName']) + program = ptf['program']['nameShort'].replace("_", " ") if ptf['program']['nameShort'] else ptf['program'][ + 'nameShort'] + res['program'].append(program) + res['country'].append(ptf['program']['country']['nameShort']) + res['model'].append(ptf['ptfModel']['nameShort']) + + # if status[ptf['ptfStatus']['name']] is None: + # status[ptf['ptfStatus']['name']] = ptf['ptfStatus']['description'] + + df = pd.DataFrame(res) + df = df.astype({'date': 'datetime64[s]'}) + df = df.sort_values(by='date').reset_index(drop=True) + # df = df[ (df['status_name'] == 'CLOSED') | (df['status_name'] == 'OPERATIONAL')] # Select only floats that have been deployed and returned data + # print(status) + return df + + def plot_status(self, + **kwargs + ): + """Quick plot of the deployment plan + + Named arguments are passed to :class:`plot.scatter_map` + + Returns + ------- + fig: :class:`matplotlib.figure.Figure` + ax: :class:`matplotlib.axes.Axes` + """ + df = self.to_dataframe() + fig, ax = scatter_map(df, + x='lon', + y='lat', + hue='status_code', + traj=False, + cmap='deployment_status', + **kwargs) + ax.set_title("Argo network deployment plan\n%s\nSource: OceanOPS API as of %s" + % (self.box_name, pd.to_datetime('now', utc=True).strftime("%Y-%m-%d %H:%M:%S")), + fontsize=12 + ) + return fig, ax + diff --git a/argopy/related/topo.py b/argopy/related/topography.py similarity index 100% rename from argopy/related/topo.py rename to argopy/related/topography.py diff --git a/argopy/utilities.py b/argopy/utilities.py index 9ff569f9..fc6c8e37 100644 --- a/argopy/utilities.py +++ b/argopy/utilities.py @@ -2941,378 +2941,6 @@ def all_tbl_name(self): return all_tables -class OceanOPSDeployments: - """Use the OceanOPS API for metadata access to retrieve Argo floats deployment information. - - The API is documented here: https://www.ocean-ops.org/api/swagger/?url=https://www.ocean-ops.org/api/1/oceanops-api.yaml - - Description of deployment status name: - - =========== == ==== - Status Id Description - =========== == ==== - PROBABLE 0 Starting status for some platforms, when there is only a few metadata available, like rough deployment location and date. The platform may be deployed - CONFIRMED 1 Automatically set when a ship is attached to the deployment information. 
The platform is ready to be deployed, deployment is planned - REGISTERED 2 Starting status for most of the networks, when deployment planning is not done. The deployment is certain, and a notification has been sent via the OceanOPS system - OPERATIONAL 6 Automatically set when the platform is emitting a pulse and observations are distributed within a certain time interval - INACTIVE 4 The platform is not emitting a pulse since a certain time - CLOSED 5 The platform is not emitting a pulse since a long time, it is considered as dead - =========== == ==== - - Examples - -------- - - Import the utility class: - - >>> from argopy.utilities import OceanOPSDeployments - >>> from argopy import OceanOPSDeployments - - Possibly define the space/time box to work with: - - >>> box = [-20, 0, 42, 51] - >>> box = [-20, 0, 42, 51, '2020-01', '2021-01'] - >>> box = [-180, 180, -90, 90, '2020-01', None] - - Instantiate the metadata fetcher: - - >>> deployment = OceanOPSDeployments() - >>> deployment = OceanOPSDeployments(box) - >>> deployment = OceanOPSDeployments(box, deployed_only=True) # Remove planification - - Load information: - - >>> df = deployment.to_dataframe() - >>> data = deployment.to_json() - - Useful attributes and methods: - - >>> deployment.uri - >>> deployment.uri_decoded - >>> deployment.status_code - >>> fig, ax = deployment.plot_status() - >>> plan_virtualfleet = deployment.plan - - """ - api = "https://www.ocean-ops.org" - """URL to the API""" - - model = "api/1/data/platform" - """This model represents a Platform entity and is used to retrieve a platform information (schema model - named 'Ptf').""" - - api_server_check = 'https://www.ocean-ops.org/api/1/oceanops-api.yaml' - """URL to check if the API is alive""" - - def __init__(self, box: list = None, deployed_only: bool = False): - """ - - Parameters - ---------- - box: list, optional, default=None - Define the domain to load the Argo deployment plan for. By default, **box** is set to None to work with the - global deployment plan starting from the current date. - The list expects one of the following format: - - - [lon_min, lon_max, lat_min, lat_max] - - [lon_min, lon_max, lat_min, lat_max, date_min] - - [lon_min, lon_max, lat_min, lat_max, date_min, date_max] - - Longitude and latitude values must be floats. Dates are strings. - If **box** is provided with a regional domain definition (only 4 values given), then ``date_min`` will be - set to the current date. - - deployed_only: bool, optional, default=False - Return only floats already deployed. If set to False (default), will return the full - deployment plan (floats with all possible status). If set to True, will return only floats with one of the - following status: ``OPERATIONAL``, ``INACTIVE``, and ``CLOSED``. 
- """ - if box is None: - box = [None, None, None, None, pd.to_datetime('now', utc=True).strftime("%Y-%m-%d"), None] - elif len(box) == 4: - box.append(pd.to_datetime('now', utc=True).strftime("%Y-%m-%d")) - box.append(None) - elif len(box) == 5: - box.append(None) - - if len(box) != 6: - raise ValueError("The 'box' argument must be: None or of lengths 4 or 5 or 6\n%s" % str(box)) - - self.box = box - self.deployed_only = deployed_only - self.data = None - - from .stores import httpstore - self.fs = httpstore(cache=False) - - def __format(self, x, typ: str) -> str: - """ string formatting helper """ - if typ == "lon": - return str(x) if x is not None else "-" - elif typ == "lat": - return str(x) if x is not None else "-" - elif typ == "tim": - return pd.to_datetime(x).strftime("%Y-%m-%d") if x is not None else "-" - else: - return str(x) - - def __repr__(self): - summary = [""] - summary.append("API: %s/%s" % (self.api, self.model)) - summary.append("Domain: %s" % self.box_name) - summary.append("Deployed only: %s" % self.deployed_only) - if self.data is not None: - summary.append("Nb of floats in the deployment plan: %s" % self.size) - else: - summary.append("Nb of floats in the deployment plan: - [Data not retrieved yet]") - return '\n'.join(summary) - - def __encode_inc(self, inc): - """Return encoded uri expression for 'include' parameter - - Parameters - ---------- - inc: str - - Returns - ------- - str - """ - return inc.replace("\"", "%22").replace("[", "%5B").replace("]", "%5D") - - def __encode_exp(self, exp): - """Return encoded uri expression for 'exp' parameter - - Parameters - ---------- - exp: str - - Returns - ------- - str - """ - return exp.replace("\"", "%22").replace("'", "%27").replace(" ", "%20").replace(">", "%3E").replace("<", "%3C") - - def __get_uri(self, encoded=False): - uri = "exp=%s&include=%s" % (self.exp(encoded=encoded), self.include(encoded=encoded)) - url = "%s/%s?%s" % (self.api, self.model, uri) - return url - - def include(self, encoded=False): - """Return an Ocean-Ops API 'include' expression - - This is used to determine which variables the API call should return - - Parameters - ---------- - encoded: bool, default=False - - Returns - ------- - str - """ - # inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus", "wmos"] - # inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus.id", "ptfStatus.name", "wmos"] - # inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus.id", "ptfStatus.name"] - inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus.id", "ptfStatus.name", - "ptfStatus.description", - "program.nameShort", "program.country.nameShort", "ptfModel.nameShort", "ptfDepl.noSite"] - inc = "[%s]" % ",".join(["\"%s\"" % v for v in inc]) - return inc if not encoded else self.__encode_inc(inc) - - def exp(self, encoded=False): - """Return an Ocean-Ops API deployment search expression for an argopy region box definition - - Parameters - ---------- - encoded: bool, default=False - - Returns - ------- - str - """ - exp, arg = "networkPtfs.network.name='Argo'", [] - if self.box[0] is not None: - exp += " and ptfDepl.lon>=$var%i" % (len(arg) + 1) - arg.append(str(self.box[0])) - if self.box[1] is not None: - exp += " and ptfDepl.lon<=$var%i" % (len(arg) + 1) - arg.append(str(self.box[1])) - if self.box[2] is not None: - exp += " and ptfDepl.lat>=$var%i" % (len(arg) + 1) - arg.append(str(self.box[2])) - if self.box[3] is not None: - exp += " and ptfDepl.lat<=$var%i" % (len(arg) 
+ 1) - arg.append(str(self.box[3])) - if len(self.box) > 4: - if self.box[4] is not None: - exp += " and ptfDepl.deplDate>=$var%i" % (len(arg) + 1) - arg.append("\"%s\"" % pd.to_datetime(self.box[4]).strftime("%Y-%m-%d %H:%M:%S")) - if self.box[5] is not None: - exp += " and ptfDepl.deplDate<=$var%i" % (len(arg) + 1) - arg.append("\"%s\"" % pd.to_datetime(self.box[5]).strftime("%Y-%m-%d %H:%M:%S")) - - if self.deployed_only: - exp += " and ptfStatus>=$var%i" % (len(arg) + 1) - arg.append(str(4)) # Allow for: 4, 5 or 6 - - exp = "[\"%s\", %s]" % (exp, ", ".join(arg)) - return exp if not encoded else self.__encode_exp(exp) - - @property - def size(self): - return len(self.data['data']) if self.data is not None else None - - @property - def status_code(self): - """Return a :class:`pandas.DataFrame` with the definition of status""" - status = {'status_code': [0, 1, 2, 6, 4, 5], - 'status_name': ['PROBABLE', 'CONFIRMED', 'REGISTERED', 'OPERATIONAL', 'INACTIVE', 'CLOSED'], - 'description': [ - 'Starting status for some platforms, when there is only a few metadata available, like rough deployment location and date. The platform may be deployed', - 'Automatically set when a ship is attached to the deployment information. The platform is ready to be deployed, deployment is planned', - 'Starting status for most of the networks, when deployment planning is not done. The deployment is certain, and a notification has been sent via the OceanOPS system', - 'Automatically set when the platform is emitting a pulse and observations are distributed within a certain time interval', - 'The platform is not emitting a pulse since a certain time', - 'The platform is not emitting a pulse since a long time, it is considered as dead', - ], - } - return pd.DataFrame(status).set_index('status_code') - - @property - def box_name(self): - """Return a string to print the box property""" - BOX = self.box - cname = ("[lon=%s/%s; lat=%s/%s]") % ( - self.__format(BOX[0], "lon"), - self.__format(BOX[1], "lon"), - self.__format(BOX[2], "lat"), - self.__format(BOX[3], "lat"), - ) - if len(BOX) == 6: - cname = ("[lon=%s/%s; lat=%s/%s; t=%s/%s]") % ( - self.__format(BOX[0], "lon"), - self.__format(BOX[1], "lon"), - self.__format(BOX[2], "lat"), - self.__format(BOX[3], "lat"), - self.__format(BOX[4], "tim"), - self.__format(BOX[5], "tim"), - ) - return cname - - @property - def uri(self): - """Return encoded URL to post an Ocean-Ops API request - - Returns - ------- - str - """ - return self.__get_uri(encoded=True) - - @property - def uri_decoded(self): - """Return decoded URL to post an Ocean-Ops API request - - Returns - ------- - str - """ - return self.__get_uri(encoded=False) - - @property - def plan(self): - """Return a dictionary to be used as argument in a :class:`virtualargofleet.VirtualFleet` - - This method is for dev, but will be moved to the VirtualFleet software utilities - """ - df = self.to_dataframe() - plan = df[['lon', 'lat', 'date']].rename(columns={"date": "time"}).to_dict('series') - for key in plan.keys(): - plan[key] = plan[key].to_list() - plan['time'] = np.array(plan['time'], dtype='datetime64') - return plan - - def to_json(self): - """Return OceanOPS API request response as a json object""" - if self.data is None: - self.data = self.fs.open_json(self.uri) - return self.data - - def to_dataframe(self): - """Return the deployment plan as :class:`pandas.DataFrame` - - Returns - ------- - :class:`pandas.DataFrame` - """ - data = self.to_json() - if data['total'] == 0: - raise DataNotFound('Your search 
matches no results') - - # res = {'date': [], 'lat': [], 'lon': [], 'wmo': [], 'status_name': [], 'status_code': []} - # res = {'date': [], 'lat': [], 'lon': [], 'wmo': [], 'status_name': [], 'status_code': [], 'ship_name': []} - res = {'date': [], 'lat': [], 'lon': [], 'wmo': [], 'status_name': [], 'status_code': [], 'program': [], - 'country': [], 'model': []} - # status = {'REGISTERED': None, 'OPERATIONAL': None, 'INACTIVE': None, 'CLOSED': None, - # 'CONFIRMED': None, 'OPERATIONAL': None, 'PROBABLE': None, 'REGISTERED': None} - - for irow, ptf in enumerate(data['data']): - # if irow == 0: - # print(ptf) - res['lat'].append(ptf['ptfDepl']['lat']) - res['lon'].append(ptf['ptfDepl']['lon']) - res['date'].append(ptf['ptfDepl']['deplDate']) - res['wmo'].append(ptf['ref']) - # res['wmo'].append(ptf['wmos'][-1]['wmo']) - # res['wmo'].append(float_wmo(ptf['ref'])) # will not work for some CONFIRMED, PROBABLE or REGISTERED floats - # res['wmo'].append(float_wmo(ptf['wmos'][-1]['wmo'])) - res['status_code'].append(ptf['ptfStatus']['id']) - res['status_name'].append(ptf['ptfStatus']['name']) - - # res['ship_name'].append(ptf['ptfDepl']['shipName']) - program = ptf['program']['nameShort'].replace("_", " ") if ptf['program']['nameShort'] else ptf['program'][ - 'nameShort'] - res['program'].append(program) - res['country'].append(ptf['program']['country']['nameShort']) - res['model'].append(ptf['ptfModel']['nameShort']) - - # if status[ptf['ptfStatus']['name']] is None: - # status[ptf['ptfStatus']['name']] = ptf['ptfStatus']['description'] - - df = pd.DataFrame(res) - df = df.astype({'date': 'datetime64[s]'}) - df = df.sort_values(by='date').reset_index(drop=True) - # df = df[ (df['status_name'] == 'CLOSED') | (df['status_name'] == 'OPERATIONAL')] # Select only floats that have been deployed and returned data - # print(status) - return df - - def plot_status(self, - **kwargs - ): - """Quick plot of the deployment plan - - Named arguments are passed to :class:`plot.scatter_map` - - Returns - ------- - fig: :class:`matplotlib.figure.Figure` - ax: :class:`matplotlib.axes.Axes` - """ - from .plot.plot import scatter_map - df = self.to_dataframe() - fig, ax = scatter_map(df, - x='lon', - y='lat', - hue='status_code', - traj=False, - cmap='deployment_status', - **kwargs) - ax.set_title("Argo network deployment plan\n%s\nSource: OceanOPS API as of %s" - % (self.box_name, pd.to_datetime('now', utc=True).strftime("%Y-%m-%d %H:%M:%S")), - fontsize=12 - ) - return fig, ax - @deprecated def cast_types(ds): # noqa: C901 From 3d3cff9f74583503dfaaa00358e617adbf7fdc06 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Wed, 6 Sep 2023 16:15:44 +0200 Subject: [PATCH 04/33] [skip-ci] --- argopy/__init__.py | 4 +- argopy/related/__init__.py | 4 + argopy/related/argo_documentation.py | 348 ++++++++++++++ argopy/related/ocean_ops_deployments.py | 193 +++++--- argopy/related/reference_tables.py | 245 ++++++++++ argopy/related/topography.py | 30 +- argopy/utilities.py | 583 ------------------------ 7 files changed, 744 insertions(+), 663 deletions(-) create mode 100644 argopy/related/argo_documentation.py create mode 100644 argopy/related/reference_tables.py diff --git a/argopy/__init__.py b/argopy/__init__.py index 8c8f6cf7..8f2c8392 100644 --- a/argopy/__init__.py +++ b/argopy/__init__.py @@ -35,13 +35,13 @@ from . 
import plot # noqa: E402 from .plot import dashboard, ArgoColors # noqa: E402 from .utilities import show_versions, show_options, clear_cache, lscache # noqa: E402 -from .utilities import ArgoNVSReferenceTables, ArgoDocs # noqa: E402 +from .utilities import ArgoDocs # noqa: E402 from .utilities import monitor_status as status # noqa: E402 from .options import set_options, reset_options # noqa: E402 from .data_fetchers import CTDRefDataFetcher # noqa: E402 from .stores import ArgoIndex # noqa: E402 from .utils import compute # noqa: E402, F401 -from .related import TopoFetcher, OceanOPSDeployments # noqa: E402 +from .related import TopoFetcher, OceanOPSDeployments, ArgoNVSReferenceTables # noqa: E402 # __all__ = ( diff --git a/argopy/related/__init__.py b/argopy/related/__init__.py index f2c85631..bc82a46e 100644 --- a/argopy/related/__init__.py +++ b/argopy/related/__init__.py @@ -1,9 +1,13 @@ from .topography import TopoFetcher from .ocean_ops_deployments import OceanOPSDeployments +from .reference_tables import ArgoNVSReferenceTables +from .argo_documentation import ArgoDocs # __all__ = ( # Classes: "TopoFetcher", "OceanOPSDeployments", + "ArgoNVSReferenceTables", + "ArgoDocs", ) \ No newline at end of file diff --git a/argopy/related/argo_documentation.py b/argopy/related/argo_documentation.py new file mode 100644 index 00000000..0ed70a39 --- /dev/null +++ b/argopy/related/argo_documentation.py @@ -0,0 +1,348 @@ +import pandas as pd +from functools import lru_cache +from ..stores import httpstore +from ..options import OPTIONS + + +class ArgoDocs: + """ADMT documentation helper class + + Examples + -------- + >>> ArgoDocs().list + >>> ArgoDocs().search("CDOM") + >>> ArgoDocs().search("CDOM", where='abstract') + + >>> ArgoDocs(35385) + >>> ArgoDocs(35385).ris + >>> ArgoDocs(35385).abstract + >>> ArgoDocs(35385).show() + >>> ArgoDocs(35385).open_pdf() + >>> ArgoDocs(35385).open_pdf(page=12) + + """ + _catalogue = [ + { + "category": "Argo data formats", + "title": "Argo user's manual", + "doi": "10.13155/29825", + "id": 29825 + }, + { + "category": "Quality control", + "title": "Argo Quality Control Manual for CTD and Trajectory Data", + "doi": "10.13155/33951", + "id": 33951 + }, + { + "category": "Quality control", + "title": "Argo quality control manual for dissolved oxygen concentration", + "doi": "10.13155/46542", + "id": 46542 + }, + { + "category": "Quality control", + "title": "Argo quality control manual for biogeochemical data", + "doi": "10.13155/40879", + "id": 40879 + }, + { + "category": "Quality control", + "title": "BGC-Argo quality control manual for the Chlorophyll-A concentration", + "doi": "10.13155/35385", + "id": 35385 + }, + { + "category": "Quality control", + "title": "BGC-Argo quality control manual for nitrate concentration", + "doi": "10.13155/84370", + "id": 84370 + }, + { + "category": "Quality control", + "title": "Quality control for BGC-Argo radiometry", + "doi": "10.13155/62466", + "id": 62466 + }, + { + "category": "Cookbooks", + "title": "Argo DAC profile cookbook", + "doi": "10.13155/41151", + "id": 41151 + }, + { + "category": "Cookbooks", + "title": "Argo DAC trajectory cookbook", + "doi": "10.13155/29824", + "id": 29824 + }, + { + "category": "Cookbooks", + "title": "DMQC Cookbook for Core Argo parameters", + "doi": "10.13155/78994", + "id": 78994 + }, + { + "category": "Cookbooks", + "title": "Processing Argo oxygen data at the DAC level", + "doi": "10.13155/39795", + "id": 39795 + }, + { + "category": "Cookbooks", + "title": "Processing Bio-Argo 
particle backscattering at the DAC level", + "doi": "10.13155/39459", + "id": 39459 + }, + { + "category": "Cookbooks", + "title": "Processing BGC-Argo chlorophyll-A concentration at the DAC level", + "doi": "10.13155/39468", + "id": 39468 + }, + { + "category": "Cookbooks", + "title": "Processing Argo measurement timing information at the DAC level", + "doi": "10.13155/47998", + "id": 47998 + }, + { + "category": "Cookbooks", + "title": "Processing BGC-Argo CDOM concentration at the DAC level", + "doi": "10.13155/54541", + "id": 54541 + }, + { + "category": "Cookbooks", + "title": "Processing Bio-Argo nitrate concentration at the DAC Level", + "doi": "10.13155/46121", + "id": 46121 + }, + { + "category": "Cookbooks", + "title": "Processing BGC-Argo Radiometric data at the DAC level", + "doi": "10.13155/51541", + "id": 51541 + }, + { + "category": "Cookbooks", + "title": "Processing BGC-Argo pH data at the DAC level", + "doi": "10.13155/57195", + "id": 57195 + }, + { + "category": "Cookbooks", + "title": "Description of the Argo GDAC File Checks: Data Format and Consistency Checks", + "doi": "10.13155/46120", + "id": 46120 + }, + { + "category": "Cookbooks", + "title": "Description of the Argo GDAC File Merge Process", + "doi": "10.13155/52154", + "id": 52154 + }, + { + "category": "Cookbooks", + "title": "BGC-Argo synthetic profile file processing and format on Coriolis GDAC", + "doi": "10.13155/55637", + "id": 55637 + }, + { + "category": "Cookbooks", + "title": "Argo GDAC cookbook", + "doi": "10.13155/46202", + "id": 46202 + } + ] + + class RIS: + """RIS file structure from TXT file""" + + def __init__(self, file=None, fs=None): + self.record = None + self.fs = fs + if file: + self.parse(file) + + def parse(self, file): + """Parse input file""" + # log.debug(file) + + with self.fs.open(file, 'r', encoding="utf-8") as f: + TXTlines = f.readlines() + lines = [] + # Eliminate blank lines + for line in TXTlines: + line = line.strip() + if len(line) > 0: + lines.append(line) + TXTlines = lines + + # + record = {} + for line in TXTlines: + # print("\n>", line) + if len(line) > 2: + if line[2] == " ": + tag = line[0:2] + field = line[3:] + # print("ok", {tag: field}) + record[tag] = [field] + else: + # print("-", line) + record[tag].append(line) + elif len(line) == 2: + record[line] = [] + # else: + # print("*", line) + + for key in record.keys(): + record[key] = "; ".join(record[key]) + + self.record = record + + @lru_cache + def __init__(self, docid=None, cache=False): + self.docid = None + self._ris = None + self._risfile = None + self._fs = httpstore(cache=cache, cachedir=OPTIONS['cachedir']) + self._doiserver = "https://dx.doi.org" + self._archimer = "https://archimer.ifremer.fr" + + if isinstance(docid, int): + if docid in [doc['id'] for doc in self._catalogue]: + self.docid = docid + else: + raise ValueError("Unknown document id") + elif isinstance(docid, str): + start_with = lambda f, x: f[0:len(x)] == x if len(x) <= len(f) else False # noqa: E731 + if start_with(docid, '10.13155/') and docid in [doc['doi'] for doc in self._catalogue]: + self.docid = [doc['id'] for doc in self._catalogue if docid == doc['doi']][0] + else: + raise ValueError("'docid' must be an integer or a valid Argo DOI") + + def __repr__(self): + summary = [""] + if self.docid is not None: + doc = [doc for doc in self._catalogue if doc['id'] == self.docid][0] + summary.append("Title: %s" % doc['title']) + summary.append("DOI: %s" % doc['doi']) + summary.append("url: https://dx.doi.org/%s" % doc['doi']) + 
summary.append("last pdf: %s" % self.pdf) + if 'AF' in self.ris: + summary.append("Authors: %s" % self.ris['AF']) + summary.append("Abstract: %s" % self.ris['AB']) + else: + summary.append("- %i documents with a DOI are available in the catalogue" % len(self._catalogue)) + summary.append("- Use the method 'search' to find a document id") + summary.append("- Use the property 'list' to check out the catalogue") + return "\n".join(summary) + + @property + def list(self): + """List of all available documents as a :class:`pandas.DataFrame`""" + return pd.DataFrame(self._catalogue) + + @property + def js(self): + """Internal json record for a document""" + if self.docid is not None: + return [doc for doc in self._catalogue if doc['id'] == self.docid][0] + else: + raise ValueError("Select a document first !") + + @property + def ris(self): + """RIS record of a document""" + if self.docid is not None: + if self._ris is None: + # Fetch RIS metadata for this document: + import re + file = self._fs.download_url("%s/%s" % (self._doiserver, self.js['doi'])) + x = re.search(r']*)rel="nofollow">TXT<\/a>', + str(file)) + export_txt_url = x[1].replace("https://archimer.ifremer.fr", self._archimer) + self._risfile = export_txt_url + self._ris = self.RIS(export_txt_url, fs=self._fs).record + return self._ris + else: + raise ValueError("Select a document first !") + + @property + def abstract(self): + """Abstract of a document""" + if self.docid is not None: + return self.ris['AB'] + else: + raise ValueError("Select a document first !") + + @property + def pdf(self): + """Link to the online pdf version of a document""" + if self.docid is not None: + return self.ris['UR'] + else: + raise ValueError("Select a document first !") + + def show(self, height=800): + """Insert document in pdf in a notebook cell + + Parameters + ---------- + height: int + Height in pixels of the cell + """ + if self.docid is not None: + from IPython.core.display import HTML + return HTML( + '' % (self.ris['UR'], height)) + else: + raise ValueError("Select a document first !") + + def open_pdf(self, page=None, url_only=False): + """Open document in new browser tab + + Parameters + ---------- + page: int, optional + Open directly a specific page number + """ + url = self.pdf + url += '#view=FitV&pagemode=thumbs' + if page: + url += '&page=%i' % page + if self.docid is not None: + if not url_only: + import webbrowser + webbrowser.open_new(url) + else: + return url + else: + raise ValueError("Select a document first !") + + def search(self, txt, where='title'): + """Search for string in all documents title or abstract + + Parameters + ---------- + txt: str + where: str, default='title' + Where to search, can be 'title' or 'abstract' + + Returns + ------- + list + + """ + results = [] + for doc in self.list.iterrows(): + docid = doc[1]['id'] + if where == 'title': + if txt.lower() in ArgoDocs(docid).js['title'].lower(): + results.append(docid) + elif where == 'abstract': + if txt.lower() in ArgoDocs(docid).abstract.lower(): + results.append(docid) + return results diff --git a/argopy/related/ocean_ops_deployments.py b/argopy/related/ocean_ops_deployments.py index a1a2dea6..3350ebd6 100644 --- a/argopy/related/ocean_ops_deployments.py +++ b/argopy/related/ocean_ops_deployments.py @@ -57,6 +57,7 @@ class OceanOPSDeployments: >>> plan_virtualfleet = deployment.plan """ + api = "https://www.ocean-ops.org" """URL to the API""" @@ -64,7 +65,7 @@ class OceanOPSDeployments: """This model represents a Platform entity and is used to retrieve a 
platform information (schema model named 'Ptf').""" - api_server_check = 'https://www.ocean-ops.org/api/1/oceanops-api.yaml' + api_server_check = "https://www.ocean-ops.org/api/1/oceanops-api.yaml" """URL to check if the API is alive""" def __init__(self, box: list = None, deployed_only: bool = False): @@ -91,15 +92,25 @@ def __init__(self, box: list = None, deployed_only: bool = False): following status: ``OPERATIONAL``, ``INACTIVE``, and ``CLOSED``. """ if box is None: - box = [None, None, None, None, pd.to_datetime('now', utc=True).strftime("%Y-%m-%d"), None] + box = [ + None, + None, + None, + None, + pd.to_datetime("now", utc=True).strftime("%Y-%m-%d"), + None, + ] elif len(box) == 4: - box.append(pd.to_datetime('now', utc=True).strftime("%Y-%m-%d")) + box.append(pd.to_datetime("now", utc=True).strftime("%Y-%m-%d")) box.append(None) elif len(box) == 5: box.append(None) if len(box) != 6: - raise ValueError("The 'box' argument must be: None or of lengths 4 or 5 or 6\n%s" % str(box)) + raise ValueError( + "The 'box' argument must be: None or of lengths 4 or 5 or 6\n%s" + % str(box) + ) self.box = box self.deployed_only = deployed_only @@ -108,7 +119,7 @@ def __init__(self, box: list = None, deployed_only: bool = False): self.fs = httpstore(cache=False) def __format(self, x, typ: str) -> str: - """ string formatting helper """ + """string formatting helper""" if typ == "lon": return str(x) if x is not None else "-" elif typ == "lat": @@ -126,8 +137,10 @@ def __repr__(self): if self.data is not None: summary.append("Nb of floats in the deployment plan: %s" % self.size) else: - summary.append("Nb of floats in the deployment plan: - [Data not retrieved yet]") - return '\n'.join(summary) + summary.append( + "Nb of floats in the deployment plan: - [Data not retrieved yet]" + ) + return "\n".join(summary) def __encode_inc(self, inc): """Return encoded uri expression for 'include' parameter @@ -140,7 +153,7 @@ def __encode_inc(self, inc): ------- str """ - return inc.replace("\"", "%22").replace("[", "%5B").replace("]", "%5D") + return inc.replace('"', "%22").replace("[", "%5B").replace("]", "%5D") def __encode_exp(self, exp): """Return encoded uri expression for 'exp' parameter @@ -153,10 +166,19 @@ def __encode_exp(self, exp): ------- str """ - return exp.replace("\"", "%22").replace("'", "%27").replace(" ", "%20").replace(">", "%3E").replace("<", "%3C") + return ( + exp.replace('"', "%22") + .replace("'", "%27") + .replace(" ", "%20") + .replace(">", "%3E") + .replace("<", "%3C") + ) def __get_uri(self, encoded=False): - uri = "exp=%s&include=%s" % (self.exp(encoded=encoded), self.include(encoded=encoded)) + uri = "exp=%s&include=%s" % ( + self.exp(encoded=encoded), + self.include(encoded=encoded), + ) url = "%s/%s?%s" % (self.api, self.model, uri) return url @@ -176,10 +198,20 @@ def include(self, encoded=False): # inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus", "wmos"] # inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus.id", "ptfStatus.name", "wmos"] # inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus.id", "ptfStatus.name"] - inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus.id", "ptfStatus.name", - "ptfStatus.description", - "program.nameShort", "program.country.nameShort", "ptfModel.nameShort", "ptfDepl.noSite"] - inc = "[%s]" % ",".join(["\"%s\"" % v for v in inc]) + inc = [ + "ref", + "ptfDepl.lat", + "ptfDepl.lon", + "ptfDepl.deplDate", + "ptfStatus.id", + "ptfStatus.name", + 
"ptfStatus.description", + "program.nameShort", + "program.country.nameShort", + "ptfModel.nameShort", + "ptfDepl.noSite", + ] + inc = "[%s]" % ",".join(['"%s"' % v for v in inc]) return inc if not encoded else self.__encode_inc(inc) def exp(self, encoded=False): @@ -209,37 +241,49 @@ def exp(self, encoded=False): if len(self.box) > 4: if self.box[4] is not None: exp += " and ptfDepl.deplDate>=$var%i" % (len(arg) + 1) - arg.append("\"%s\"" % pd.to_datetime(self.box[4]).strftime("%Y-%m-%d %H:%M:%S")) + arg.append( + '"%s"' % pd.to_datetime(self.box[4]).strftime("%Y-%m-%d %H:%M:%S") + ) if self.box[5] is not None: exp += " and ptfDepl.deplDate<=$var%i" % (len(arg) + 1) - arg.append("\"%s\"" % pd.to_datetime(self.box[5]).strftime("%Y-%m-%d %H:%M:%S")) + arg.append( + '"%s"' % pd.to_datetime(self.box[5]).strftime("%Y-%m-%d %H:%M:%S") + ) if self.deployed_only: exp += " and ptfStatus>=$var%i" % (len(arg) + 1) arg.append(str(4)) # Allow for: 4, 5 or 6 - exp = "[\"%s\", %s]" % (exp, ", ".join(arg)) + exp = '["%s", %s]' % (exp, ", ".join(arg)) return exp if not encoded else self.__encode_exp(exp) @property def size(self): - return len(self.data['data']) if self.data is not None else None + return len(self.data["data"]) if self.data is not None else None @property def status_code(self): """Return a :class:`pandas.DataFrame` with the definition of status""" - status = {'status_code': [0, 1, 2, 6, 4, 5], - 'status_name': ['PROBABLE', 'CONFIRMED', 'REGISTERED', 'OPERATIONAL', 'INACTIVE', 'CLOSED'], - 'description': [ - 'Starting status for some platforms, when there is only a few metadata available, like rough deployment location and date. The platform may be deployed', - 'Automatically set when a ship is attached to the deployment information. The platform is ready to be deployed, deployment is planned', - 'Starting status for most of the networks, when deployment planning is not done. The deployment is certain, and a notification has been sent via the OceanOPS system', - 'Automatically set when the platform is emitting a pulse and observations are distributed within a certain time interval', - 'The platform is not emitting a pulse since a certain time', - 'The platform is not emitting a pulse since a long time, it is considered as dead', - ], - } - return pd.DataFrame(status).set_index('status_code') + status = { + "status_code": [0, 1, 2, 6, 4, 5], + "status_name": [ + "PROBABLE", + "CONFIRMED", + "REGISTERED", + "OPERATIONAL", + "INACTIVE", + "CLOSED", + ], + "description": [ + "Starting status for some platforms, when there is only a few metadata available, like rough deployment location and date. The platform may be deployed", + "Automatically set when a ship is attached to the deployment information. The platform is ready to be deployed, deployment is planned", + "Starting status for most of the networks, when deployment planning is not done. 
The deployment is certain, and a notification has been sent via the OceanOPS system", + "Automatically set when the platform is emitting a pulse and observations are distributed within a certain time interval", + "The platform is not emitting a pulse since a certain time", + "The platform is not emitting a pulse since a long time, it is considered as dead", + ], + } + return pd.DataFrame(status).set_index("status_code") @property def box_name(self): @@ -289,10 +333,14 @@ def plan(self): This method is for dev, but will be moved to the VirtualFleet software utilities """ df = self.to_dataframe() - plan = df[['lon', 'lat', 'date']].rename(columns={"date": "time"}).to_dict('series') + plan = ( + df[["lon", "lat", "date"]] + .rename(columns={"date": "time"}) + .to_dict("series") + ) for key in plan.keys(): plan[key] = plan[key].to_list() - plan['time'] = np.array(plan['time'], dtype='datetime64') + plan["time"] = np.array(plan["time"], dtype="datetime64") return plan def to_json(self): @@ -309,49 +357,59 @@ def to_dataframe(self): :class:`pandas.DataFrame` """ data = self.to_json() - if data['total'] == 0: - raise DataNotFound('Your search matches no results') + if data["total"] == 0: + raise DataNotFound("Your search matches no results") # res = {'date': [], 'lat': [], 'lon': [], 'wmo': [], 'status_name': [], 'status_code': []} # res = {'date': [], 'lat': [], 'lon': [], 'wmo': [], 'status_name': [], 'status_code': [], 'ship_name': []} - res = {'date': [], 'lat': [], 'lon': [], 'wmo': [], 'status_name': [], 'status_code': [], 'program': [], - 'country': [], 'model': []} + res = { + "date": [], + "lat": [], + "lon": [], + "wmo": [], + "status_name": [], + "status_code": [], + "program": [], + "country": [], + "model": [], + } # status = {'REGISTERED': None, 'OPERATIONAL': None, 'INACTIVE': None, 'CLOSED': None, # 'CONFIRMED': None, 'OPERATIONAL': None, 'PROBABLE': None, 'REGISTERED': None} - for irow, ptf in enumerate(data['data']): + for irow, ptf in enumerate(data["data"]): # if irow == 0: # print(ptf) - res['lat'].append(ptf['ptfDepl']['lat']) - res['lon'].append(ptf['ptfDepl']['lon']) - res['date'].append(ptf['ptfDepl']['deplDate']) - res['wmo'].append(ptf['ref']) + res["lat"].append(ptf["ptfDepl"]["lat"]) + res["lon"].append(ptf["ptfDepl"]["lon"]) + res["date"].append(ptf["ptfDepl"]["deplDate"]) + res["wmo"].append(ptf["ref"]) # res['wmo'].append(ptf['wmos'][-1]['wmo']) # res['wmo'].append(float_wmo(ptf['ref'])) # will not work for some CONFIRMED, PROBABLE or REGISTERED floats # res['wmo'].append(float_wmo(ptf['wmos'][-1]['wmo'])) - res['status_code'].append(ptf['ptfStatus']['id']) - res['status_name'].append(ptf['ptfStatus']['name']) + res["status_code"].append(ptf["ptfStatus"]["id"]) + res["status_name"].append(ptf["ptfStatus"]["name"]) # res['ship_name'].append(ptf['ptfDepl']['shipName']) - program = ptf['program']['nameShort'].replace("_", " ") if ptf['program']['nameShort'] else ptf['program'][ - 'nameShort'] - res['program'].append(program) - res['country'].append(ptf['program']['country']['nameShort']) - res['model'].append(ptf['ptfModel']['nameShort']) + program = ( + ptf["program"]["nameShort"].replace("_", " ") + if ptf["program"]["nameShort"] + else ptf["program"]["nameShort"] + ) + res["program"].append(program) + res["country"].append(ptf["program"]["country"]["nameShort"]) + res["model"].append(ptf["ptfModel"]["nameShort"]) # if status[ptf['ptfStatus']['name']] is None: # status[ptf['ptfStatus']['name']] = ptf['ptfStatus']['description'] df = pd.DataFrame(res) - df = 
df.astype({'date': 'datetime64[s]'}) - df = df.sort_values(by='date').reset_index(drop=True) + df = df.astype({"date": "datetime64[s]"}) + df = df.sort_values(by="date").reset_index(drop=True) # df = df[ (df['status_name'] == 'CLOSED') | (df['status_name'] == 'OPERATIONAL')] # Select only floats that have been deployed and returned data # print(status) return df - def plot_status(self, - **kwargs - ): + def plot_status(self, **kwargs): """Quick plot of the deployment plan Named arguments are passed to :class:`plot.scatter_map` @@ -362,16 +420,21 @@ def plot_status(self, ax: :class:`matplotlib.axes.Axes` """ df = self.to_dataframe() - fig, ax = scatter_map(df, - x='lon', - y='lat', - hue='status_code', - traj=False, - cmap='deployment_status', - **kwargs) - ax.set_title("Argo network deployment plan\n%s\nSource: OceanOPS API as of %s" - % (self.box_name, pd.to_datetime('now', utc=True).strftime("%Y-%m-%d %H:%M:%S")), - fontsize=12 - ) + fig, ax = scatter_map( + df, + x="lon", + y="lat", + hue="status_code", + traj=False, + cmap="deployment_status", + **kwargs + ) + ax.set_title( + "Argo network deployment plan\n%s\nSource: OceanOPS API as of %s" + % ( + self.box_name, + pd.to_datetime("now", utc=True).strftime("%Y-%m-%d %H:%M:%S"), + ), + fontsize=12, + ) return fig, ax - diff --git a/argopy/related/reference_tables.py b/argopy/related/reference_tables.py new file mode 100644 index 00000000..622eee7f --- /dev/null +++ b/argopy/related/reference_tables.py @@ -0,0 +1,245 @@ +import pandas as pd +from functools import lru_cache +import collections +from ..stores import httpstore +from ..options import OPTIONS + + +class ArgoNVSReferenceTables: + """Argo Reference Tables + + Utility function to retrieve Argo Reference Tables from a NVS server. + + By default, this relies on: https://vocab.nerc.ac.uk/collection + + Examples + -------- + Methods: + + >>> R = ArgoNVSReferenceTables() + >>> R.search('sensor') + >>> R.tbl(3) + >>> R.tbl('R09') + + Properties: + + >>> R.all_tbl_name + >>> R.all_tbl + >>> R.valid_ref + + """ + + valid_ref = [ + "R01", + "RR2", + "RD2", + "RP2", + "R03", + "R04", + "R05", + "R06", + "R07", + "R08", + "R09", + "R10", + "R11", + "R12", + "R13", + "R15", + "RMC", + "RTV", + "R16", + # "R18", + "R19", + "R20", + "R21", + "R22", + "R23", + "R24", + "R25", + "R26", + "R27", + # "R28", + # "R29", + # "R30", + "R40", + ] + """List of all available Reference Tables""" + + def __init__( + self, + nvs="https://vocab.nerc.ac.uk/collection", + cache: bool = True, + cachedir: str = "", + ): + """Argo Reference Tables from NVS""" + + cachedir = OPTIONS["cachedir"] if cachedir == "" else cachedir + self.fs = httpstore(cache=cache, cachedir=cachedir) + self.nvs = nvs + + def _valid_ref(self, rtid): + if rtid not in self.valid_ref: + rtid = "R%0.2d" % rtid + if rtid not in self.valid_ref: + raise ValueError( + "Invalid Argo Reference Table, should be one in: %s" + % ", ".join(self.valid_ref) + ) + return rtid + + def _jsConcept2df(self, data): + """Return all skos:Concept as class:`pandas.DataFrame`""" + content = { + "altLabel": [], + "prefLabel": [], + "definition": [], + "deprecated": [], + "id": [], + } + for k in data["@graph"]: + if k["@type"] == "skos:Collection": + Collection_name = k["alternative"] + elif k["@type"] == "skos:Concept": + content["altLabel"].append(k["altLabel"]) + content["prefLabel"].append(k["prefLabel"]["@value"]) + content["definition"].append(k["definition"]["@value"]) + content["deprecated"].append(k["deprecated"]) + content["id"].append(k["@id"]) + df = 
pd.DataFrame.from_dict(content) + df.name = Collection_name + return df + + def _jsCollection(self, data): + """Return last skos:Collection information as data""" + for k in data["@graph"]: + if k["@type"] == "skos:Collection": + name = k["alternative"] + desc = k["description"] + rtid = k["@id"] + return (name, desc, rtid) + + def get_url(self, rtid, fmt="ld+json"): + """Return URL toward a given reference table for a given format + + Parameters + ---------- + rtid: {str, int} + Name or number of the reference table to retrieve. Eg: 'R01', 12 + fmt: str, default: "ld+json" + Format of the NVS server response. Can be: "ld+json", "rdf+xml" or "text/turtle". + + Returns + ------- + str + """ + rtid = self._valid_ref(rtid) + if fmt == "ld+json": + fmt_ext = "?_profile=nvs&_mediatype=application/ld+json" + elif fmt == "rdf+xml": + fmt_ext = "?_profile=nvs&_mediatype=application/rdf+xml" + elif fmt == "text/turtle": + fmt_ext = "?_profile=nvs&_mediatype=text/turtle" + else: + raise ValueError( + "Invalid format. Must be in: 'ld+json', 'rdf+xml' or 'text/turtle'." + ) + url = "{}/{}/current/{}".format + return url(self.nvs, rtid, fmt_ext) + + @lru_cache + def tbl(self, rtid): + """Return an Argo Reference table + + Parameters + ---------- + rtid: {str, int} + Name or number of the reference table to retrieve. Eg: 'R01', 12 + + Returns + ------- + class:`pandas.DataFrame` + """ + rtid = self._valid_ref(rtid) + js = self.fs.open_json(self.get_url(rtid)) + df = self._jsConcept2df(js) + return df + + def tbl_name(self, rtid): + """Return name of an Argo Reference table + + Parameters + ---------- + rtid: {str, int} + Name or number of the reference table to retrieve. Eg: 'R01', 12 + + Returns + ------- + tuple('short name', 'description', 'NVS id link') + """ + rtid = self._valid_ref(rtid) + js = self.fs.open_json(self.get_url(rtid)) + return self._jsCollection(js) + + def search(self, txt, where="all"): + """Search for string in tables title and/or description + + Parameters + ---------- + txt: str + where: str, default='all' + Where to search, can be: 'title', 'description', 'all' + + Returns + ------- + list of table id matching the search + """ + results = [] + for tbl_id in self.all_tbl_name: + title = self.tbl_name(tbl_id)[0] + description = self.tbl_name(tbl_id)[1] + if where == "title": + if txt.lower() in title.lower(): + results.append(tbl_id) + elif where == "description": + if txt.lower() in description.lower(): + results.append(tbl_id) + elif where == "all": + if txt.lower() in description.lower() or txt.lower() in title.lower(): + results.append(tbl_id) + return results + + @property + def all_tbl(self): + """Return all Argo Reference tables + + Returns + ------- + OrderedDict + Dictionary with all table short names as key and table content as class:`pandas.DataFrame` + """ + URLs = [self.get_url(rtid) for rtid in self.valid_ref] + df_list = self.fs.open_mfjson(URLs, preprocess=self._jsConcept2df) + all_tables = {} + [all_tables.update({t.name: t}) for t in df_list] + all_tables = collections.OrderedDict(sorted(all_tables.items())) + return all_tables + + @property + def all_tbl_name(self): + """Return names of all Argo Reference tables + + Returns + ------- + OrderedDict + Dictionary with all table short names as key and table names as tuple('short name', 'description', 'NVS id link') + """ + URLs = [self.get_url(rtid) for rtid in self.valid_ref] + name_list = self.fs.open_mfjson(URLs, preprocess=self._jsCollection) + all_tables = {} + [ + all_tables.update({rtid.split("/")[-3]: 
(name, desc, rtid)}) + for name, desc, rtid in name_list + ] + all_tables = collections.OrderedDict(sorted(all_tables.items())) + return all_tables diff --git a/argopy/related/topography.py b/argopy/related/topography.py index 39b7f4b1..dde36773 100644 --- a/argopy/related/topography.py +++ b/argopy/related/topography.py @@ -5,7 +5,7 @@ class TopoFetcher: - """ Fetch topographic data through an ERDDAP server for an ocean rectangle + """Fetch topographic data through an ERDDAP server for an ocean rectangle Example: >>> from argopy import TopoFetcher @@ -34,7 +34,7 @@ def __init__( server: Union[str] = None, **kwargs, ): - """ Instantiate an ERDDAP topo data fetcher + """Instantiate an ERDDAP topo data fetcher Parameters ---------- @@ -61,7 +61,11 @@ def __init__( self.stride = stride if ds == "gebco": self.definition = "NOAA erddap gebco data fetcher for a space region" - self.server = server if server is not None else "https://coastwatch.pfeg.noaa.gov/erddap" + self.server = ( + server + if server is not None + else "https://coastwatch.pfeg.noaa.gov/erddap" + ) self.server_name = "NOAA" self.dataset_id = "gebco" @@ -81,7 +85,7 @@ def _init_erddap(self): return self def _cname(self) -> str: - """ Fetcher one line string definition helper """ + """Fetcher one line string definition helper""" cname = "?" if hasattr(self, "BOX"): @@ -102,12 +106,12 @@ def __repr__(self): return "\n".join(summary) def cname(self): - """ Return a unique string defining the constraints """ + """Return a unique string defining the constraints""" return self._cname() @property def cachepath(self): - """ Return path to cached file(s) for this request + """Return path to cached file(s) for this request Returns ------- @@ -116,7 +120,7 @@ def cachepath(self): return [self.fs.cachepath(uri) for uri in self.uri] def define_constraints(self): - """ Define request constraints """ + """Define request constraints""" # Eg: https://coastwatch.pfeg.noaa.gov/erddap/griddap/GEBCO_2020.nc?elevation%5B(34):5:(42)%5D%5B(-21):7:(-12)%5D self.erddap.constraints = "%s(%0.2f):%i:(%0.2f)%s%s(%0.2f):%i:(%0.2f)%s" % ( "%5B", @@ -142,9 +146,9 @@ def define_constraints(self): # return vlist def url_encode(self, url): - """ Return safely encoded list of urls + """Return safely encoded list of urls - This is necessary because fsspec cannot handle in cache paths/urls with a '[' character + This is necessary because fsspec cannot handle in cache paths/urls with a '[' character """ # return urls @@ -156,7 +160,7 @@ def safe_for_fsspec_cache(url): return safe_for_fsspec_cache(url) def get_url(self): - """ Return the URL to download data requested + """Return the URL to download data requested Returns ------- @@ -181,7 +185,7 @@ def get_url(self): @property def uri(self): - """ List of files to load for a request + """List of files to load for a request Returns ------- @@ -190,7 +194,7 @@ def uri(self): return [self.get_url()] def to_xarray(self, errors: str = "ignore"): - """ Load Topographic data and return a xarray.DataSet """ + """Load Topographic data and return a xarray.DataSet""" # Download data if len(self.uri) == 1: @@ -199,5 +203,5 @@ def to_xarray(self, errors: str = "ignore"): return ds def load(self, errors: str = "ignore"): - """ Load Topographic data and return a xarray.DataSet """ + """Load Topographic data and return a xarray.DataSet""" return self.to_xarray(errors=errors) diff --git a/argopy/utilities.py b/argopy/utilities.py index fc6c8e37..831b05fb 100644 --- a/argopy/utilities.py +++ b/argopy/utilities.py @@ -2705,243 +2705,6 
@@ def get_ea_profile_page(WMO, CYC=None, **kwargs): return [url.format(this_id) for this_id in sorted(df["ID"])] -class ArgoNVSReferenceTables: - """Argo Reference Tables - - Utility function to retrieve Argo Reference Tables from a NVS server. - - By default, this relies on: https://vocab.nerc.ac.uk/collection - - Examples - -------- - Methods: - - >>> R = ArgoNVSReferenceTables() - >>> R.search('sensor') - >>> R.tbl(3) - >>> R.tbl('R09') - - Properties: - - >>> R.all_tbl_name - >>> R.all_tbl - >>> R.valid_ref - - """ - valid_ref = [ - "R01", - "RR2", - "RD2", - "RP2", - "R03", - "R04", - "R05", - "R06", - "R07", - "R08", - "R09", - "R10", - "R11", - "R12", - "R13", - "R15", - "RMC", - "RTV", - "R16", - # "R18", - "R19", - "R20", - "R21", - "R22", - "R23", - "R24", - "R25", - "R26", - "R27", - # "R28", - # "R29", - # "R30", - "R40", - ] - """List of all available Reference Tables""" - - def __init__(self, - nvs="https://vocab.nerc.ac.uk/collection", - cache: bool = True, - cachedir: str = "", - ): - """Argo Reference Tables from NVS""" - from .stores import httpstore - cachedir = OPTIONS["cachedir"] if cachedir == "" else cachedir - self.fs = httpstore(cache=cache, cachedir=cachedir) - self.nvs = nvs - - def _valid_ref(self, rtid): - if rtid not in self.valid_ref: - rtid = "R%0.2d" % rtid - if rtid not in self.valid_ref: - raise ValueError( - "Invalid Argo Reference Table, should be one in: %s" - % ", ".join(self.valid_ref) - ) - return rtid - - def _jsConcept2df(self, data): - """Return all skos:Concept as class:`pandas.DataFrame`""" - content = { - "altLabel": [], - "prefLabel": [], - "definition": [], - "deprecated": [], - "id": [], - } - for k in data["@graph"]: - if k["@type"] == "skos:Collection": - Collection_name = k["alternative"] - elif k["@type"] == "skos:Concept": - content["altLabel"].append(k["altLabel"]) - content["prefLabel"].append(k["prefLabel"]["@value"]) - content["definition"].append(k["definition"]["@value"]) - content["deprecated"].append(k["deprecated"]) - content["id"].append(k["@id"]) - df = pd.DataFrame.from_dict(content) - df.name = Collection_name - return df - - def _jsCollection(self, data): - """Return last skos:Collection information as data""" - for k in data["@graph"]: - if k["@type"] == "skos:Collection": - name = k["alternative"] - desc = k["description"] - rtid = k["@id"] - return (name, desc, rtid) - - def get_url(self, rtid, fmt="ld+json"): - """Return URL toward a given reference table for a given format - - Parameters - ---------- - rtid: {str, int} - Name or number of the reference table to retrieve. Eg: 'R01', 12 - fmt: str, default: "ld+json" - Format of the NVS server response. Can be: "ld+json", "rdf+xml" or "text/turtle". - - Returns - ------- - str - """ - rtid = self._valid_ref(rtid) - if fmt == "ld+json": - fmt_ext = "?_profile=nvs&_mediatype=application/ld+json" - elif fmt == "rdf+xml": - fmt_ext = "?_profile=nvs&_mediatype=application/rdf+xml" - elif fmt == "text/turtle": - fmt_ext = "?_profile=nvs&_mediatype=text/turtle" - else: - raise ValueError("Invalid format. Must be in: 'ld+json', 'rdf+xml' or 'text/turtle'.") - url = "{}/{}/current/{}".format - return url(self.nvs, rtid, fmt_ext) - - @lru_cache - def tbl(self, rtid): - """Return an Argo Reference table - - Parameters - ---------- - rtid: {str, int} - Name or number of the reference table to retrieve. 
Eg: 'R01', 12 - - Returns - ------- - class:`pandas.DataFrame` - """ - rtid = self._valid_ref(rtid) - js = self.fs.open_json(self.get_url(rtid)) - df = self._jsConcept2df(js) - return df - - def tbl_name(self, rtid): - """Return name of an Argo Reference table - - Parameters - ---------- - rtid: {str, int} - Name or number of the reference table to retrieve. Eg: 'R01', 12 - - Returns - ------- - tuple('short name', 'description', 'NVS id link') - """ - rtid = self._valid_ref(rtid) - js = self.fs.open_json(self.get_url(rtid)) - return self._jsCollection(js) - - def search(self, txt, where='all'): - """Search for string in tables title and/or description - - Parameters - ---------- - txt: str - where: str, default='all' - Where to search, can be: 'title', 'description', 'all' - - Returns - ------- - list of table id matching the search - """ - results = [] - for tbl_id in self.all_tbl_name: - title = self.tbl_name(tbl_id)[0] - description = self.tbl_name(tbl_id)[1] - if where == 'title': - if txt.lower() in title.lower(): - results.append(tbl_id) - elif where == 'description': - if txt.lower() in description.lower(): - results.append(tbl_id) - elif where == 'all': - if txt.lower() in description.lower() or txt.lower() in title.lower(): - results.append(tbl_id) - return results - - @property - def all_tbl(self): - """Return all Argo Reference tables - - Returns - ------- - OrderedDict - Dictionary with all table short names as key and table content as class:`pandas.DataFrame` - """ - URLs = [self.get_url(rtid) for rtid in self.valid_ref] - df_list = self.fs.open_mfjson(URLs, preprocess=self._jsConcept2df) - all_tables = {} - [all_tables.update({t.name: t}) for t in df_list] - all_tables = collections.OrderedDict(sorted(all_tables.items())) - return all_tables - - @property - def all_tbl_name(self): - """Return names of all Argo Reference tables - - Returns - ------- - OrderedDict - Dictionary with all table short names as key and table names as tuple('short name', 'description', 'NVS id link') - """ - URLs = [self.get_url(rtid) for rtid in self.valid_ref] - name_list = self.fs.open_mfjson(URLs, preprocess=self._jsCollection) - all_tables = {} - [ - all_tables.update({rtid.split("/")[-3]: (name, desc, rtid)}) - for name, desc, rtid in name_list - ] - all_tables = collections.OrderedDict(sorted(all_tables.items())) - return all_tables - - - @deprecated def cast_types(ds): # noqa: C901 """ Make sure variables are of the appropriate types according to Argo @@ -3314,352 +3077,6 @@ def log_argopy_callerstack(level='debug'): log.warning(msg) -class ArgoDocs: - """ADMT documentation helper class - - Examples - -------- - >>> ArgoDocs().list - >>> ArgoDocs().search("CDOM") - >>> ArgoDocs().search("CDOM", where='abstract') - - >>> ArgoDocs(35385) - >>> ArgoDocs(35385).ris - >>> ArgoDocs(35385).abstract - >>> ArgoDocs(35385).show() - >>> ArgoDocs(35385).open_pdf() - >>> ArgoDocs(35385).open_pdf(page=12) - - """ - _catalogue = [ - { - "category": "Argo data formats", - "title": "Argo user's manual", - "doi": "10.13155/29825", - "id": 29825 - }, - { - "category": "Quality control", - "title": "Argo Quality Control Manual for CTD and Trajectory Data", - "doi": "10.13155/33951", - "id": 33951 - }, - { - "category": "Quality control", - "title": "Argo quality control manual for dissolved oxygen concentration", - "doi": "10.13155/46542", - "id": 46542 - }, - { - "category": "Quality control", - "title": "Argo quality control manual for biogeochemical data", - "doi": "10.13155/40879", - "id": 40879 - }, - { - 
"category": "Quality control", - "title": "BGC-Argo quality control manual for the Chlorophyll-A concentration", - "doi": "10.13155/35385", - "id": 35385 - }, - { - "category": "Quality control", - "title": "BGC-Argo quality control manual for nitrate concentration", - "doi": "10.13155/84370", - "id": 84370 - }, - { - "category": "Quality control", - "title": "Quality control for BGC-Argo radiometry", - "doi": "10.13155/62466", - "id": 62466 - }, - { - "category": "Cookbooks", - "title": "Argo DAC profile cookbook", - "doi": "10.13155/41151", - "id": 41151 - }, - { - "category": "Cookbooks", - "title": "Argo DAC trajectory cookbook", - "doi": "10.13155/29824", - "id": 29824 - }, - { - "category": "Cookbooks", - "title": "DMQC Cookbook for Core Argo parameters", - "doi": "10.13155/78994", - "id": 78994 - }, - { - "category": "Cookbooks", - "title": "Processing Argo oxygen data at the DAC level", - "doi": "10.13155/39795", - "id": 39795 - }, - { - "category": "Cookbooks", - "title": "Processing Bio-Argo particle backscattering at the DAC level", - "doi": "10.13155/39459", - "id": 39459 - }, - { - "category": "Cookbooks", - "title": "Processing BGC-Argo chlorophyll-A concentration at the DAC level", - "doi": "10.13155/39468", - "id": 39468 - }, - { - "category": "Cookbooks", - "title": "Processing Argo measurement timing information at the DAC level", - "doi": "10.13155/47998", - "id": 47998 - }, - { - "category": "Cookbooks", - "title": "Processing BGC-Argo CDOM concentration at the DAC level", - "doi": "10.13155/54541", - "id": 54541 - }, - { - "category": "Cookbooks", - "title": "Processing Bio-Argo nitrate concentration at the DAC Level", - "doi": "10.13155/46121", - "id": 46121 - }, - { - "category": "Cookbooks", - "title": "Processing BGC-Argo Radiometric data at the DAC level", - "doi": "10.13155/51541", - "id": 51541 - }, - { - "category": "Cookbooks", - "title": "Processing BGC-Argo pH data at the DAC level", - "doi": "10.13155/57195", - "id": 57195 - }, - { - "category": "Cookbooks", - "title": "Description of the Argo GDAC File Checks: Data Format and Consistency Checks", - "doi": "10.13155/46120", - "id": 46120 - }, - { - "category": "Cookbooks", - "title": "Description of the Argo GDAC File Merge Process", - "doi": "10.13155/52154", - "id": 52154 - }, - { - "category": "Cookbooks", - "title": "BGC-Argo synthetic profile file processing and format on Coriolis GDAC", - "doi": "10.13155/55637", - "id": 55637 - }, - { - "category": "Cookbooks", - "title": "Argo GDAC cookbook", - "doi": "10.13155/46202", - "id": 46202 - } - ] - - class RIS: - """RIS file structure from TXT file""" - - def __init__(self, file=None, fs=None): - self.record = None - self.fs = fs - if file: - self.parse(file) - - def parse(self, file): - """Parse input file""" - # log.debug(file) - - with self.fs.open(file, 'r', encoding="utf-8") as f: - TXTlines = f.readlines() - lines = [] - # Eliminate blank lines - for line in TXTlines: - line = line.strip() - if len(line) > 0: - lines.append(line) - TXTlines = lines - - # - record = {} - for line in TXTlines: - # print("\n>", line) - if len(line) > 2: - if line[2] == " ": - tag = line[0:2] - field = line[3:] - # print("ok", {tag: field}) - record[tag] = [field] - else: - # print("-", line) - record[tag].append(line) - elif len(line) == 2: - record[line] = [] - # else: - # print("*", line) - - for key in record.keys(): - record[key] = "; ".join(record[key]) - - self.record = record - - @lru_cache - def __init__(self, docid=None, cache=False): - from .stores import 
httpstore - - self.docid = None - self._ris = None - self._risfile = None - self._fs = httpstore(cache=cache, cachedir=OPTIONS['cachedir']) - self._doiserver = "https://dx.doi.org" - self._archimer = "https://archimer.ifremer.fr" - - if isinstance(docid, int): - if docid in [doc['id'] for doc in self._catalogue]: - self.docid = docid - else: - raise ValueError("Unknown document id") - elif isinstance(docid, str): - start_with = lambda f, x: f[0:len(x)] == x if len(x) <= len(f) else False # noqa: E731 - if start_with(docid, '10.13155/') and docid in [doc['doi'] for doc in self._catalogue]: - self.docid = [doc['id'] for doc in self._catalogue if docid == doc['doi']][0] - else: - raise ValueError("'docid' must be an integer or a valid Argo DOI") - - def __repr__(self): - summary = [""] - if self.docid is not None: - doc = [doc for doc in self._catalogue if doc['id'] == self.docid][0] - summary.append("Title: %s" % doc['title']) - summary.append("DOI: %s" % doc['doi']) - summary.append("url: https://dx.doi.org/%s" % doc['doi']) - summary.append("last pdf: %s" % self.pdf) - if 'AF' in self.ris: - summary.append("Authors: %s" % self.ris['AF']) - summary.append("Abstract: %s" % self.ris['AB']) - else: - summary.append("- %i documents with a DOI are available in the catalogue" % len(self._catalogue)) - summary.append("- Use the method 'search' to find a document id") - summary.append("- Use the property 'list' to check out the catalogue") - return "\n".join(summary) - - @property - def list(self): - """List of all available documents as a :class:`pandas.DataFrame`""" - return pd.DataFrame(self._catalogue) - - @property - def js(self): - """Internal json record for a document""" - if self.docid is not None: - return [doc for doc in self._catalogue if doc['id'] == self.docid][0] - else: - raise ValueError("Select a document first !") - - @property - def ris(self): - """RIS record of a document""" - if self.docid is not None: - if self._ris is None: - # Fetch RIS metadata for this document: - import re - file = self._fs.download_url("%s/%s" % (self._doiserver, self.js['doi'])) - x = re.search(r']*)rel="nofollow">TXT<\/a>', - str(file)) - export_txt_url = x[1].replace("https://archimer.ifremer.fr", self._archimer) - self._risfile = export_txt_url - self._ris = self.RIS(export_txt_url, fs=self._fs).record - return self._ris - else: - raise ValueError("Select a document first !") - - @property - def abstract(self): - """Abstract of a document""" - if self.docid is not None: - return self.ris['AB'] - else: - raise ValueError("Select a document first !") - - @property - def pdf(self): - """Link to the online pdf version of a document""" - if self.docid is not None: - return self.ris['UR'] - else: - raise ValueError("Select a document first !") - - def show(self, height=800): - """Insert document in pdf in a notebook cell - - Parameters - ---------- - height: int - Height in pixels of the cell - """ - if self.docid is not None: - from IPython.core.display import HTML - return HTML( - '' % (self.ris['UR'], height)) - else: - raise ValueError("Select a document first !") - - def open_pdf(self, page=None, url_only=False): - """Open document in new browser tab - - Parameters - ---------- - page: int, optional - Open directly a specific page number - """ - url = self.pdf - url += '#view=FitV&pagemode=thumbs' - if page: - url += '&page=%i' % page - if self.docid is not None: - if not url_only: - import webbrowser - webbrowser.open_new(url) - else: - return url - else: - raise ValueError("Select a document first 
!") - - def search(self, txt, where='title'): - """Search for string in all documents title or abstract - - Parameters - ---------- - txt: str - where: str, default='title' - Where to search, can be 'title' or 'abstract' - - Returns - ------- - list - - """ - results = [] - for doc in self.list.iterrows(): - docid = doc[1]['id'] - if where == 'title': - if txt.lower() in ArgoDocs(docid).js['title'].lower(): - results.append(docid) - elif where == 'abstract': - if txt.lower() in ArgoDocs(docid).abstract.lower(): - results.append(docid) - return results - - def drop_variables_not_in_all_datasets(ds_collection): """Drop variables that are not in all datasets (the lowest common denominator) From 3db2954dccc4576e127f8079563a6421b3c6ba00 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Wed, 6 Sep 2023 16:27:30 +0200 Subject: [PATCH 05/33] Update __init__.py --- argopy/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/argopy/__init__.py b/argopy/__init__.py index 8f2c8392..72602786 100644 --- a/argopy/__init__.py +++ b/argopy/__init__.py @@ -35,13 +35,12 @@ from . import plot # noqa: E402 from .plot import dashboard, ArgoColors # noqa: E402 from .utilities import show_versions, show_options, clear_cache, lscache # noqa: E402 -from .utilities import ArgoDocs # noqa: E402 from .utilities import monitor_status as status # noqa: E402 from .options import set_options, reset_options # noqa: E402 from .data_fetchers import CTDRefDataFetcher # noqa: E402 from .stores import ArgoIndex # noqa: E402 from .utils import compute # noqa: E402, F401 -from .related import TopoFetcher, OceanOPSDeployments, ArgoNVSReferenceTables # noqa: E402 +from .related import TopoFetcher, OceanOPSDeployments, ArgoNVSReferenceTables, ArgoDocs # noqa: E402 # __all__ = ( @@ -73,7 +72,6 @@ "errors", "plot", "ArgoColors", # Class - # "plotters", # Deprec, to be removed after 0.1.13 "stores", "tutorial", # Constants From b64fabe2ecc2c957cd5fb3400450baf265d9f9ec Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Wed, 6 Sep 2023 16:27:50 +0200 Subject: [PATCH 06/33] refactor tests locations --- argopy/tests/test_related.py | 303 +++++++++++++++++++++++++++++++++ argopy/tests/test_utilities.py | 287 ------------------------------- 2 files changed, 303 insertions(+), 287 deletions(-) create mode 100644 argopy/tests/test_related.py diff --git a/argopy/tests/test_related.py b/argopy/tests/test_related.py new file mode 100644 index 00000000..82f2028c --- /dev/null +++ b/argopy/tests/test_related.py @@ -0,0 +1,303 @@ +import pytest +import tempfile +import xarray as xr +import pandas as pd +from collections import ChainMap, OrderedDict +import shutil + +from mocked_http import mocked_httpserver, mocked_server_address +from utils import ( + requires_matplotlib, + requires_cartopy, + requires_oops, + has_matplotlib, + has_cartopy, + has_ipython, +) +from argopy.related import ( + TopoFetcher, + ArgoNVSReferenceTables, + OceanOPSDeployments, + ArgoDocs, +) +from argopy.utilities import ( + is_list_of_strings, +) + +if has_matplotlib: + import matplotlib as mpl + +if has_cartopy: + import cartopy + +if has_ipython: + import IPython + + +class Test_TopoFetcher(): + box = [81, 123, -67, -54] + + def setup_class(self): + """setup any state specific to the execution of the given class""" + # Create the cache folder here, so that it's not the same for the pandas and pyarrow tests + self.cachedir = tempfile.mkdtemp() + + def teardown_class(self): + """Cleanup once we are finished.""" + def remove_test_dir(): + 
shutil.rmtree(self.cachedir) + remove_test_dir() + + def make_a_fetcher(self, cached=False): + opts = {'ds': 'gebco', 'stride': [10, 10], 'server': mocked_server_address} + if cached: + opts = ChainMap(opts, {'cache': True, 'cachedir': self.cachedir}) + return TopoFetcher(self.box, **opts) + + def assert_fetcher(self, f): + ds = f.to_xarray() + assert isinstance(ds, xr.Dataset) + assert 'elevation' in ds.data_vars + + def test_load_mocked_server(self, mocked_httpserver): + """This will easily ensure that the module scope fixture is available to all methods !""" + assert True + + params = [True, False] + ids_params = ["cached=%s" % p for p in params] + @pytest.mark.parametrize("params", params, indirect=False, ids=ids_params) + def test_fetching(self, params): + fetcher = self.make_a_fetcher(cached=params) + self.assert_fetcher(fetcher) + + +class Test_ArgoNVSReferenceTables: + + def setup_class(self): + """setup any state specific to the execution of the given class""" + # Create the cache folder here, so that it's not the same for the pandas and pyarrow tests + self.cachedir = tempfile.mkdtemp() + self.nvs = ArgoNVSReferenceTables(cache=True, cachedir=self.cachedir, nvs=mocked_server_address) + + def teardown_class(self): + """Cleanup once we are finished.""" + def remove_test_dir(): + shutil.rmtree(self.cachedir) + remove_test_dir() + + def test_load_mocked_server(self, mocked_httpserver): + """This will easily ensure that the module scope fixture is available to all methods !""" + assert True + + def test_valid_ref(self): + assert is_list_of_strings(self.nvs.valid_ref) + + opts = [3, 'R09'] + opts_ids = ["rtid is a %s" % type(o) for o in opts] + @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) + def test_tbl(self, opts): + assert isinstance(self.nvs.tbl(opts), pd.DataFrame) + + opts = [3, 'R09'] + opts_ids = ["rtid is a %s" % type(o) for o in opts] + @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) + def test_tbl_name(self, opts): + names = self.nvs.tbl_name(opts) + assert isinstance(names, tuple) + assert isinstance(names[0], str) + assert isinstance(names[1], str) + assert isinstance(names[2], str) + + def test_all_tbl(self): + all = self.nvs.all_tbl + assert isinstance(all, OrderedDict) + assert isinstance(all[list(all.keys())[0]], pd.DataFrame) + + def test_all_tbl_name(self): + all = self.nvs.all_tbl_name + assert isinstance(all, OrderedDict) + assert isinstance(all[list(all.keys())[0]], tuple) + + opts = ["ld+json", "rdf+xml", "text/turtle", "invalid"] + opts_ids = ["fmt=%s" % o for o in opts] + @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) + def test_get_url(self, opts): + if opts != 'invalid': + url = self.nvs.get_url(3, fmt=opts) + assert isinstance(url, str) + if "json" in opts: + data = self.nvs.fs.open_json(url) + assert isinstance(data, dict) + elif "xml" in opts: + data = self.nvs.fs.fs.cat_file(url) + assert data[0:5] == b' Date: Wed, 6 Sep 2023 16:31:32 +0200 Subject: [PATCH 07/33] Update api-hidden.rst --- docs/api-hidden.rst | 60 ++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/docs/api-hidden.rst b/docs/api-hidden.rst index c7c307da..ff4e968d 100644 --- a/docs/api-hidden.rst +++ b/docs/api-hidden.rst @@ -69,25 +69,9 @@ argopy.utilities.isalive argopy.utilities.isAPIconnected - argopy.utilities.ArgoNVSReferenceTables - argopy.utilities.ArgoNVSReferenceTables.search - argopy.utilities.ArgoNVSReferenceTables.valid_ref - 
argopy.utilities.ArgoNVSReferenceTables.all_tbl - argopy.utilities.ArgoNVSReferenceTables.all_tbl_name - argopy.utilities.ArgoNVSReferenceTables.tbl - argopy.utilities.ArgoNVSReferenceTables.tbl_name - argopy.utilities.groupby_remap argopy.utilities.linear_interpolation_remap - argopy.utilities.TopoFetcher.cname - argopy.utilities.TopoFetcher.define_constraints - argopy.utilities.TopoFetcher.get_url - argopy.utilities.TopoFetcher.load - argopy.utilities.TopoFetcher.to_xarray - argopy.utilities.TopoFetcher.cachepath - argopy.utilities.TopoFetcher.uri - argopy.utilities.list_standard_variables argopy.utilities.list_multiprofile_file_variables argopy.utilities.load_dict @@ -105,25 +89,41 @@ argopy.utilities.get_coriolis_profile_id argopy.utilities.get_ea_profile_page - argopy.utilities.OceanOPSDeployments - argopy.utilities.OceanOPSDeployments.to_dataframe - argopy.utilities.OceanOPSDeployments.status_code - - argopy.utilities.ArgoDocs - argopy.utilities.ArgoDocs.list - argopy.utilities.ArgoDocs.search - argopy.utilities.ArgoDocs.ris - argopy.utilities.ArgoDocs.abstract - argopy.utilities.ArgoDocs.pdf - argopy.utilities.ArgoDocs.open_pdf - argopy.utilities.ArgoDocs.show - argopy.utilities.ArgoDocs.js - argopy.utilities.drop_variables_not_in_all_datasets argopy.utilities.fill_variables_not_in_all_datasets argopy.utils.compute.MyThreadPoolExecutor + argopy.related.TopoFetcher.cname + argopy.related.TopoFetcher.define_constraints + argopy.related.TopoFetcher.get_url + argopy.related.TopoFetcher.load + argopy.related.TopoFetcher.to_xarray + argopy.related.TopoFetcher.cachepath + argopy.related.TopoFetcher.uri + + argopy.related.ArgoNVSReferenceTables + argopy.related.ArgoNVSReferenceTables.search + argopy.related.ArgoNVSReferenceTables.valid_ref + argopy.related.ArgoNVSReferenceTables.all_tbl + argopy.related.ArgoNVSReferenceTables.all_tbl_name + argopy.related.ArgoNVSReferenceTables.tbl + argopy.related.ArgoNVSReferenceTables.tbl_name + + argopy.related.OceanOPSDeployments + argopy.related.OceanOPSDeployments.to_dataframe + argopy.related.OceanOPSDeployments.status_code + + argopy.related.ArgoDocs + argopy.related.ArgoDocs.list + argopy.related.ArgoDocs.search + argopy.related.ArgoDocs.ris + argopy.related.ArgoDocs.abstract + argopy.related.ArgoDocs.pdf + argopy.related.ArgoDocs.open_pdf + argopy.related.ArgoDocs.show + argopy.related.ArgoDocs.js + argopy.plot argopy.plot.dashboard From ec22c17fc809bdeb3475eb4f6f06cdf0d77a3999 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Wed, 6 Sep 2023 16:37:19 +0200 Subject: [PATCH 08/33] update MyThreadPoolExecutor access [skip-ci] --- argopy/__init__.py | 2 +- argopy/stores/filesystems.py | 2 +- argopy/utils/__init__.py | 7 +++++++ argopy/utils/{compute.py => monitored_threadpool.py} | 6 +++--- docs/api-hidden.rst | 2 +- 5 files changed, 13 insertions(+), 6 deletions(-) rename argopy/utils/{compute.py => monitored_threadpool.py} (99%) diff --git a/argopy/__init__.py b/argopy/__init__.py index 72602786..58a9fe29 100644 --- a/argopy/__init__.py +++ b/argopy/__init__.py @@ -39,7 +39,7 @@ from .options import set_options, reset_options # noqa: E402 from .data_fetchers import CTDRefDataFetcher # noqa: E402 from .stores import ArgoIndex # noqa: E402 -from .utils import compute # noqa: E402, F401 +from .utils import monitored_threadpool # noqa: E402, F401 from .related import TopoFetcher, OceanOPSDeployments, ArgoNVSReferenceTables, ArgoDocs # noqa: E402 # diff --git a/argopy/stores/filesystems.py b/argopy/stores/filesystems.py index b37aedd9..aede839b 
100644 --- a/argopy/stores/filesystems.py +++ b/argopy/stores/filesystems.py @@ -54,7 +54,7 @@ drop_variables_not_in_all_datasets, fill_variables_not_in_all_datasets, ) -from ..utils.compute import MyThreadPoolExecutor as MyExecutor +from ..utils import MyThreadPoolExecutor as MyExecutor log = logging.getLogger("argopy.stores") diff --git a/argopy/utils/__init__.py b/argopy/utils/__init__.py index e69de29b..c93ca7db 100644 --- a/argopy/utils/__init__.py +++ b/argopy/utils/__init__.py @@ -0,0 +1,7 @@ +from .monitored_threadpool import MyThreadPoolExecutor + + +__all__ = ( + # Classes: + "MyThreadPoolExecutor", +) diff --git a/argopy/utils/compute.py b/argopy/utils/monitored_threadpool.py similarity index 99% rename from argopy/utils/compute.py rename to argopy/utils/monitored_threadpool.py index 5b4a0385..5b637230 100644 --- a/argopy/utils/compute.py +++ b/argopy/utils/monitored_threadpool.py @@ -1,5 +1,5 @@ """ -This sub-module provides utilities for miscellaneous computation tasks +This sub-module provides utilities for miscellaneous computation tasks with multitheading We construct the MyThreadPoolExecutor class, we create a series of classes using multiple inheritance to implement monitoring features @@ -527,13 +527,13 @@ class c(proto_MonitoredPoolExecutor_terminal): class MyThreadPoolExecutor(c): """ - This is a low-level helper class not intended to be used directly. + This is a low-level helper class not intended to be used directly by users Examples -------- :: - from argopy.utils.compute import MyThreadPoolExecutor as MyExecutor + from argopy.utils import MyThreadPoolExecutor as MyExecutor from random import random from time import sleep import numpy as np diff --git a/docs/api-hidden.rst b/docs/api-hidden.rst index ff4e968d..67b13130 100644 --- a/docs/api-hidden.rst +++ b/docs/api-hidden.rst @@ -92,7 +92,7 @@ argopy.utilities.drop_variables_not_in_all_datasets argopy.utilities.fill_variables_not_in_all_datasets - argopy.utils.compute.MyThreadPoolExecutor + argopy.utils.MyThreadPoolExecutor argopy.related.TopoFetcher.cname argopy.related.TopoFetcher.define_constraints From 44025d2e49bc3b6472356883ecd95e2cd86ecdbc Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Thu, 7 Sep 2023 08:30:35 +0200 Subject: [PATCH 09/33] Update __init__.py --- argopy/related/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argopy/related/__init__.py b/argopy/related/__init__.py index 87520fa9..84c62f22 100644 --- a/argopy/related/__init__.py +++ b/argopy/related/__init__.py @@ -2,7 +2,7 @@ from .ocean_ops_deployments import OceanOPSDeployments from .reference_tables import ArgoNVSReferenceTables from .argo_documentation import ArgoDocs -from .gdac_snapshot import ArgoDOI +from .doi_snapshot import ArgoDOI # From 6891cbd9acd767e2341d57031fda0340163efcf5 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Thu, 7 Sep 2023 08:30:42 +0200 Subject: [PATCH 10/33] Delete gdac_snapshot.py --- argopy/related/gdac_snapshot.py | 389 -------------------------------- 1 file changed, 389 deletions(-) delete mode 100644 argopy/related/gdac_snapshot.py diff --git a/argopy/related/gdac_snapshot.py b/argopy/related/gdac_snapshot.py deleted file mode 100644 index 6c46b417..00000000 --- a/argopy/related/gdac_snapshot.py +++ /dev/null @@ -1,389 +0,0 @@ -import pandas as pd -import numpy as np -import warnings -from typing import Union - -# from matplotlib.colors import to_hex -# from IPython.display import IFrame - -from ..stores import httpstore - - -class DOIrecord: - """Helper class for an 
Argo GDAC snapshot DOI record - - Examples - -------- - d = DOIrecord() - d = DOIrecord('42182') - d = DOIrecord('42182#103075') - d = DOIrecord(hashtag='103075') - d = DOIrecord(hashtag='103088') - - d.doi - d.dx - d.isvalid - d.date - d.network - d.data - d.file - - """ - root = "" - - def __init__( - self, - doi: str = "10.17882/42182", - hashtag: str = None, - fs: httpstore = None, - autoload: bool = True, - api_root: str = "https://www.seanoe.org/api/", - ): - self.api_root = api_root - self._fs = fs # A httpstore will be created if necessary if self.load() is called - self._data = None - - self._doi = doi - self._hashtag = hashtag - if "#" in doi: - self._doi = doi.split("#")[0] - self._hashtag = doi.split("#")[-1] - - if autoload: - self.load() - - @property - def doi(self) -> str: - """DOI component (without hashtag)""" - return self._doi - - @property - def hashtag(self) -> str: - """Hashtag of the full doi""" - return self._hashtag - - @property - def dx(self) -> str: - """DOI url""" - return "https:/dx.doi.org/%s" % str(self) - - def isvalid(self) -> bool: - return "42182" in self.doi - - @property - def data(self) -> dict: - """ "Internal DOI record data - - Trigger data (down)load if not available - """ - if self._data is None: - self.load() - return self._data - - @property - def date(self) -> pd.Timestamp: - """Date associated with the DOI record""" - return self.data["date"] - - @property - def network(self) -> str: - """Network of the Argo data pointed by the DOI - - Returns - ------- - str: 'core+BGC+deep' or 'BGC' - """ - return "BGC" if "BGC" in self.data["title"] else "core+BGC+deep" - - @property - def file(self) -> list: - """Return a pretty list of files properties associated with this DOI""" - results = [] - for f in self.data["files"]: - r = {"openAccess": bool(f["openAccess"])} - if bool(f["openAccess"]): - r["path"] = f["fileUrl"] - else: - r["path"] = None - r["update"] = pd.to_datetime(f["lastUpdateDate"]) - r["date"] = pd.to_datetime(f["fragment"]["date"]) - r["size"] = f["size"] - r["network"] = "BGC" if "BGC" in f["fragment"]["title"] else "core+BGC+deep" - results.append(r) - return results - - @property - def uri(self) -> str: - """url to API call to retrieve DOI data""" - if self.hashtag is None: - url = "find-by-id/{id}".format - else: - url = "find-by-fragment/{id}?fragmentId={hashtag}".format - return self.api_root + url(id=self.doi.split("/")[-1], hashtag=self.hashtag) - - def __str__(self): - # txt = "%s/%s" % (self.root, self.doi) - txt = "%s" % (self.doi) - if self.hashtag is not None: - txt = "%s#%s" % (txt, self._hashtag) - return txt - - def _process_data(self, data: dict) -> dict: - """Synthetic dict from data return by API""" - Nfiles = len(data["files"]) - if Nfiles > 1: - # Sort files resources by date (most recent first) - data["files"].sort( - key=lambda x: x.get("fragment").get("date"), reverse=True - ) - - return { - "title": data["title"]["en"], - "date": pd.to_datetime(data["date"]), - "authors": data["authors"], - "files": data["files"], - "Nfiles": Nfiles, - # 'description': data['description'], - # 'keywords': data['keywords'], - # 'licenceUrl': data['licenceUrl'], - } - - def load(self, cache: bool = False): - """Load DOI record data from API call""" - if self._data is None: - if self._fs is None: - self._fs = httpstore(cache=cache) - - data = self._fs.open_json(self.uri) - self._data = self._process_data(data) - - return self - - def from_dict(self, d: dict): - """Load DOI record data from a dictionary""" - if ( - "title" in d - 
and "en" in d["title"] - and "date" in d - and "authors" in d - and "files" in d - ): - self._data = self._process_data(d) - return self - - def search(self, **kwargs): - raise ValueError("") - - def _repr_file(self, file, with_label=False) -> str: - """Return a pretty string from a single file dict""" - def sizeof_fmt(num, suffix="B"): - for unit in ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"): - if abs(num) < 1024.0: - return f"{num:3.1f}{unit}{suffix}" - num /= 1024.0 - return f"{num:.1f}Yi{suffix}" - - summary = [] - if with_label: - summary.append("%s" % file["label"]["en"]) - - if bool(file["openAccess"]): - summary.append("%s" % file["fileUrl"]) - else: - summary.append("%s" % file["fileName"]) - - attrs = [] - attrs.append("%s" % sizeof_fmt(file["size"])) - attrs.append("openAccess=%s" % file["openAccess"]) - summary.append("(%s)" % (", ".join(attrs))) - - return " ".join(summary) - - def __repr__(self): - summary = [""] - summary.append("DOI: %s" % self.__str__()) - if self._data is not None: - summary.append("Title: %s" % self.data["title"]) - summary.append("Date: %s" % self.date.strftime("%Y-%m-%d")) - summary.append("Network: %s" % self.network) - - if self.data["Nfiles"] == 1: - summary.append("File: %s" % self._repr_file(self.data["files"][0])) - else: - summary.append("File: %i files in total" % (self.data["Nfiles"])) - - summary.append("Files for core+BGC+deep:") - ifound = 0 - for ii, f in enumerate(self.data["files"]): - if "BGC" not in f["fragment"]["title"] and ifound < 10: - summary.append( - " - #%s %s" - % (f["id"], self._repr_file(f, with_label=True)) - ) - ifound += 1 - - summary.append("Files for BGC only:") - ifound = 0 - for ii, f in enumerate(self.data["files"]): - if "BGC" in f["fragment"]["title"] and ifound < 10: - summary.append( - " - #%s %s" - % (f["id"], self._repr_file(f, with_label=True)) - ) - ifound += 1 - - return "\n".join(summary) - - # @property - # def html(self) -> str: - # fs = 12 - # - # def td_msg(bgcolor, txtcolor, txt): - # style = "background-color:%s;" % to_hex(bgcolor, keep_alpha=True) - # style += "border-width:0px;" - # style += "padding: 2px 2px 2px 0px;" - # style += "text-align:left;" - # style += "color:%s" % to_hex(txtcolor, keep_alpha=True) - # return "%s" % (style, str(txt)) - # - # def td_a(bgcolor, txtcolor, txt, link): - # style = "background-color:%s;" % to_hex(bgcolor, keep_alpha=True) - # style += "border-width:0px;" - # style += "padding: 2px 0px 2px 5px;" - # style += "text-align:right;" - # style += "color:%s" % to_hex(txtcolor, keep_alpha=True) - # return "%s" % (style, link, str(txt)) - # - # td_empty = " " - # - # html = [] - # html.append( - # "" - # % fs - # ) - # html.append("") - # - # rows = [] - # - # # 1st row: - # cols = [] - # cols.append(td_msg("dimgray", "w", "doi: ")) - # cols.append(td_msg("green", "w", "%s/" % self.root)) - # cols.append(td_msg("yellowgreen", "w", self.doi)) - # if self.hashtag is not None: - # cols.append(td_msg("darkorange", "w", "#%s" % self.hashtag)) - # cols.append(td_a("white", "w", "↗", self.dx)) - # cols.append(td_empty) - # rows.append("%s" % "\n".join(cols)) - # - # # # 2nd row (if data have been loaded): - # # if self._data is not None: - # # cols = [] - # # cols.append(td_msg('dimgray', 'w', "Title: ")) - # # cols.append(td_msg('white', 'w', "%s" % self.data['title'])) - # # # cols.append(td_msg('yellowgreen', 'w', self.doi)) - # # # if self.hashtag is not None: - # # # cols.append(td_msg("darkorange", 'w', "#%s" % self.hashtag)) - # # # cols.append(td_a("white", 'w', 
"↗", self.dx)) - # # # cols.append(td_empty) - # # rows.append("%s" % "\n".join(cols)) - # - # # print(rows) - # # # Fix colspan: - # # Nrows = np.max([len(r.split("" % Nrows)) - # # print(rowss) - # - # # Finalize - # html.append("\n".join(rows)) - # html.append("") - # html.append("
", "
") - # html = "\n".join(html) - # return html - - # def _repr_html_(self): - # return self.html - - -class ArgoDOI: - """Helper class for Argo GDAC snapshot DOI access and discovery - - Examples - -------- - from argopy import ArgoDOI - - doi = ArgoDOI() # If you don't know where to start, just load the primary Argo DOI record - doi = ArgoDOI('95141') # To point directly to a snapshot ID - doi = ArgoDOI(hashtag='95141') - doi = ArgoDOI(fs=httpstore(cache=True)) - - doi.search('2020-02') # Return doi closest to a given date - doi.search('2020-02', network='BGC') # Return doi closest to a given date for a specific network - - doi.file # Easy to read list of file(s) associated with a DOI record - doi.dx # http link toward DOI - - """ - - def __init__(self, - hashtag=None, - fs=None, - cache=True): - self._fs = fs if isinstance(fs, httpstore) else httpstore(cache=cache) - if hashtag is not None and '42182#' in hashtag: - hashtag = hashtag.split('42182#')[-1] - self._doi = DOIrecord(hashtag=hashtag, fs=self._fs, autoload=True) - - @property - def doi(self) -> str: - """DOI component (without hashtag)""" - return str(self._doi) - - def __repr__(self): - return self._doi.__repr__() - - def dates(self, network: str = None) -> dict: - d = {} - if network == "BGC": - for f in self._doi.data["files"]: - if "BGC" in f["fragment"]["title"]: - d.update({int(f["id"]): pd.to_datetime(f["fragment"]["date"])}) - else: - for f in self._doi.data["files"]: - if "BGC" not in f["fragment"]["title"]: - d.update({int(f["id"]): pd.to_datetime(f["fragment"]["date"])}) - return d - - def search(self, date: Union[str, pd.Timestamp], network: str = None) -> DOIrecord: - """Search DOI closest to a given date""" - dates = self.dates(network=network) - target = pd.to_datetime(date, utc=True) - close = list(dates.values())[ - np.argmin(np.abs([target - dates[d] for d in dates])) - ] - found = [d for d in dates if dates[d] == close] - results = [] - if len(found) > 0: - for f in found: - results.append(DOIrecord(hashtag=f, fs=self._fs)) - if len(results) == 1: - if (close - target).days > 30: - warnings.warn( - "This snapshot is more than 30 days off your search dates !" 
- ) - return results[0] - else: - return results - - @property - def file(self) -> list: - """Return a pretty list of files properties associated with this DOI""" - return self._doi.file - - @property - def dx(self) -> str: - """DOI url""" - return self._doi.dx From 8723078dbbf7dbceb6d78bfe1585c4ef6290043a Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Thu, 7 Sep 2023 08:30:48 +0200 Subject: [PATCH 11/33] Create doi_snapshot.py --- argopy/related/doi_snapshot.py | 389 +++++++++++++++++++++++++++++++++ 1 file changed, 389 insertions(+) create mode 100644 argopy/related/doi_snapshot.py diff --git a/argopy/related/doi_snapshot.py b/argopy/related/doi_snapshot.py new file mode 100644 index 00000000..6c46b417 --- /dev/null +++ b/argopy/related/doi_snapshot.py @@ -0,0 +1,389 @@ +import pandas as pd +import numpy as np +import warnings +from typing import Union + +# from matplotlib.colors import to_hex +# from IPython.display import IFrame + +from ..stores import httpstore + + +class DOIrecord: + """Helper class for an Argo GDAC snapshot DOI record + + Examples + -------- + d = DOIrecord() + d = DOIrecord('42182') + d = DOIrecord('42182#103075') + d = DOIrecord(hashtag='103075') + d = DOIrecord(hashtag='103088') + + d.doi + d.dx + d.isvalid + d.date + d.network + d.data + d.file + + """ + root = "" + + def __init__( + self, + doi: str = "10.17882/42182", + hashtag: str = None, + fs: httpstore = None, + autoload: bool = True, + api_root: str = "https://www.seanoe.org/api/", + ): + self.api_root = api_root + self._fs = fs # A httpstore will be created if necessary if self.load() is called + self._data = None + + self._doi = doi + self._hashtag = hashtag + if "#" in doi: + self._doi = doi.split("#")[0] + self._hashtag = doi.split("#")[-1] + + if autoload: + self.load() + + @property + def doi(self) -> str: + """DOI component (without hashtag)""" + return self._doi + + @property + def hashtag(self) -> str: + """Hashtag of the full doi""" + return self._hashtag + + @property + def dx(self) -> str: + """DOI url""" + return "https:/dx.doi.org/%s" % str(self) + + def isvalid(self) -> bool: + return "42182" in self.doi + + @property + def data(self) -> dict: + """ "Internal DOI record data + + Trigger data (down)load if not available + """ + if self._data is None: + self.load() + return self._data + + @property + def date(self) -> pd.Timestamp: + """Date associated with the DOI record""" + return self.data["date"] + + @property + def network(self) -> str: + """Network of the Argo data pointed by the DOI + + Returns + ------- + str: 'core+BGC+deep' or 'BGC' + """ + return "BGC" if "BGC" in self.data["title"] else "core+BGC+deep" + + @property + def file(self) -> list: + """Return a pretty list of files properties associated with this DOI""" + results = [] + for f in self.data["files"]: + r = {"openAccess": bool(f["openAccess"])} + if bool(f["openAccess"]): + r["path"] = f["fileUrl"] + else: + r["path"] = None + r["update"] = pd.to_datetime(f["lastUpdateDate"]) + r["date"] = pd.to_datetime(f["fragment"]["date"]) + r["size"] = f["size"] + r["network"] = "BGC" if "BGC" in f["fragment"]["title"] else "core+BGC+deep" + results.append(r) + return results + + @property + def uri(self) -> str: + """url to API call to retrieve DOI data""" + if self.hashtag is None: + url = "find-by-id/{id}".format + else: + url = "find-by-fragment/{id}?fragmentId={hashtag}".format + return self.api_root + url(id=self.doi.split("/")[-1], hashtag=self.hashtag) + + def __str__(self): + # txt = "%s/%s" % (self.root, self.doi) + 
txt = "%s" % (self.doi) + if self.hashtag is not None: + txt = "%s#%s" % (txt, self._hashtag) + return txt + + def _process_data(self, data: dict) -> dict: + """Synthetic dict from data return by API""" + Nfiles = len(data["files"]) + if Nfiles > 1: + # Sort files resources by date (most recent first) + data["files"].sort( + key=lambda x: x.get("fragment").get("date"), reverse=True + ) + + return { + "title": data["title"]["en"], + "date": pd.to_datetime(data["date"]), + "authors": data["authors"], + "files": data["files"], + "Nfiles": Nfiles, + # 'description': data['description'], + # 'keywords': data['keywords'], + # 'licenceUrl': data['licenceUrl'], + } + + def load(self, cache: bool = False): + """Load DOI record data from API call""" + if self._data is None: + if self._fs is None: + self._fs = httpstore(cache=cache) + + data = self._fs.open_json(self.uri) + self._data = self._process_data(data) + + return self + + def from_dict(self, d: dict): + """Load DOI record data from a dictionary""" + if ( + "title" in d + and "en" in d["title"] + and "date" in d + and "authors" in d + and "files" in d + ): + self._data = self._process_data(d) + return self + + def search(self, **kwargs): + raise ValueError("") + + def _repr_file(self, file, with_label=False) -> str: + """Return a pretty string from a single file dict""" + def sizeof_fmt(num, suffix="B"): + for unit in ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"): + if abs(num) < 1024.0: + return f"{num:3.1f}{unit}{suffix}" + num /= 1024.0 + return f"{num:.1f}Yi{suffix}" + + summary = [] + if with_label: + summary.append("%s" % file["label"]["en"]) + + if bool(file["openAccess"]): + summary.append("%s" % file["fileUrl"]) + else: + summary.append("%s" % file["fileName"]) + + attrs = [] + attrs.append("%s" % sizeof_fmt(file["size"])) + attrs.append("openAccess=%s" % file["openAccess"]) + summary.append("(%s)" % (", ".join(attrs))) + + return " ".join(summary) + + def __repr__(self): + summary = [""] + summary.append("DOI: %s" % self.__str__()) + if self._data is not None: + summary.append("Title: %s" % self.data["title"]) + summary.append("Date: %s" % self.date.strftime("%Y-%m-%d")) + summary.append("Network: %s" % self.network) + + if self.data["Nfiles"] == 1: + summary.append("File: %s" % self._repr_file(self.data["files"][0])) + else: + summary.append("File: %i files in total" % (self.data["Nfiles"])) + + summary.append("Files for core+BGC+deep:") + ifound = 0 + for ii, f in enumerate(self.data["files"]): + if "BGC" not in f["fragment"]["title"] and ifound < 10: + summary.append( + " - #%s %s" + % (f["id"], self._repr_file(f, with_label=True)) + ) + ifound += 1 + + summary.append("Files for BGC only:") + ifound = 0 + for ii, f in enumerate(self.data["files"]): + if "BGC" in f["fragment"]["title"] and ifound < 10: + summary.append( + " - #%s %s" + % (f["id"], self._repr_file(f, with_label=True)) + ) + ifound += 1 + + return "\n".join(summary) + + # @property + # def html(self) -> str: + # fs = 12 + # + # def td_msg(bgcolor, txtcolor, txt): + # style = "background-color:%s;" % to_hex(bgcolor, keep_alpha=True) + # style += "border-width:0px;" + # style += "padding: 2px 2px 2px 0px;" + # style += "text-align:left;" + # style += "color:%s" % to_hex(txtcolor, keep_alpha=True) + # return "%s" % (style, str(txt)) + # + # def td_a(bgcolor, txtcolor, txt, link): + # style = "background-color:%s;" % to_hex(bgcolor, keep_alpha=True) + # style += "border-width:0px;" + # style += "padding: 2px 0px 2px 5px;" + # style += "text-align:right;" + # style += 
"color:%s" % to_hex(txtcolor, keep_alpha=True) + # return "%s" % (style, link, str(txt)) + # + # td_empty = " " + # + # html = [] + # html.append( + # "" + # % fs + # ) + # html.append("") + # + # rows = [] + # + # # 1st row: + # cols = [] + # cols.append(td_msg("dimgray", "w", "doi: ")) + # cols.append(td_msg("green", "w", "%s/" % self.root)) + # cols.append(td_msg("yellowgreen", "w", self.doi)) + # if self.hashtag is not None: + # cols.append(td_msg("darkorange", "w", "#%s" % self.hashtag)) + # cols.append(td_a("white", "w", "↗", self.dx)) + # cols.append(td_empty) + # rows.append("%s" % "\n".join(cols)) + # + # # # 2nd row (if data have been loaded): + # # if self._data is not None: + # # cols = [] + # # cols.append(td_msg('dimgray', 'w', "Title: ")) + # # cols.append(td_msg('white', 'w', "%s" % self.data['title'])) + # # # cols.append(td_msg('yellowgreen', 'w', self.doi)) + # # # if self.hashtag is not None: + # # # cols.append(td_msg("darkorange", 'w', "#%s" % self.hashtag)) + # # # cols.append(td_a("white", 'w', "↗", self.dx)) + # # # cols.append(td_empty) + # # rows.append("%s" % "\n".join(cols)) + # + # # print(rows) + # # # Fix colspan: + # # Nrows = np.max([len(r.split("" % Nrows)) + # # print(rowss) + # + # # Finalize + # html.append("\n".join(rows)) + # html.append("") + # html.append("
", "
") + # html = "\n".join(html) + # return html + + # def _repr_html_(self): + # return self.html + + +class ArgoDOI: + """Helper class for Argo GDAC snapshot DOI access and discovery + + Examples + -------- + from argopy import ArgoDOI + + doi = ArgoDOI() # If you don't know where to start, just load the primary Argo DOI record + doi = ArgoDOI('95141') # To point directly to a snapshot ID + doi = ArgoDOI(hashtag='95141') + doi = ArgoDOI(fs=httpstore(cache=True)) + + doi.search('2020-02') # Return doi closest to a given date + doi.search('2020-02', network='BGC') # Return doi closest to a given date for a specific network + + doi.file # Easy to read list of file(s) associated with a DOI record + doi.dx # http link toward DOI + + """ + + def __init__(self, + hashtag=None, + fs=None, + cache=True): + self._fs = fs if isinstance(fs, httpstore) else httpstore(cache=cache) + if hashtag is not None and '42182#' in hashtag: + hashtag = hashtag.split('42182#')[-1] + self._doi = DOIrecord(hashtag=hashtag, fs=self._fs, autoload=True) + + @property + def doi(self) -> str: + """DOI component (without hashtag)""" + return str(self._doi) + + def __repr__(self): + return self._doi.__repr__() + + def dates(self, network: str = None) -> dict: + d = {} + if network == "BGC": + for f in self._doi.data["files"]: + if "BGC" in f["fragment"]["title"]: + d.update({int(f["id"]): pd.to_datetime(f["fragment"]["date"])}) + else: + for f in self._doi.data["files"]: + if "BGC" not in f["fragment"]["title"]: + d.update({int(f["id"]): pd.to_datetime(f["fragment"]["date"])}) + return d + + def search(self, date: Union[str, pd.Timestamp], network: str = None) -> DOIrecord: + """Search DOI closest to a given date""" + dates = self.dates(network=network) + target = pd.to_datetime(date, utc=True) + close = list(dates.values())[ + np.argmin(np.abs([target - dates[d] for d in dates])) + ] + found = [d for d in dates if dates[d] == close] + results = [] + if len(found) > 0: + for f in found: + results.append(DOIrecord(hashtag=f, fs=self._fs)) + if len(results) == 1: + if (close - target).days > 30: + warnings.warn( + "This snapshot is more than 30 days off your search dates !" 
+ ) + return results[0] + else: + return results + + @property + def file(self) -> list: + """Return a pretty list of files properties associated with this DOI""" + return self._doi.file + + @property + def dx(self) -> str: + """DOI url""" + return self._doi.dx From 0b8c9d7da899fd4510ca00bfd57a16b50ac67c2f Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Thu, 7 Sep 2023 08:31:19 +0200 Subject: [PATCH 12/33] Refactor MyThreadPoolExecutor as MonitoredThreadPoolExecutor --- argopy/__init__.py | 2 +- argopy/stores/filesystems.py | 2 +- argopy/utils/__init__.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/argopy/__init__.py b/argopy/__init__.py index 1abb83e6..8648d332 100644 --- a/argopy/__init__.py +++ b/argopy/__init__.py @@ -39,7 +39,7 @@ from .options import set_options, reset_options # noqa: E402 from .data_fetchers import CTDRefDataFetcher # noqa: E402 from .stores import ArgoIndex # noqa: E402 -from .utils import monitored_threadpool # noqa: E402, F401 +from .utils import MonitoredThreadPoolExecutor # noqa: E402, F401 from .related import TopoFetcher, OceanOPSDeployments, ArgoNVSReferenceTables, ArgoDocs, ArgoDOI # noqa: E402 diff --git a/argopy/stores/filesystems.py b/argopy/stores/filesystems.py index aede839b..3e08cd3d 100644 --- a/argopy/stores/filesystems.py +++ b/argopy/stores/filesystems.py @@ -54,7 +54,7 @@ drop_variables_not_in_all_datasets, fill_variables_not_in_all_datasets, ) -from ..utils import MyThreadPoolExecutor as MyExecutor +from ..utils import MonitoredThreadPoolExecutor as MyExecutor log = logging.getLogger("argopy.stores") diff --git a/argopy/utils/__init__.py b/argopy/utils/__init__.py index c93ca7db..6408e29e 100644 --- a/argopy/utils/__init__.py +++ b/argopy/utils/__init__.py @@ -1,7 +1,7 @@ -from .monitored_threadpool import MyThreadPoolExecutor +from .monitored_threadpool import MyThreadPoolExecutor as MonitoredThreadPoolExecutor __all__ = ( # Classes: - "MyThreadPoolExecutor", + "MonitoredThreadPoolExecutor", ) From 01669bdcae308460541624a106ac725ad6f80fd0 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Thu, 7 Sep 2023 08:50:11 +0200 Subject: [PATCH 13/33] refactor most checkers --- argopy/fetchers.py | 4 +- argopy/utilities.py | 380 +-------------------------------------- argopy/utils/__init__.py | 15 +- argopy/utils/checkers.py | 377 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 403 insertions(+), 373 deletions(-) create mode 100644 argopy/utils/checkers.py diff --git a/argopy/fetchers.py b/argopy/fetchers.py index da3143a7..50b142df 100755 --- a/argopy/fetchers.py +++ b/argopy/fetchers.py @@ -21,11 +21,13 @@ from .utilities import ( list_available_data_src, list_available_index_src, + get_coriolis_profile_id, +) +from .utils import ( is_box, is_indexbox, check_wmo, check_cyc, - get_coriolis_profile_id, ) from .plot import plot_trajectory, bar_plot, open_sat_altim_report diff --git a/argopy/utilities.py b/argopy/utilities.py index 831b05fb..396abe0b 100644 --- a/argopy/utilities.py +++ b/argopy/utilities.py @@ -56,6 +56,15 @@ FileSystemHasNoCache, DataNotFound, ) +from .utils import ( + is_box, + is_list_of_strings, + is_wmo, check_wmo, + check_cyc, +) +from .related import ( + ArgoNVSReferenceTables, +) try: collectionsAbc = collections.abc @@ -1363,207 +1372,6 @@ def format_oneline(s, max_width=65): return s -def is_indexbox(box: list, errors="raise"): - """ Check if this array matches a 2d or 3d index box definition - - Argopy expects one of the following 2 format to define an index box: - - - box = [lon_min, 
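Stepping back to the ArgoDOI discovery class created in the doi_snapshot.py patch above, here is a hedged usage sketch based on its docstring; records are fetched from the SEANOE API, so an internet connection is assumed and the dates are illustrative:

    from argopy import ArgoDOI

    doi = ArgoDOI()                                # start from the primary Argo DOI record
    snapshot = doi.search('2020-02')               # DOIrecord closest to February 2020
    bgc = doi.search('2020-02', network='BGC')     # restrict the search to BGC snapshots
    doi.dx                                         # http link toward the DOI
    doi.file                                       # readable list of files attached to the record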
lon_max, lat_min, lat_max] - - box = [lon_min, lon_max, lat_min, lat_max, datim_min, datim_max] - - This function check for this format compliance. - - Parameters - ---------- - box: list - errors: str, default='raise' - - Returns - ------- - bool - """ - def is_dateconvertible(d): - try: - pd.to_datetime(d) - isit = True - except Exception: - isit = False - return isit - - tests = {} - - # Formats: - tests["index box must be a list"] = lambda b: isinstance(b, list) - tests["index box must be a list with 4 or 6 elements"] = lambda b: len(b) in [4, 6] - - # Types: - tests["lon_min must be numeric"] = lambda b: ( - isinstance(b[0], int) or isinstance(b[0], (np.floating, float)) - ) - tests["lon_max must be numeric"] = lambda b: ( - isinstance(b[1], int) or isinstance(b[1], (np.floating, float)) - ) - tests["lat_min must be numeric"] = lambda b: ( - isinstance(b[2], int) or isinstance(b[2], (np.floating, float)) - ) - tests["lat_max must be numeric"] = lambda b: ( - isinstance(b[3], int) or isinstance(b[3], (np.floating, float)) - ) - if len(box) > 4: - tests[ - "datetim_min must be a string convertible to a Pandas datetime" - ] = lambda b: isinstance(b[-2], str) and is_dateconvertible(b[-2]) - tests[ - "datetim_max must be a string convertible to a Pandas datetime" - ] = lambda b: isinstance(b[-1], str) and is_dateconvertible(b[-1]) - - # Ranges: - tests["lon_min must be in [-180;180] or [0;360]"] = ( - lambda b: b[0] >= -180.0 and b[0] <= 360.0 - ) - tests["lon_max must be in [-180;180] or [0;360]"] = ( - lambda b: b[1] >= -180.0 and b[1] <= 360.0 - ) - tests["lat_min must be in [-90;90]"] = lambda b: b[2] >= -90.0 and b[2] <= 90 - tests["lat_max must be in [-90;90]"] = lambda b: b[3] >= -90.0 and b[3] <= 90.0 - - # Orders: - tests["lon_max must be larger than lon_min"] = lambda b: b[0] < b[1] - tests["lat_max must be larger than lat_min"] = lambda b: b[2] < b[3] - if len(box) > 4: - tests["datetim_max must come after datetim_min"] = lambda b: pd.to_datetime( - b[-2] - ) < pd.to_datetime(b[-1]) - - error = None - for msg, test in tests.items(): - if not test(box): - error = msg - break - - if error and errors == "raise": - raise ValueError("%s: %s" % (box, error)) - elif error: - return False - else: - return True - - -def is_box(box: list, errors="raise"): - """Check if this array matches a 3d or 4d data box definition - - Argopy expects one of the following 2 format to define a box: - - - box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max] - - box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max, datim_min, datim_max] - - This function check for this format compliance. 
- - Parameters - ---------- - box: list - errors: 'raise' - - Returns - ------- - bool - """ - - def is_dateconvertible(d): - try: - pd.to_datetime(d) - isit = True - except Exception: - isit = False - return isit - - tests = {} - # print(box) - # Formats: - tests["box must be a list"] = lambda b: isinstance(b, list) - tests["box must be a list with 6 or 8 elements"] = lambda b: len(b) in [6, 8] - - # Types: - tests["lon_min must be numeric"] = lambda b: ( - isinstance(b[0], int) or isinstance(b[0], (np.floating, float)) - ) - tests["lon_max must be numeric"] = lambda b: ( - isinstance(b[1], int) or isinstance(b[1], (np.floating, float)) - ) - tests["lat_min must be numeric"] = lambda b: ( - isinstance(b[2], int) or isinstance(b[2], (np.floating, float)) - ) - tests["lat_max must be numeric"] = lambda b: ( - isinstance(b[3], int) or isinstance(b[3], (np.floating, float)) - ) - tests["pres_min must be numeric"] = lambda b: ( - isinstance(b[4], int) or isinstance(b[4], (np.floating, float)) - ) - tests["pres_max must be numeric"] = lambda b: ( - isinstance(b[5], int) or isinstance(b[5], (np.floating, float)) - ) - if len(box) == 8: - tests[ - "datetim_min must be an object convertible to a Pandas datetime" - ] = lambda b: is_dateconvertible(b[-2]) - tests[ - "datetim_max must be an object convertible to a Pandas datetime" - ] = lambda b: is_dateconvertible(b[-1]) - - # Ranges: - tests["lon_min must be in [-180;180] or [0;360]"] = ( - lambda b: b[0] >= -180.0 and b[0] <= 360.0 - ) - tests["lon_max must be in [-180;180] or [0;360]"] = ( - lambda b: b[1] >= -180.0 and b[1] <= 360.0 - ) - tests["lat_min must be in [-90;90]"] = lambda b: b[2] >= -90.0 and b[2] <= 90 - tests["lat_max must be in [-90;90]"] = lambda b: b[3] >= -90.0 and b[3] <= 90.0 - tests["pres_min must be in [0;10000]"] = lambda b: b[4] >= 0 and b[4] <= 10000 - tests["pres_max must be in [0;10000]"] = lambda b: b[5] >= 0 and b[5] <= 10000 - - # Orders: - tests["lon_max must be larger than lon_min"] = lambda b: b[0] <= b[1] - tests["lat_max must be larger than lat_min"] = lambda b: b[2] <= b[3] - tests["pres_max must be larger than pres_min"] = lambda b: b[4] <= b[5] - if len(box) == 8: - tests["datetim_max must come after datetim_min"] = lambda b: pd.to_datetime( - b[-2] - ) <= pd.to_datetime(b[-1]) - - error = None - for msg, test in tests.items(): - if not test(box): - error = msg - break - - if error and errors == "raise": - raise ValueError("%s: %s" % (box, error)) - elif error: - return False - else: - return True - - -def is_list_of_strings(lst): - return isinstance(lst, list) and all(isinstance(elem, str) for elem in lst) - - -def is_list_of_dicts(lst): - return all(isinstance(x, dict) for x in lst) - - -def is_list_of_datasets(lst): - return all(isinstance(x, xr.Dataset) for x in lst) - - -def is_list_equal(lst1, lst2): - """ Return true if 2 lists contain same elements""" - return len(lst1) == len(lst2) and len(lst1) == sum( - [1 for i, j in zip(lst1, lst2) if i == j] - ) - - def to_list(obj): """Make sure that an expected list is indeed a list""" if not isinstance(obj, list): @@ -1574,176 +1382,6 @@ def to_list(obj): return obj -def check_wmo(lst, errors="raise"): - """ Validate a WMO option and returned it as a list of integers - - Parameters - ---------- - wmo: int - WMO must be an integer or an iterable with elements that can be casted as integers - errors: {'raise', 'warn', 'ignore'} - Possibly raises a ValueError exception or UserWarning, otherwise fails silently. 
- - Returns - ------- - list(int) - """ - is_wmo(lst, errors=errors) - - # Make sure we deal with a list - lst = to_list(lst) - - # Then cast list elements as integers - return [abs(int(x)) for x in lst] - - -def is_wmo(lst, errors="raise"): # noqa: C901 - """ Check if a WMO is valid - - Parameters - ---------- - wmo: int, list(int), array(int) - WMO must be a single or a list of 5/7 digit positive numbers - errors: {'raise', 'warn', 'ignore'} - Possibly raises a ValueError exception or UserWarning, otherwise fails silently. - - Returns - ------- - bool - True if wmo is indeed a list of integers - """ - - # Make sure we deal with a list - lst = to_list(lst) - - # Error message: - # msg = "WMO must be an integer or an iterable with elements that can be casted as integers" - msg = "WMO must be a single or a list of 5/7 digit positive numbers. Invalid: '{}'".format - - # Then try to cast list elements as integers, return True if ok - result = True - try: - for x in lst: - if not str(x).isdigit(): - result = False - - if (len(str(x)) != 5) and (len(str(x)) != 7): - result = False - - if int(x) <= 0: - result = False - - except Exception: - result = False - if errors == "raise": - raise ValueError(msg(x)) - elif errors == 'warn': - warnings.warn(msg(x)) - - if not result: - if errors == "raise": - raise ValueError(msg(x)) - elif errors == 'warn': - warnings.warn(msg(x)) - else: - return result - - -def check_cyc(lst, errors="raise"): - """ Validate a CYC option and returned it as a list of integers - - Parameters - ---------- - cyc: int - CYC must be an integer or an iterable with elements that can be casted as positive integers - errors: {'raise', 'warn', 'ignore'} - Possibly raises a ValueError exception or UserWarning, otherwise fails silently. - - Returns - ------- - list(int) - """ - is_cyc(lst, errors=errors) - - # Make sure we deal with a list - lst = to_list(lst) - - # Then cast list elements as integers - return [abs(int(x)) for x in lst] - - -def is_cyc(lst, errors="raise"): # noqa: C901 - """ Check if a CYC is valid - Parameters - ---------- - cyc: int, list(int), array(int) - CYC must be a single or a list of at most 4 digit positive numbers - errors: {'raise', 'warn', 'ignore'} - Possibly raises a ValueError exception or UserWarning, otherwise fails silently. - Returns - ------- - bool - True if cyc is indeed a list of integers - """ - # Make sure we deal with a list - lst = to_list(lst) - - # Error message: - msg = "CYC must be a single or a list of at most 4 digit positive numbers. Invalid: '{}'".format - - # Then try to cast list elements as integers, return True if ok - result = True - try: - for x in lst: - if not str(x).isdigit(): - result = False - - if (len(str(x)) > 4): - result = False - - if int(x) < 0: - result = False - - except Exception: - result = False - if errors == "raise": - raise ValueError(msg(x)) - elif errors == 'warn': - warnings.warn(msg(x)) - - if not result: - if errors == "raise": - raise ValueError(msg(x)) - elif errors == 'warn': - warnings.warn(msg(x)) - else: - return result - - -def check_index_cols(column_names: list, convention: str = 'ar_index_global_prof'): - """ - ar_index_global_prof.txt: Index of profile files - Profile directory file of the Argo Global Data Assembly Center - file,date,latitude,longitude,ocean,profiler_type,institution,date_update - - argo_bio-profile_index.txt: bgc Argo profiles index file - The directory file describes all individual bio-profile files of the argo GDAC ftp site. 
- file,date,latitude,longitude,ocean,profiler_type,institution,parameters,parameter_data_mode,date_update - """ - # Default for 'ar_index_global_prof' - ref = ['file', 'date', 'latitude', 'longitude', 'ocean', 'profiler_type', 'institution', - 'date_update'] - if convention == 'argo_bio-profile_index' or convention == 'argo_synthetic-profile_index': - ref = ['file', 'date', 'latitude', 'longitude', 'ocean', 'profiler_type', 'institution', - 'parameters', 'parameter_data_mode', 'date_update'] - - if not is_list_equal(column_names, ref): - # log.debug("Expected: %s, got: %s" % (";".join(ref), ";".join(column_names))) - raise InvalidDatasetStructure("Unexpected column names in this index !") - else: - return column_names - - def warnUnless(ok, txt): """Function to raise a warning unless condition is True diff --git a/argopy/utils/__init__.py b/argopy/utils/__init__.py index 6408e29e..a80c00f1 100644 --- a/argopy/utils/__init__.py +++ b/argopy/utils/__init__.py @@ -1,7 +1,20 @@ from .monitored_threadpool import MyThreadPoolExecutor as MonitoredThreadPoolExecutor - +from .checkers import ( + is_box, is_indexbox, + is_list_of_strings, is_list_of_dicts, is_list_of_datasets, is_list_equal, + is_wmo, check_wmo, + is_cyc, check_cyc, + check_index_cols, +) __all__ = ( # Classes: "MonitoredThreadPoolExecutor", + + # Checkers: + "is_box", "is_indexbox", + "is_list_of_strings", "is_list_of_dicts", "is_list_of_datasets", "is_list_equal", + "is_wmo", "check_wmo", + "is_cyc", "check_cyc", + "check_index_cols", ) diff --git a/argopy/utils/checkers.py b/argopy/utils/checkers.py new file mode 100644 index 00000000..6342c415 --- /dev/null +++ b/argopy/utils/checkers.py @@ -0,0 +1,377 @@ +import warnings +import numpy as np +import pandas as pd +import xarray as xr +from ..utilities import to_list +from ..errors import InvalidDatasetStructure + + +def is_indexbox(box: list, errors="raise"): + """ Check if this array matches a 2d or 3d index box definition + + Argopy expects one of the following 2 format to define an index box: + + - box = [lon_min, lon_max, lat_min, lat_max] + - box = [lon_min, lon_max, lat_min, lat_max, datim_min, datim_max] + + This function check for this format compliance. 
+ + Parameters + ---------- + box: list + errors: str, default='raise' + + Returns + ------- + bool + """ + def is_dateconvertible(d): + try: + pd.to_datetime(d) + isit = True + except Exception: + isit = False + return isit + + tests = {} + + # Formats: + tests["index box must be a list"] = lambda b: isinstance(b, list) + tests["index box must be a list with 4 or 6 elements"] = lambda b: len(b) in [4, 6] + + # Types: + tests["lon_min must be numeric"] = lambda b: ( + isinstance(b[0], int) or isinstance(b[0], (np.floating, float)) + ) + tests["lon_max must be numeric"] = lambda b: ( + isinstance(b[1], int) or isinstance(b[1], (np.floating, float)) + ) + tests["lat_min must be numeric"] = lambda b: ( + isinstance(b[2], int) or isinstance(b[2], (np.floating, float)) + ) + tests["lat_max must be numeric"] = lambda b: ( + isinstance(b[3], int) or isinstance(b[3], (np.floating, float)) + ) + if len(box) > 4: + tests[ + "datetim_min must be a string convertible to a Pandas datetime" + ] = lambda b: isinstance(b[-2], str) and is_dateconvertible(b[-2]) + tests[ + "datetim_max must be a string convertible to a Pandas datetime" + ] = lambda b: isinstance(b[-1], str) and is_dateconvertible(b[-1]) + + # Ranges: + tests["lon_min must be in [-180;180] or [0;360]"] = ( + lambda b: b[0] >= -180.0 and b[0] <= 360.0 + ) + tests["lon_max must be in [-180;180] or [0;360]"] = ( + lambda b: b[1] >= -180.0 and b[1] <= 360.0 + ) + tests["lat_min must be in [-90;90]"] = lambda b: b[2] >= -90.0 and b[2] <= 90 + tests["lat_max must be in [-90;90]"] = lambda b: b[3] >= -90.0 and b[3] <= 90.0 + + # Orders: + tests["lon_max must be larger than lon_min"] = lambda b: b[0] < b[1] + tests["lat_max must be larger than lat_min"] = lambda b: b[2] < b[3] + if len(box) > 4: + tests["datetim_max must come after datetim_min"] = lambda b: pd.to_datetime( + b[-2] + ) < pd.to_datetime(b[-1]) + + error = None + for msg, test in tests.items(): + if not test(box): + error = msg + break + + if error and errors == "raise": + raise ValueError("%s: %s" % (box, error)) + elif error: + return False + else: + return True + + +def is_box(box: list, errors="raise"): + """Check if this array matches a 3d or 4d data box definition + + Argopy expects one of the following 2 format to define a box: + + - box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max] + - box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max, datim_min, datim_max] + + This function check for this format compliance. 
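To make the expected box formats concrete, a small sketch using the checkers now hosted in argopy/utils/checkers.py; the values are illustrative and mirror the tests added further down in this series:

    from argopy.utils import is_box, is_indexbox

    is_box([0, 20, 40, 60, 0, 1000])                         # 3d data box -> True
    is_box([0, 20, 40, 60, 0, 1000, "2001-01", "2001-06"])   # 4d data box -> True
    is_indexbox([0, 20, 40, 60])                             # 2d index box -> True
    is_box([0, 20, 40, 60], errors="ignore")                 # wrong length: returns False instead of raising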
+ + Parameters + ---------- + box: list + errors: 'raise' + + Returns + ------- + bool + """ + + def is_dateconvertible(d): + try: + pd.to_datetime(d) + isit = True + except Exception: + isit = False + return isit + + tests = {} + # print(box) + # Formats: + tests["box must be a list"] = lambda b: isinstance(b, list) + tests["box must be a list with 6 or 8 elements"] = lambda b: len(b) in [6, 8] + + # Types: + tests["lon_min must be numeric"] = lambda b: ( + isinstance(b[0], int) or isinstance(b[0], (np.floating, float)) + ) + tests["lon_max must be numeric"] = lambda b: ( + isinstance(b[1], int) or isinstance(b[1], (np.floating, float)) + ) + tests["lat_min must be numeric"] = lambda b: ( + isinstance(b[2], int) or isinstance(b[2], (np.floating, float)) + ) + tests["lat_max must be numeric"] = lambda b: ( + isinstance(b[3], int) or isinstance(b[3], (np.floating, float)) + ) + tests["pres_min must be numeric"] = lambda b: ( + isinstance(b[4], int) or isinstance(b[4], (np.floating, float)) + ) + tests["pres_max must be numeric"] = lambda b: ( + isinstance(b[5], int) or isinstance(b[5], (np.floating, float)) + ) + if len(box) == 8: + tests[ + "datetim_min must be an object convertible to a Pandas datetime" + ] = lambda b: is_dateconvertible(b[-2]) + tests[ + "datetim_max must be an object convertible to a Pandas datetime" + ] = lambda b: is_dateconvertible(b[-1]) + + # Ranges: + tests["lon_min must be in [-180;180] or [0;360]"] = ( + lambda b: b[0] >= -180.0 and b[0] <= 360.0 + ) + tests["lon_max must be in [-180;180] or [0;360]"] = ( + lambda b: b[1] >= -180.0 and b[1] <= 360.0 + ) + tests["lat_min must be in [-90;90]"] = lambda b: b[2] >= -90.0 and b[2] <= 90 + tests["lat_max must be in [-90;90]"] = lambda b: b[3] >= -90.0 and b[3] <= 90.0 + tests["pres_min must be in [0;10000]"] = lambda b: b[4] >= 0 and b[4] <= 10000 + tests["pres_max must be in [0;10000]"] = lambda b: b[5] >= 0 and b[5] <= 10000 + + # Orders: + tests["lon_max must be larger than lon_min"] = lambda b: b[0] <= b[1] + tests["lat_max must be larger than lat_min"] = lambda b: b[2] <= b[3] + tests["pres_max must be larger than pres_min"] = lambda b: b[4] <= b[5] + if len(box) == 8: + tests["datetim_max must come after datetim_min"] = lambda b: pd.to_datetime( + b[-2] + ) <= pd.to_datetime(b[-1]) + + error = None + for msg, test in tests.items(): + if not test(box): + error = msg + break + + if error and errors == "raise": + raise ValueError("%s: %s" % (box, error)) + elif error: + return False + else: + return True + + +def is_list_of_strings(lst): + return isinstance(lst, list) and all(isinstance(elem, str) for elem in lst) + + +def is_list_of_dicts(lst): + return all(isinstance(x, dict) for x in lst) + + +def is_list_of_datasets(lst): + return all(isinstance(x, xr.Dataset) for x in lst) + + +def is_list_equal(lst1, lst2): + """ Return true if 2 lists contain same elements""" + return len(lst1) == len(lst2) and len(lst1) == sum( + [1 for i, j in zip(lst1, lst2) if i == j] + ) + + +def check_wmo(lst, errors="raise"): + """ Validate a WMO option and returned it as a list of integers + + Parameters + ---------- + wmo: int + WMO must be an integer or an iterable with elements that can be casted as integers + errors: {'raise', 'warn', 'ignore'} + Possibly raises a ValueError exception or UserWarning, otherwise fails silently. 
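The small list predicates moved alongside the checkers are pure Python helpers with no I/O; a quick sketch:

    from argopy.utils import is_list_of_strings, is_list_equal

    is_list_of_strings(["TEMP", "PSAL"])   # True
    is_list_of_strings(["TEMP", 7])        # False
    is_list_equal([1, 2, 3], [1, 2, 3])    # True
    is_list_equal([1, 2, 3], [3, 2, 1])    # False: same elements but not in the same order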
+ + Returns + ------- + list(int) + """ + is_wmo(lst, errors=errors) + + # Make sure we deal with a list + lst = to_list(lst) + + # Then cast list elements as integers + return [abs(int(x)) for x in lst] + + +def is_wmo(lst, errors="raise"): # noqa: C901 + """ Check if a WMO is valid + + Parameters + ---------- + wmo: int, list(int), array(int) + WMO must be a single or a list of 5/7 digit positive numbers + errors: {'raise', 'warn', 'ignore'} + Possibly raises a ValueError exception or UserWarning, otherwise fails silently. + + Returns + ------- + bool + True if wmo is indeed a list of integers + """ + + # Make sure we deal with a list + lst = to_list(lst) + + # Error message: + # msg = "WMO must be an integer or an iterable with elements that can be casted as integers" + msg = "WMO must be a single or a list of 5/7 digit positive numbers. Invalid: '{}'".format + + # Then try to cast list elements as integers, return True if ok + result = True + try: + for x in lst: + if not str(x).isdigit(): + result = False + + if (len(str(x)) != 5) and (len(str(x)) != 7): + result = False + + if int(x) <= 0: + result = False + + except Exception: + result = False + if errors == "raise": + raise ValueError(msg(x)) + elif errors == 'warn': + warnings.warn(msg(x)) + + if not result: + if errors == "raise": + raise ValueError(msg(x)) + elif errors == 'warn': + warnings.warn(msg(x)) + else: + return result + + +def check_cyc(lst, errors="raise"): + """ Validate a CYC option and returned it as a list of integers + + Parameters + ---------- + cyc: int + CYC must be an integer or an iterable with elements that can be casted as positive integers + errors: {'raise', 'warn', 'ignore'} + Possibly raises a ValueError exception or UserWarning, otherwise fails silently. + + Returns + ------- + list(int) + """ + is_cyc(lst, errors=errors) + + # Make sure we deal with a list + lst = to_list(lst) + + # Then cast list elements as integers + return [abs(int(x)) for x in lst] + + +def is_cyc(lst, errors="raise"): # noqa: C901 + """ Check if a CYC is valid + Parameters + ---------- + cyc: int, list(int), array(int) + CYC must be a single or a list of at most 4 digit positive numbers + errors: {'raise', 'warn', 'ignore'} + Possibly raises a ValueError exception or UserWarning, otherwise fails silently. + Returns + ------- + bool + True if cyc is indeed a list of integers + """ + # Make sure we deal with a list + lst = to_list(lst) + + # Error message: + msg = "CYC must be a single or a list of at most 4 digit positive numbers. Invalid: '{}'".format + + # Then try to cast list elements as integers, return True if ok + result = True + try: + for x in lst: + if not str(x).isdigit(): + result = False + + if (len(str(x)) > 4): + result = False + + if int(x) < 0: + result = False + + except Exception: + result = False + if errors == "raise": + raise ValueError(msg(x)) + elif errors == 'warn': + warnings.warn(msg(x)) + + if not result: + if errors == "raise": + raise ValueError(msg(x)) + elif errors == 'warn': + warnings.warn(msg(x)) + else: + return result + + +def check_index_cols(column_names: list, convention: str = 'ar_index_global_prof'): + """ + ar_index_global_prof.txt: Index of profile files + Profile directory file of the Argo Global Data Assembly Center + file,date,latitude,longitude,ocean,profiler_type,institution,date_update + + argo_bio-profile_index.txt: bgc Argo profiles index file + The directory file describes all individual bio-profile files of the argo GDAC ftp site. 
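A minimal sketch of the WMO and cycle number checkers, mirroring the behaviour exercised in argopy/tests/test_utils_checkers.py added later in this series:

    from argopy.utils import is_wmo, check_wmo, is_cyc, check_cyc

    is_wmo([12345, 1234567])        # True: 5 and 7 digit positive numbers
    is_wmo(1234, errors="ignore")   # falsy result (invalid WMO), nothing raised
    check_wmo(12345)                # [12345], always returned as a list of int
    check_cyc([12, 123])            # [12, 123]
    is_cyc(12345, errors="ignore")  # falsy: cycle numbers have at most 4 digits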
+ file,date,latitude,longitude,ocean,profiler_type,institution,parameters,parameter_data_mode,date_update + """ + # Default for 'ar_index_global_prof' + ref = ['file', 'date', 'latitude', 'longitude', 'ocean', 'profiler_type', 'institution', + 'date_update'] + if convention == 'argo_bio-profile_index' or convention == 'argo_synthetic-profile_index': + ref = ['file', 'date', 'latitude', 'longitude', 'ocean', 'profiler_type', 'institution', + 'parameters', 'parameter_data_mode', 'date_update'] + + if not is_list_equal(column_names, ref): + # log.debug("Expected: %s, got: %s" % (";".join(ref), ";".join(column_names))) + raise InvalidDatasetStructure("Unexpected column names in this index !") + else: + return column_names From a5da1766b9483918f63c81c4430c0df4c899f32e Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 8 Sep 2023 09:50:47 +0200 Subject: [PATCH 14/33] [skip-ci] --- argopy/data_fetchers/erddap_data.py | 20 +- argopy/plot/dashboards.py | 3 +- argopy/plot/plot.py | 3 +- argopy/related/__init__.py | 5 + argopy/related/euroargo_api.py | 101 ++ argopy/stores/argo_index_pa.py | 2 +- argopy/stores/argo_index_pd.py | 2 +- argopy/tests/test_fetchers_data_argovis.py | 2 +- argopy/tests/test_fetchers_data_erddap.py | 2 +- argopy/tests/test_fetchers_data_erddap_bgc.py | 2 +- argopy/tests/test_fetchers_data_gdac.py | 3 +- argopy/tests/test_fetchers_facade_data.py | 2 +- argopy/tests/test_fetchers_index_gdac.py | 3 +- argopy/tests/test_fetchers_proto.py | 2 +- argopy/tests/test_stores_fsspec.py | 4 +- argopy/tests/test_stores_index.py | 2 +- argopy/tests/test_utilities.py | 180 +--- argopy/tests/test_utils_checkers.py | 179 ++++ argopy/utilities.py | 900 +----------------- argopy/utils/__init__.py | 27 + argopy/utils/casting.py | 376 ++++++++ argopy/utils/checkers.py | 97 +- argopy/utils/decorators.py | 154 +++ argopy/utils/lists.py | 198 ++++ argopy/xarray.py | 12 +- 25 files changed, 1178 insertions(+), 1103 deletions(-) create mode 100644 argopy/related/euroargo_api.py create mode 100644 argopy/tests/test_utils_checkers.py create mode 100644 argopy/utils/casting.py create mode 100644 argopy/utils/decorators.py create mode 100644 argopy/utils/lists.py diff --git a/argopy/data_fetchers/erddap_data.py b/argopy/data_fetchers/erddap_data.py index 9ac5d3b4..c3d3ea58 100644 --- a/argopy/data_fetchers/erddap_data.py +++ b/argopy/data_fetchers/erddap_data.py @@ -21,18 +21,16 @@ import getpass from typing import Union import fnmatch +from aiohttp import ClientResponseError +import logging from .proto import ArgoDataFetcherProto -from argopy.options import OPTIONS -from argopy.utilities import Chunker, format_oneline, to_list -from argopy.stores import httpstore +from ..options import OPTIONS +from ..utilities import Chunker, format_oneline +from ..stores import httpstore from ..errors import ErddapServerError, DataNotFound -from ..stores import ( - indexstore_pd as ArgoIndex, -) # make sure to work with the Pandas index store - -from aiohttp import ClientResponseError -import logging +from ..stores import indexstore_pd as ArgoIndex # make sure we work with the Pandas index store +from ..utils import is_list_of_strings, to_list # Load erddapy according to available version (breaking changes in v0.8.0) try: @@ -201,7 +199,7 @@ def __init__( # noqa: C901 raise ValueError() elif params[0] == "all": params = self._bgc_vlist_avail - elif not argopy.utilities.is_list_of_strings(params): + elif not is_list_of_strings(params): raise ValueError("'params' argument must be a list of strings") # raise 
ValueError("'params' argument must be a list of strings (possibly with a * wildcard)") self._bgc_vlist_requested = [p.upper() for p in params] @@ -222,7 +220,7 @@ def __init__( # noqa: C901 measured = [] elif self._bgc_measured[0] == "all": measured = self._bgc_vlist_requested - elif not argopy.utilities.is_list_of_strings(self._bgc_measured): + elif not is_list_of_strings(self._bgc_measured): raise ValueError("'measured' argument must be a list of strings") # raise ValueError("'measured' argument must be a list of strings (possibly with a * wildcard)") self._bgc_vlist_measured = [m.upper() for m in measured] diff --git a/argopy/plot/dashboards.py b/argopy/plot/dashboards.py index 6b513e1f..5f95d0f2 100644 --- a/argopy/plot/dashboards.py +++ b/argopy/plot/dashboards.py @@ -11,7 +11,8 @@ from packaging import version from .utils import has_ipython -from ..utilities import warnUnless, check_wmo, check_cyc, get_ea_profile_page +from ..utilities import warnUnless, get_ea_profile_page +from ..utils import check_wmo, check_cyc from ..errors import InvalidDashboard from .. import __version__ as argopy_version diff --git a/argopy/plot/plot.py b/argopy/plot/plot.py index d9e52c12..22248777 100644 --- a/argopy/plot/plot.py +++ b/argopy/plot/plot.py @@ -18,7 +18,8 @@ from .utils import axes_style, latlongrid, land_feature from .argo_colors import ArgoColors -from ..utilities import warnUnless, check_wmo +from ..utilities import warnUnless +from ..utils import check_wmo from ..errors import InvalidDatasetStructure if has_mpl: diff --git a/argopy/related/__init__.py b/argopy/related/__init__.py index 84c62f22..5c960c9d 100644 --- a/argopy/related/__init__.py +++ b/argopy/related/__init__.py @@ -3,6 +3,7 @@ from .reference_tables import ArgoNVSReferenceTables from .argo_documentation import ArgoDocs from .doi_snapshot import ArgoDOI +from .euroargo_api import get_coriolis_profile_id, get_ea_profile_page # @@ -13,4 +14,8 @@ "ArgoNVSReferenceTables", "ArgoDocs", "ArgoDOI", + + # Functions: + "get_coriolis_profile_id", + "get_ea_profile_page", ) diff --git a/argopy/related/euroargo_api.py b/argopy/related/euroargo_api.py new file mode 100644 index 00000000..b9e9b8b6 --- /dev/null +++ b/argopy/related/euroargo_api.py @@ -0,0 +1,101 @@ +import pandas as pd +from ..options import OPTIONS +from ..utils import check_wmo, check_cyc +from ..stores import httpstore + + +def get_coriolis_profile_id(WMO, CYC=None, **kwargs): + """ Return a :class:`pandas.DataFrame` with CORIOLIS ID of WMO/CYC profile pairs + + This method get ID by requesting the dataselection.euro-argo.eu trajectory API. + + Parameters + ---------- + WMO: int, list(int) + Define the list of Argo floats. This is a list of integers with WMO float identifiers. + WMO is the World Meteorological Organization. + CYC: int, list(int) + Define the list of cycle numbers to load ID for each Argo floats listed in ``WMO``. 
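A usage sketch for the two Euro-Argo API helpers added in argopy/related/euroargo_api.py; the WMO/cycle pair below is purely illustrative, and both calls assume access to the dataselection.euro-argo.eu API:

    from argopy.related import get_coriolis_profile_id, get_ea_profile_page

    df = get_coriolis_profile_id(6902746, 34)   # DataFrame with PLATFORM_NUMBER, CYCLE_NUMBER, ID, LATITUDE, LONGITUDE, level
    urls = get_ea_profile_page(6902746, 34)     # list of https://dataselection.euro-argo.eu/cycle/<ID> pages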
+ + Returns + ------- + :class:`pandas.DataFrame` + """ + WMO_list = check_wmo(WMO) + if CYC is not None: + CYC_list = check_cyc(CYC) + if 'api_server' in kwargs: + api_server = kwargs['api_server'] + elif OPTIONS['server'] is not None: + api_server = OPTIONS['server'] + else: + api_server = "https://dataselection.euro-argo.eu/api" + URIs = [api_server + "/trajectory/%i" % wmo for wmo in WMO_list] + + def prec(data, url): + # Transform trajectory json to dataframe + # See: https://dataselection.euro-argo.eu/swagger-ui.html#!/cycle-controller/getCyclesByPlatformCodeUsingGET + WMO = check_wmo(url.split("/")[-1])[0] + rows = [] + for profile in data: + keys = [x for x in profile.keys() if x not in ["coordinate"]] + meta_row = dict((key, profile[key]) for key in keys) + for row in profile["coordinate"]: + meta_row[row] = profile["coordinate"][row] + meta_row["WMO"] = WMO + rows.append(meta_row) + return pd.DataFrame(rows) + + fs = httpstore(cache=True, cachedir=OPTIONS['cachedir']) + data = fs.open_mfjson(URIs, preprocess=prec, errors="raise", url_follow=True) + + # Merge results (list of dataframe): + key_map = { + "id": "ID", + "lat": "LATITUDE", + "lon": "LONGITUDE", + "cvNumber": "CYCLE_NUMBER", + "level": "level", + "WMO": "PLATFORM_NUMBER", + } + for i, df in enumerate(data): + df = df.reset_index() + df = df.rename(columns=key_map) + df = df[[value for value in key_map.values() if value in df.columns]] + data[i] = df + df = pd.concat(data, ignore_index=True) + df.sort_values(by=["PLATFORM_NUMBER", "CYCLE_NUMBER"], inplace=True) + df = df.reset_index(drop=True) + # df = df.set_index(["PLATFORM_NUMBER", "CYCLE_NUMBER"]) + df = df.astype({"ID": int}) + if CYC is not None: + df = pd.concat([df[df["CYCLE_NUMBER"] == cyc] for cyc in CYC_list]).reset_index( + drop=True + ) + return df[ + ["PLATFORM_NUMBER", "CYCLE_NUMBER", "ID", "LATITUDE", "LONGITUDE", "level"] + ] + + +def get_ea_profile_page(WMO, CYC=None, **kwargs): + """ Return a list of URL + + Parameters + ---------- + WMO: int, list(int) + WMO must be an integer or an iterable with elements that can be casted as integers + CYC: int, list(int), default (None) + CYC must be an integer or an iterable with elements that can be casted as positive integers + + Returns + ------- + list(str) + + See also + -------- + get_coriolis_profile_id + """ + df = get_coriolis_profile_id(WMO, CYC, **kwargs) + url = "https://dataselection.euro-argo.eu/cycle/{}" + return [url.format(this_id) for this_id in sorted(df["ID"])] + diff --git a/argopy/stores/argo_index_pa.py b/argopy/stores/argo_index_pa.py index 83d10ea4..df17b348 100644 --- a/argopy/stores/argo_index_pa.py +++ b/argopy/stores/argo_index_pa.py @@ -12,7 +12,7 @@ from packaging import version from ..errors import DataNotFound, InvalidDatasetStructure -from ..utilities import check_index_cols, is_indexbox, check_wmo, check_cyc, to_list +from ..utils import check_index_cols, is_indexbox, check_wmo, check_cyc, to_list from .argo_index_proto import ArgoIndexStoreProto try: import pyarrow.csv as csv # noqa: F401 diff --git a/argopy/stores/argo_index_pd.py b/argopy/stores/argo_index_pd.py index 5e69daed..7c00fca2 100644 --- a/argopy/stores/argo_index_pd.py +++ b/argopy/stores/argo_index_pd.py @@ -10,7 +10,7 @@ import gzip from ..errors import DataNotFound, InvalidDatasetStructure -from ..utilities import check_index_cols, is_indexbox, check_wmo, check_cyc, to_list +from ..utils import check_index_cols, is_indexbox, check_wmo, check_cyc, to_list from .argo_index_proto import ArgoIndexStoreProto diff 
--git a/argopy/tests/test_fetchers_data_argovis.py b/argopy/tests/test_fetchers_data_argovis.py index 6a083837..1db64d2b 100644 --- a/argopy/tests/test_fetchers_data_argovis.py +++ b/argopy/tests/test_fetchers_data_argovis.py @@ -13,7 +13,7 @@ CacheFileNotFound, FileSystemHasNoCache, ) -from argopy.utilities import is_list_of_strings +from argopy.utils import is_list_of_strings from utils import requires_connected_argovis, safe_to_server_errors diff --git a/argopy/tests/test_fetchers_data_erddap.py b/argopy/tests/test_fetchers_data_erddap.py index a0f299e6..eecf96a2 100644 --- a/argopy/tests/test_fetchers_data_erddap.py +++ b/argopy/tests/test_fetchers_data_erddap.py @@ -1,7 +1,7 @@ import logging from argopy import DataFetcher as ArgoDataFetcher -from argopy.utilities import is_list_of_strings +from argopy.utils import is_list_of_strings import pytest import xarray as xr diff --git a/argopy/tests/test_fetchers_data_erddap_bgc.py b/argopy/tests/test_fetchers_data_erddap_bgc.py index c01f230e..d3696e0a 100644 --- a/argopy/tests/test_fetchers_data_erddap_bgc.py +++ b/argopy/tests/test_fetchers_data_erddap_bgc.py @@ -2,7 +2,7 @@ import numpy as np from argopy import DataFetcher as ArgoDataFetcher -from argopy.utilities import is_list_of_strings +from argopy.utils import is_list_of_strings from argopy.stores import indexstore_pd as ArgoIndex # make sure to work with the Pandas index store with erddap-bgc import pytest diff --git a/argopy/tests/test_fetchers_data_gdac.py b/argopy/tests/test_fetchers_data_gdac.py index 4fd6c184..94faa7af 100644 --- a/argopy/tests/test_fetchers_data_gdac.py +++ b/argopy/tests/test_fetchers_data_gdac.py @@ -19,7 +19,8 @@ FileSystemHasNoCache, FtpPathError, ) -from argopy.utilities import is_list_of_strings, isconnected +from argopy.utilities import isconnected +from argopy.utils import is_list_of_strings from utils import requires_gdac from mocked_http import mocked_httpserver, mocked_server_address from collections import ChainMap diff --git a/argopy/tests/test_fetchers_facade_data.py b/argopy/tests/test_fetchers_facade_data.py index 944a1611..17e0646c 100644 --- a/argopy/tests/test_fetchers_facade_data.py +++ b/argopy/tests/test_fetchers_facade_data.py @@ -10,7 +10,7 @@ InvalidFetcher, OptionValueError, ) -from argopy.utilities import is_list_of_strings +from argopy.utils import is_list_of_strings from utils import ( requires_fetcher, requires_connection, diff --git a/argopy/tests/test_fetchers_index_gdac.py b/argopy/tests/test_fetchers_index_gdac.py index efd73afc..d5f23200 100644 --- a/argopy/tests/test_fetchers_index_gdac.py +++ b/argopy/tests/test_fetchers_index_gdac.py @@ -13,7 +13,8 @@ FileSystemHasNoCache, FtpPathError ) -from argopy.utilities import is_list_of_strings, isconnected +from argopy.utilities import isconnected +from argopy.utils import is_list_of_strings from utils import requires_gdac from mocked_http import mocked_httpserver, mocked_server_address diff --git a/argopy/tests/test_fetchers_proto.py b/argopy/tests/test_fetchers_proto.py index 0912cad7..24f669ff 100644 --- a/argopy/tests/test_fetchers_proto.py +++ b/argopy/tests/test_fetchers_proto.py @@ -1,7 +1,7 @@ import pytest import xarray from argopy.data_fetchers.proto import ArgoDataFetcherProto -from argopy.utilities import to_list +from argopy.utils import to_list class Fetcher(ArgoDataFetcherProto): diff --git a/argopy/tests/test_stores_fsspec.py b/argopy/tests/test_stores_fsspec.py index 1ad338ae..b2766d5b 100644 --- a/argopy/tests/test_stores_fsspec.py +++ 
b/argopy/tests/test_stores_fsspec.py @@ -30,9 +30,11 @@ DataNotFound, ) from argopy.utilities import ( + modified_environ, +) +from argopy.utils import ( is_list_of_datasets, is_list_of_dicts, - modified_environ, ) from utils import requires_connection, requires_connected_argovis from mocked_http import mocked_httpserver, mocked_server_address diff --git a/argopy/tests/test_stores_index.py b/argopy/tests/test_stores_index.py index 49b791ad..36823bf0 100644 --- a/argopy/tests/test_stores_index.py +++ b/argopy/tests/test_stores_index.py @@ -16,7 +16,7 @@ OptionValueError, InvalidDatasetStructure, ) -from argopy.utilities import ( +from argopy.utils import ( is_list_of_strings, ) from argopy.stores.argo_index_pd import indexstore_pandas diff --git a/argopy/tests/test_utilities.py b/argopy/tests/test_utilities.py index 24d37cb5..4f691037 100644 --- a/argopy/tests/test_utilities.py +++ b/argopy/tests/test_utilities.py @@ -20,11 +20,7 @@ erddap_ds_exists, linear_interpolation_remap, Chunker, - is_box, - is_list_of_strings, - format_oneline, is_indexbox, - check_wmo, is_wmo, - check_cyc, is_cyc, + format_oneline, wmo2box, modified_environ, wrap_longitude, @@ -35,6 +31,10 @@ get_coriolis_profile_id, get_ea_profile_page, ) +from argopy.utils import ( + is_box, + is_list_of_strings, +) from argopy.errors import InvalidFetcherAccessPoint, FtpPathError from argopy import DataFetcher as ArgoDataFetcher from utils import ( @@ -394,104 +394,6 @@ def test_chunk_box4d(self): ) -class Test_is_box: - @pytest.fixture(autouse=True) - def create_data(self): - self.BOX3d = [0, 20, 40, 60, 0, 1000] - self.BOX4d = [0, 20, 40, 60, 0, 1000, "2001-01", "2001-6"] - - def test_box_ok(self): - assert is_box(self.BOX3d) - assert is_box(self.BOX4d) - - def test_box_notok(self): - for box in [[], list(range(0, 12))]: - with pytest.raises(ValueError): - is_box(box) - with pytest.raises(ValueError): - is_box(box, errors="raise") - assert not is_box(box, errors="ignore") - - def test_box_invalid_num(self): - for i in [0, 1, 2, 3, 4, 5]: - box = self.BOX3d - box[i] = "str" - with pytest.raises(ValueError): - is_box(box) - with pytest.raises(ValueError): - is_box(box, errors="raise") - assert not is_box(box, errors="ignore") - - def test_box_invalid_range(self): - for i in [0, 1, 2, 3, 4, 5]: - box = self.BOX3d - box[i] = -1000 - with pytest.raises(ValueError): - is_box(box) - with pytest.raises(ValueError): - is_box(box, errors="raise") - assert not is_box(box, errors="ignore") - - def test_box_invalid_str(self): - for i in [6, 7]: - box = self.BOX4d - box[i] = "str" - with pytest.raises(ValueError): - is_box(box) - with pytest.raises(ValueError): - is_box(box, errors="raise") - assert not is_box(box, errors="ignore") - - -class Test_is_indexbox: - @pytest.fixture(autouse=True) - def create_data(self): - self.BOX2d = [0, 20, 40, 60] - self.BOX3d = [0, 20, 40, 60, "2001-01", "2001-6"] - - def test_box_ok(self): - assert is_indexbox(self.BOX2d) - assert is_indexbox(self.BOX3d) - - def test_box_notok(self): - for box in [[], list(range(0, 12))]: - with pytest.raises(ValueError): - is_indexbox(box) - with pytest.raises(ValueError): - is_indexbox(box, errors="raise") - assert not is_indexbox(box, errors="ignore") - - def test_box_invalid_num(self): - for i in [0, 1, 2, 3]: - box = self.BOX2d - box[i] = "str" - with pytest.raises(ValueError): - is_indexbox(box) - with pytest.raises(ValueError): - is_indexbox(box, errors="raise") - assert not is_indexbox(box, errors="ignore") - - def test_box_invalid_range(self): - for i in [0, 1, 2, 
3]: - box = self.BOX2d - box[i] = -1000 - with pytest.raises(ValueError): - is_indexbox(box) - with pytest.raises(ValueError): - is_indexbox(box, errors="raise") - assert not is_indexbox(box, errors="ignore") - - def test_box_invalid_str(self): - for i in [4, 5]: - box = self.BOX3d - box[i] = "str" - with pytest.raises(ValueError): - is_indexbox(box) - with pytest.raises(ValueError): - is_indexbox(box, errors="raise") - assert not is_indexbox(box, errors="ignore") - - def test_format_oneline(): s = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore" assert isinstance(format_oneline(s), str) @@ -500,78 +402,6 @@ def test_format_oneline(): assert isinstance(s, str) and len(s) == 12 -def test_is_wmo(): - assert is_wmo(12345) - assert is_wmo([12345]) - assert is_wmo([12345, 1234567]) - - with pytest.raises(ValueError): - is_wmo(1234, errors="raise") - with pytest.raises(ValueError): - is_wmo(-1234, errors="raise") - with pytest.raises(ValueError): - is_wmo(1234.12, errors="raise") - with pytest.raises(ValueError): - is_wmo(12345.7, errors="raise") - - with pytest.warns(UserWarning): - is_wmo(1234, errors="warn") - with pytest.warns(UserWarning): - is_wmo(-1234, errors="warn") - with pytest.warns(UserWarning): - is_wmo(1234.12, errors="warn") - with pytest.warns(UserWarning): - is_wmo(12345.7, errors="warn") - - assert not is_wmo(12, errors="ignore") - assert not is_wmo(-12, errors="ignore") - assert not is_wmo(1234.12, errors="ignore") - assert not is_wmo(12345.7, errors="ignore") - - -def test_check_wmo(): - assert check_wmo(12345) == [12345] - assert check_wmo([1234567]) == [1234567] - assert check_wmo([12345, 1234567]) == [12345, 1234567] - assert check_wmo(np.array((12345, 1234567), dtype='int')) == [12345, 1234567] - - -def test_is_cyc(): - assert is_cyc(123) - assert is_cyc([123]) - assert is_cyc([12, 123, 1234]) - - with pytest.raises(ValueError): - is_cyc(12345, errors="raise") - with pytest.raises(ValueError): - is_cyc(-1234, errors="raise") - with pytest.raises(ValueError): - is_cyc(1234.12, errors="raise") - with pytest.raises(ValueError): - is_cyc(12345.7, errors="raise") - - with pytest.warns(UserWarning): - is_cyc(12345, errors="warn") - with pytest.warns(UserWarning): - is_cyc(-1234, errors="warn") - with pytest.warns(UserWarning): - is_cyc(1234.12, errors="warn") - with pytest.warns(UserWarning): - is_cyc(12345.7, errors="warn") - - assert not is_cyc(12345, errors="ignore") - assert not is_cyc(-12, errors="ignore") - assert not is_cyc(1234.12, errors="ignore") - assert not is_cyc(12345.7, errors="ignore") - - -def test_check_cyc(): - assert check_cyc(123) == [123] - assert check_cyc([12]) == [12] - assert check_cyc([12, 123]) == [12, 123] - assert check_cyc(np.array((123, 1234), dtype='int')) == [123, 1234] - - def test_modified_environ(): os.environ["DUMMY_ENV_ARGOPY"] = 'initial' with modified_environ(DUMMY_ENV_ARGOPY='toto'): diff --git a/argopy/tests/test_utils_checkers.py b/argopy/tests/test_utils_checkers.py new file mode 100644 index 00000000..5ccc14af --- /dev/null +++ b/argopy/tests/test_utils_checkers.py @@ -0,0 +1,179 @@ +import pytest +import numpy as np + +from argopy.utils import ( + is_box, is_indexbox, + check_wmo, is_wmo, + check_cyc, is_cyc, +) + + +class Test_is_box: + @pytest.fixture(autouse=True) + def create_data(self): + self.BOX3d = [0, 20, 40, 60, 0, 1000] + self.BOX4d = [0, 20, 40, 60, 0, 1000, "2001-01", "2001-6"] + + def test_box_ok(self): + assert is_box(self.BOX3d) + assert 
is_box(self.BOX4d) + + def test_box_notok(self): + for box in [[], list(range(0, 12))]: + with pytest.raises(ValueError): + is_box(box) + with pytest.raises(ValueError): + is_box(box, errors="raise") + assert not is_box(box, errors="ignore") + + def test_box_invalid_num(self): + for i in [0, 1, 2, 3, 4, 5]: + box = self.BOX3d + box[i] = "str" + with pytest.raises(ValueError): + is_box(box) + with pytest.raises(ValueError): + is_box(box, errors="raise") + assert not is_box(box, errors="ignore") + + def test_box_invalid_range(self): + for i in [0, 1, 2, 3, 4, 5]: + box = self.BOX3d + box[i] = -1000 + with pytest.raises(ValueError): + is_box(box) + with pytest.raises(ValueError): + is_box(box, errors="raise") + assert not is_box(box, errors="ignore") + + def test_box_invalid_str(self): + for i in [6, 7]: + box = self.BOX4d + box[i] = "str" + with pytest.raises(ValueError): + is_box(box) + with pytest.raises(ValueError): + is_box(box, errors="raise") + assert not is_box(box, errors="ignore") + + +class Test_is_indexbox: + @pytest.fixture(autouse=True) + def create_data(self): + self.BOX2d = [0, 20, 40, 60] + self.BOX3d = [0, 20, 40, 60, "2001-01", "2001-6"] + + def test_box_ok(self): + assert is_indexbox(self.BOX2d) + assert is_indexbox(self.BOX3d) + + def test_box_notok(self): + for box in [[], list(range(0, 12))]: + with pytest.raises(ValueError): + is_indexbox(box) + with pytest.raises(ValueError): + is_indexbox(box, errors="raise") + assert not is_indexbox(box, errors="ignore") + + def test_box_invalid_num(self): + for i in [0, 1, 2, 3]: + box = self.BOX2d + box[i] = "str" + with pytest.raises(ValueError): + is_indexbox(box) + with pytest.raises(ValueError): + is_indexbox(box, errors="raise") + assert not is_indexbox(box, errors="ignore") + + def test_box_invalid_range(self): + for i in [0, 1, 2, 3]: + box = self.BOX2d + box[i] = -1000 + with pytest.raises(ValueError): + is_indexbox(box) + with pytest.raises(ValueError): + is_indexbox(box, errors="raise") + assert not is_indexbox(box, errors="ignore") + + def test_box_invalid_str(self): + for i in [4, 5]: + box = self.BOX3d + box[i] = "str" + with pytest.raises(ValueError): + is_indexbox(box) + with pytest.raises(ValueError): + is_indexbox(box, errors="raise") + assert not is_indexbox(box, errors="ignore") + + +def test_is_wmo(): + assert is_wmo(12345) + assert is_wmo([12345]) + assert is_wmo([12345, 1234567]) + + with pytest.raises(ValueError): + is_wmo(1234, errors="raise") + with pytest.raises(ValueError): + is_wmo(-1234, errors="raise") + with pytest.raises(ValueError): + is_wmo(1234.12, errors="raise") + with pytest.raises(ValueError): + is_wmo(12345.7, errors="raise") + + with pytest.warns(UserWarning): + is_wmo(1234, errors="warn") + with pytest.warns(UserWarning): + is_wmo(-1234, errors="warn") + with pytest.warns(UserWarning): + is_wmo(1234.12, errors="warn") + with pytest.warns(UserWarning): + is_wmo(12345.7, errors="warn") + + assert not is_wmo(12, errors="ignore") + assert not is_wmo(-12, errors="ignore") + assert not is_wmo(1234.12, errors="ignore") + assert not is_wmo(12345.7, errors="ignore") + + +def test_check_wmo(): + assert check_wmo(12345) == [12345] + assert check_wmo([1234567]) == [1234567] + assert check_wmo([12345, 1234567]) == [12345, 1234567] + assert check_wmo(np.array((12345, 1234567), dtype='int')) == [12345, 1234567] + + +def test_is_cyc(): + assert is_cyc(123) + assert is_cyc([123]) + assert is_cyc([12, 123, 1234]) + + with pytest.raises(ValueError): + is_cyc(12345, errors="raise") + with 
pytest.raises(ValueError): + is_cyc(-1234, errors="raise") + with pytest.raises(ValueError): + is_cyc(1234.12, errors="raise") + with pytest.raises(ValueError): + is_cyc(12345.7, errors="raise") + + with pytest.warns(UserWarning): + is_cyc(12345, errors="warn") + with pytest.warns(UserWarning): + is_cyc(-1234, errors="warn") + with pytest.warns(UserWarning): + is_cyc(1234.12, errors="warn") + with pytest.warns(UserWarning): + is_cyc(12345.7, errors="warn") + + assert not is_cyc(12345, errors="ignore") + assert not is_cyc(-12, errors="ignore") + assert not is_cyc(1234.12, errors="ignore") + assert not is_cyc(12345.7, errors="ignore") + + +def test_check_cyc(): + assert check_cyc(123) == [123] + assert check_cyc([12]) == [12] + assert check_cyc([12, 123]) == [12, 123] + assert check_cyc(np.array((123, 1234), dtype='int')) == [123, 1234] + diff --git a/argopy/utilities.py b/argopy/utilities.py index 396abe0b..955f3250 100644 --- a/argopy/utilities.py +++ b/argopy/utilities.py @@ -81,10 +81,6 @@ log = logging.getLogger("argopy.utilities") -with open(os.path.join(path2assets, "data_types.json"), "r") as f: - DATA_TYPES = json.load(f) - - def clear_cache(fs=None): """ Delete argopy cache folder content """ @@ -223,6 +219,7 @@ def load_dict(ptype): else: raise ValueError("Invalid dictionary name") + def mapp_dict(Adictionnary, Avalue): if Avalue not in Adictionnary: return "Unknown" @@ -230,202 +227,8 @@ def mapp_dict(Adictionnary, Avalue): return Adictionnary[Avalue] -def list_available_data_src(): - """ List all available data sources """ - sources = {} - try: - from .data_fetchers import erddap_data as Erddap_Fetchers - # Ensure we're loading the erddap data fetcher with the current options: - Erddap_Fetchers.api_server_check = Erddap_Fetchers.api_server_check.replace(Erddap_Fetchers.api_server, OPTIONS['erddap']) - Erddap_Fetchers.api_server = OPTIONS['erddap'] - - sources["erddap"] = Erddap_Fetchers - except Exception: - warnings.warn( - "An error occurred while loading the ERDDAP data fetcher, " - "it will not be available !\n%s\n%s" - % (sys.exc_info()[0], sys.exc_info()[1]) - ) - pass - - try: - from .data_fetchers import argovis_data as ArgoVis_Fetchers - - sources["argovis"] = ArgoVis_Fetchers - except Exception: - warnings.warn( - "An error occurred while loading the ArgoVis data fetcher, " - "it will not be available !\n%s\n%s" - % (sys.exc_info()[0], sys.exc_info()[1]) - ) - pass - - try: - from .data_fetchers import gdacftp_data as GDAC_Fetchers - # Ensure we're loading the gdac data fetcher with the current options: - GDAC_Fetchers.api_server_check = OPTIONS['ftp'] - GDAC_Fetchers.api_server = OPTIONS['ftp'] - - sources["gdac"] = GDAC_Fetchers - except Exception: - warnings.warn( - "An error occurred while loading the GDAC data fetcher, " - "it will not be available !\n%s\n%s" - % (sys.exc_info()[0], sys.exc_info()[1]) - ) - pass - - # return dict(sorted(sources.items())) - return sources - - -def list_available_index_src(): - """ List all available index sources """ - sources = {} - try: - from .data_fetchers import erddap_index as Erddap_Fetchers - # Ensure we're loading the erddap data fetcher with the current options: - Erddap_Fetchers.api_server_check = Erddap_Fetchers.api_server_check.replace(Erddap_Fetchers.api_server, OPTIONS['erddap']) - Erddap_Fetchers.api_server = OPTIONS['erddap'] - - sources["erddap"] = Erddap_Fetchers - except Exception: - warnings.warn( - "An error occurred while loading the ERDDAP index fetcher, " - "it will not be available !\n%s\n%s" - % 
(sys.exc_info()[0], sys.exc_info()[1]) - ) - pass - - try: - from .data_fetchers import gdacftp_index as GDAC_Fetchers - # Ensure we're loading the gdac data fetcher with the current options: - GDAC_Fetchers.api_server_check = OPTIONS['ftp'] - GDAC_Fetchers.api_server = OPTIONS['ftp'] - - sources["gdac"] = GDAC_Fetchers - except Exception: - warnings.warn( - "An error occurred while loading the GDAC index fetcher, " - "it will not be available !\n%s\n%s" - % (sys.exc_info()[0], sys.exc_info()[1]) - ) - pass - - return sources - - -def list_standard_variables(): - """ List of variables for standard users """ - return [ - "DATA_MODE", - "LATITUDE", - "LONGITUDE", - "POSITION_QC", - "DIRECTION", - "PLATFORM_NUMBER", - "CYCLE_NUMBER", - "PRES", - "TEMP", - "PSAL", - "PRES_QC", - "TEMP_QC", - "PSAL_QC", - "PRES_ADJUSTED", - "TEMP_ADJUSTED", - "PSAL_ADJUSTED", - "PRES_ADJUSTED_QC", - "TEMP_ADJUSTED_QC", - "PSAL_ADJUSTED_QC", - "PRES_ADJUSTED_ERROR", - "TEMP_ADJUSTED_ERROR", - "PSAL_ADJUSTED_ERROR", - "PRES_ERROR", # can be created from PRES_ADJUSTED_ERROR after a filter_data_mode - "TEMP_ERROR", - "PSAL_ERROR", - "JULD", - "JULD_QC", - "TIME", - "TIME_QC", - # "CONFIG_MISSION_NUMBER", - ] - - -def list_multiprofile_file_variables(): - """ List of variables in a netcdf multiprofile file. - - This is for files created by GDAC under //_prof.nc - """ - return [ - "CONFIG_MISSION_NUMBER", - "CYCLE_NUMBER", - "DATA_CENTRE", - "DATA_MODE", - "DATA_STATE_INDICATOR", - "DATA_TYPE", - "DATE_CREATION", - "DATE_UPDATE", - "DC_REFERENCE", - "DIRECTION", - "FIRMWARE_VERSION", - "FLOAT_SERIAL_NO", - "FORMAT_VERSION", - "HANDBOOK_VERSION", - "HISTORY_ACTION", - "HISTORY_DATE", - "HISTORY_INSTITUTION", - "HISTORY_PARAMETER", - "HISTORY_PREVIOUS_VALUE", - "HISTORY_QCTEST", - "HISTORY_REFERENCE", - "HISTORY_SOFTWARE", - "HISTORY_SOFTWARE_RELEASE", - "HISTORY_START_PRES", - "HISTORY_STEP", - "HISTORY_STOP_PRES", - "JULD", - "JULD_LOCATION", - "JULD_QC", - "LATITUDE", - "LONGITUDE", - "PARAMETER", - "PI_NAME", - "PLATFORM_NUMBER", - "PLATFORM_TYPE", - "POSITIONING_SYSTEM", - "POSITION_QC", - "PRES", - "PRES_ADJUSTED", - "PRES_ADJUSTED_ERROR", - "PRES_ADJUSTED_QC", - "PRES_QC", - "PROFILE_PRES_QC", - "PROFILE_PSAL_QC", - "PROFILE_TEMP_QC", - "PROJECT_NAME", - "PSAL", - "PSAL_ADJUSTED", - "PSAL_ADJUSTED_ERROR", - "PSAL_ADJUSTED_QC", - "PSAL_QC", - "REFERENCE_DATE_TIME", - "SCIENTIFIC_CALIB_COEFFICIENT", - "SCIENTIFIC_CALIB_COMMENT", - "SCIENTIFIC_CALIB_DATE", - "SCIENTIFIC_CALIB_EQUATION", - "STATION_PARAMETERS", - "TEMP", - "TEMP_ADJUSTED", - "TEMP_ADJUSTED_ERROR", - "TEMP_ADJUSTED_QC", - "TEMP_QC", - "VERTICAL_SAMPLING_SCHEME", - "WMO_INST_TYPE", - ] - - def get_sys_info(): - "Returns system information as a dict" + """Returns system information as a dict""" blob = [] @@ -625,94 +428,6 @@ def show_options(file=sys.stdout): # noqa: C901 print(f"{k}: {v}", file=file) -def check_gdac_path(path, errors='ignore'): # noqa: C901 - """ Check if a path has the expected GDAC ftp structure - - Expected GDAC ftp structure:: - - . - └── dac - ├── aoml - ├── ... - ├── coriolis - ├── ... 
- ├── meds - └── nmdis - - This check will return True if at least one DAC sub-folder is found under path/dac/ - - Examples:: - >>> check_gdac_path("https://data-argo.ifremer.fr") # True - >>> check_gdac_path("ftp://ftp.ifremer.fr/ifremer/argo") # True - >>> check_gdac_path("ftp://usgodae.org/pub/outgoing/argo") # True - >>> check_gdac_path("/home/ref-argo/gdac") # True - >>> check_gdac_path("https://www.ifremer.fr") # False - >>> check_gdac_path("ftp://usgodae.org/pub/outgoing") # False - - Parameters - ---------- - path: str - Path name to check, including access protocol - errors: str - "ignore" or "raise" (or "warn") - - Returns - ------- - checked: boolean - True if at least one DAC folder is found under path/dac/ - False otherwise - """ - # Create a file system for this path - if split_protocol(path)[0] is None: - fs = fsspec.filesystem('file') - elif 'https' in split_protocol(path)[0]: - fs = fsspec.filesystem('http') - elif 'ftp' in split_protocol(path)[0]: - try: - host = split_protocol(path)[-1].split('/')[0] - fs = fsspec.filesystem('ftp', host=host) - except gaierror: - if errors == 'raise': - raise FtpPathError("Can't get address info (GAIerror) on '%s'" % host) - elif errors == "warn": - warnings.warn("Can't get address info (GAIerror) on '%s'" % host) - return False - else: - return False - else: - raise FtpPathError("Unknown protocol for an Argo GDAC host: %s" % split_protocol(path)[0]) - - # dacs = [ - # "aoml", - # "bodc", - # "coriolis", - # "csio", - # "csiro", - # "incois", - # "jma", - # "kma", - # "kordi", - # "meds", - # "nmdis", - # ] - - # Case 1: - check1 = ( - fs.exists(path) - and fs.exists(fs.sep.join([path, "dac"])) - # and np.any([fs.exists(fs.sep.join([path, "dac", dac])) for dac in dacs]) # Take too much time on http/ftp GDAC server - ) - if check1: - return True - elif errors == "raise": - raise FtpPathError("This path is not GDAC compliant (no `dac` folder with legitimate sub-folder):\n%s" % path) - - elif errors == "warn": - warnings.warn("This path is not GDAC compliant:\n%s" % path) - return False - else: - return False - def isconnected(host: str = "https://www.ifremer.fr", maxtry: int = 10): """Check if an URL is alive @@ -1372,16 +1087,6 @@ def format_oneline(s, max_width=65): return s -def to_list(obj): - """Make sure that an expected list is indeed a list""" - if not isinstance(obj, list): - if isinstance(obj, np.ndarray): - obj = list(obj) - else: - obj = [obj] - return obj - - def warnUnless(ok, txt): """Function to raise a warning unless condition is True @@ -1843,157 +1548,6 @@ def fix_localhost(host): return dict(sorted(output.items())) -class DocInherit(object): - """Docstring inheriting method descriptor - - The class itself is also used as a decorator - - Usage: - - class Foo(object): - def foo(self): - "Frobber" - pass - - class Bar(Foo): - @doc_inherit - def foo(self): - pass - - Now, Bar.foo.__doc__ == Bar().foo.__doc__ == Foo.foo.__doc__ == "Frobber" - - src: https://code.activestate.com/recipes/576862/ - """ - - def __init__(self, mthd): - self.mthd = mthd - self.name = mthd.__name__ - - def __get__(self, obj, cls): - if obj: - return self.get_with_inst(obj, cls) - else: - return self.get_no_inst(cls) - - def get_with_inst(self, obj, cls): - - overridden = getattr(super(cls, obj), self.name, None) - - @wraps(self.mthd, assigned=('__name__', '__module__')) - def f(*args, **kwargs): - return self.mthd(obj, *args, **kwargs) - - return self.use_parent_doc(f, overridden) - - def get_no_inst(self, cls): - - for parent in cls.__mro__[1:]: - 
overridden = getattr(parent, self.name, None) - if overridden: - break - - @wraps(self.mthd, assigned=('__name__', '__module__')) - def f(*args, **kwargs): - return self.mthd(*args, **kwargs) - - return self.use_parent_doc(f, overridden) - - def use_parent_doc(self, func, source): - if source is None: - raise NameError("Can't find '%s' in parents" % self.name) - func.__doc__ = source.__doc__ - return func - - -doc_inherit = DocInherit - - -def deprecated(reason): - """Deprecation warning decorator. - - This is a decorator which can be used to mark functions - as deprecated. It will result in a warning being emitted - when the function is used. - - Parameters - ---------- - reason: {str, None} - Text message to send with deprecation warning - - Examples - -------- - The @deprecated can be used with a 'reason'. - - .. code-block:: python - - @deprecated("please, use another function") - def old_function(x, y): - pass - - or without: - - .. code-block:: python - - @deprecated - def old_function(x, y): - pass - - References - ---------- - https://stackoverflow.com/a/40301488 - """ - import inspect - - if isinstance(reason, str): - - def decorator(func1): - - if inspect.isclass(func1): - fmt1 = "Call to deprecated class {name} ({reason})." - else: - fmt1 = "Call to deprecated function {name} ({reason})." - - @wraps(func1) - def new_func1(*args, **kwargs): - warnings.simplefilter('always', DeprecationWarning) - warnings.warn( - fmt1.format(name=func1.__name__, reason=reason), - category=DeprecationWarning, - stacklevel=2 - ) - warnings.simplefilter('default', DeprecationWarning) - return func1(*args, **kwargs) - - return new_func1 - - return decorator - - elif inspect.isclass(reason) or inspect.isfunction(reason): - - func2 = reason - - if inspect.isclass(func2): - fmt2 = "Call to deprecated class {name}." - else: - fmt2 = "Call to deprecated function {name}." - - @wraps(func2) - def new_func2(*args, **kwargs): - warnings.simplefilter('always', DeprecationWarning) - warnings.warn( - fmt2.format(name=func2.__name__), - category=DeprecationWarning, - stacklevel=2 - ) - warnings.simplefilter('default', DeprecationWarning) - return func2(*args, **kwargs) - - return new_func2 - - else: - raise TypeError(repr(type(reason))) - - class RegistryItem(ABC): """Prototype for possible custom items in a Registry""" @property @@ -2246,456 +1800,6 @@ def copy(self): return self.__copy__() -def get_coriolis_profile_id(WMO, CYC=None, **kwargs): - """ Return a :class:`pandas.DataFrame` with CORIOLIS ID of WMO/CYC profile pairs - - This method get ID by requesting the dataselection.euro-argo.eu trajectory API. - - Parameters - ---------- - WMO: int, list(int) - Define the list of Argo floats. This is a list of integers with WMO float identifiers. - WMO is the World Meteorological Organization. - CYC: int, list(int) - Define the list of cycle numbers to load ID for each Argo floats listed in ``WMO``. 
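A minimal usage sketch for this fetcher, assuming the argopy.related import path introduced later in this series; the WMO and cycle values are illustrative (taken from the test suite) and the call needs network access to the euro-argo API:

from argopy.related import get_coriolis_profile_id

# Illustrative float / cycle selection, for demonstration only
df = get_coriolis_profile_id(6901929, CYC=12)
print(df[["PLATFORM_NUMBER", "CYCLE_NUMBER", "ID"]])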
- - Returns - ------- - :class:`pandas.DataFrame` - """ - WMO_list = check_wmo(WMO) - if CYC is not None: - CYC_list = check_cyc(CYC) - if 'api_server' in kwargs: - api_server = kwargs['api_server'] - elif OPTIONS['server'] is not None: - api_server = OPTIONS['server'] - else: - api_server = "https://dataselection.euro-argo.eu/api" - URIs = [api_server + "/trajectory/%i" % wmo for wmo in WMO_list] - - def prec(data, url): - # Transform trajectory json to dataframe - # See: https://dataselection.euro-argo.eu/swagger-ui.html#!/cycle-controller/getCyclesByPlatformCodeUsingGET - WMO = check_wmo(url.split("/")[-1])[0] - rows = [] - for profile in data: - keys = [x for x in profile.keys() if x not in ["coordinate"]] - meta_row = dict((key, profile[key]) for key in keys) - for row in profile["coordinate"]: - meta_row[row] = profile["coordinate"][row] - meta_row["WMO"] = WMO - rows.append(meta_row) - return pd.DataFrame(rows) - - from .stores import httpstore - fs = httpstore(cache=True, cachedir=OPTIONS['cachedir']) - data = fs.open_mfjson(URIs, preprocess=prec, errors="raise", url_follow=True) - - # Merge results (list of dataframe): - key_map = { - "id": "ID", - "lat": "LATITUDE", - "lon": "LONGITUDE", - "cvNumber": "CYCLE_NUMBER", - "level": "level", - "WMO": "PLATFORM_NUMBER", - } - for i, df in enumerate(data): - df = df.reset_index() - df = df.rename(columns=key_map) - df = df[[value for value in key_map.values() if value in df.columns]] - data[i] = df - df = pd.concat(data, ignore_index=True) - df.sort_values(by=["PLATFORM_NUMBER", "CYCLE_NUMBER"], inplace=True) - df = df.reset_index(drop=True) - # df = df.set_index(["PLATFORM_NUMBER", "CYCLE_NUMBER"]) - df = df.astype({"ID": int}) - if CYC is not None: - df = pd.concat([df[df["CYCLE_NUMBER"] == cyc] for cyc in CYC_list]).reset_index( - drop=True - ) - return df[ - ["PLATFORM_NUMBER", "CYCLE_NUMBER", "ID", "LATITUDE", "LONGITUDE", "level"] - ] - - -def get_ea_profile_page(WMO, CYC=None, **kwargs): - """ Return a list of URL - - Parameters - ---------- - WMO: int, list(int) - WMO must be an integer or an iterable with elements that can be casted as integers - CYC: int, list(int), default (None) - CYC must be an integer or an iterable with elements that can be casted as positive integers - - Returns - ------- - list(str) - - See also - -------- - get_coriolis_profile_id - """ - df = get_coriolis_profile_id(WMO, CYC, **kwargs) - url = "https://dataselection.euro-argo.eu/cycle/{}" - return [url.format(this_id) for this_id in sorted(df["ID"])] - - -@deprecated -def cast_types(ds): # noqa: C901 - """ Make sure variables are of the appropriate types according to Argo - - #todo: This is hard coded, but should be retrieved from an API somewhere. - Should be able to handle all possible variables encountered in the Argo dataset. 
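For context, the per-variable cast performed below reduces to an astype call on the DataArray values plus a "casted" flag; a rough standalone sketch with an illustrative, object-typed variable:

import numpy as np
import xarray as xr

# An object-typed cycle-number-like variable, purely for illustration
da = xr.DataArray(np.array(["1", "2", "3"], dtype=object), name="CYCLE_NUMBER")
da.values = da.values.astype(np.int32)  # cast in place, as cast_this() does
da.attrs["casted"] = 1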
- - Parameter - --------- - :class:`xarray.DataSet` - - Returns - ------- - :class:`xarray.DataSet` - """ - - list_str = [ - "PLATFORM_NUMBER", - "DATA_MODE", - "DIRECTION", - "DATA_CENTRE", - "DATA_TYPE", - "FORMAT_VERSION", - "HANDBOOK_VERSION", - "PROJECT_NAME", - "PI_NAME", - "STATION_PARAMETERS", - "DATA_CENTER", - "DC_REFERENCE", - "DATA_STATE_INDICATOR", - "PLATFORM_TYPE", - "FIRMWARE_VERSION", - "POSITIONING_SYSTEM", - "PROFILE_PRES_QC", - "PROFILE_PSAL_QC", - "PROFILE_TEMP_QC", - "PARAMETER", - "SCIENTIFIC_CALIB_EQUATION", - "SCIENTIFIC_CALIB_COEFFICIENT", - "SCIENTIFIC_CALIB_COMMENT", - "HISTORY_INSTITUTION", - "HISTORY_STEP", - "HISTORY_SOFTWARE", - "HISTORY_SOFTWARE_RELEASE", - "HISTORY_REFERENCE", - "HISTORY_QCTEST", - "HISTORY_ACTION", - "HISTORY_PARAMETER", - "VERTICAL_SAMPLING_SCHEME", - "FLOAT_SERIAL_NO", - "SOURCE", - "EXPOCODE", - "QCLEVEL", - ] - list_int = [ - "PLATFORM_NUMBER", - "WMO_INST_TYPE", - "WMO_INST_TYPE", - "CYCLE_NUMBER", - "CONFIG_MISSION_NUMBER", - ] - list_datetime = [ - "REFERENCE_DATE_TIME", - "DATE_CREATION", - "DATE_UPDATE", - "JULD", - "JULD_LOCATION", - "SCIENTIFIC_CALIB_DATE", - "HISTORY_DATE", - "TIME" - ] - - def fix_weird_bytes(x): - x = x.replace(b"\xb1", b"+/-") - return x - fix_weird_bytes = np.vectorize(fix_weird_bytes) - - def cast_this(da, type): - """ Low-level casting of DataArray values """ - try: - da.values = da.values.astype(type) - da.attrs["casted"] = 1 - except Exception: - msg = "Oops! %s occurred. Fail to cast <%s> into %s for: %s. Encountered unique values: %s" % (sys.exc_info()[0], str(da.dtype), type, da.name, str(np.unique(da))) - log.debug(msg) - return da - - def cast_this_da(da): - """ Cast any DataArray """ - v = da.name - da.attrs["casted"] = 0 - if v in list_str and da.dtype == "O": # Object - if v in ["SCIENTIFIC_CALIB_COEFFICIENT"]: - da.values = fix_weird_bytes(da.values) - da = cast_this(da, str) - - if v in list_int: # and da.dtype == 'O': # Object - da = cast_this(da, np.int32) - - if v in list_datetime and da.dtype == "O": # Object - if ( - "conventions" in da.attrs - and da.attrs["conventions"] == "YYYYMMDDHHMISS" - ): - if da.size != 0: - if len(da.dims) <= 1: - val = da.astype(str).values.astype("U14") - # This should not happen, but still ! That's real world data - val[val == " "] = "nan" - da.values = pd.to_datetime(val, format="%Y%m%d%H%M%S") - else: - s = da.stack(dummy_index=da.dims) - val = s.astype(str).values.astype("U14") - # This should not happen, but still ! 
That's real world data - val[val == ""] = "nan" - val[val == " "] = "nan" - # - s.values = pd.to_datetime(val, format="%Y%m%d%H%M%S") - da.values = s.unstack("dummy_index") - da = cast_this(da, 'datetime64[s]') - else: - da = cast_this(da, 'datetime64[s]') - - elif v == "SCIENTIFIC_CALIB_DATE": - da = cast_this(da, str) - s = da.stack(dummy_index=da.dims) - s.values = pd.to_datetime(s.values, format="%Y%m%d%H%M%S") - da.values = (s.unstack("dummy_index")).values - da = cast_this(da, 'datetime64[s]') - - if "QC" in v and "PROFILE" not in v and "QCTEST" not in v: - if da.dtype == "O": # convert object to string - da = cast_this(da, str) - - # Address weird string values: - # (replace missing or nan values by a '0' that will be cast as an integer later - - if da.dtype == " + + Examples:: + >>> check_gdac_path("https://data-argo.ifremer.fr") # True + >>> check_gdac_path("ftp://ftp.ifremer.fr/ifremer/argo") # True + >>> check_gdac_path("ftp://usgodae.org/pub/outgoing/argo") # True + >>> check_gdac_path("/home/ref-argo/gdac") # True + >>> check_gdac_path("https://www.ifremer.fr") # False + >>> check_gdac_path("ftp://usgodae.org/pub/outgoing") # False + + Parameters + ---------- + path: str + Path name to check, including access protocol + errors: str + "ignore" or "raise" (or "warn") + + Returns + ------- + checked: boolean + True if at least one DAC folder is found under path/dac/ + False otherwise + """ + # Create a file system for this path + if split_protocol(path)[0] is None: + fs = fsspec.filesystem('file') + elif 'https' in split_protocol(path)[0]: + fs = fsspec.filesystem('http') + elif 'ftp' in split_protocol(path)[0]: + try: + host = split_protocol(path)[-1].split('/')[0] + fs = fsspec.filesystem('ftp', host=host) + except gaierror: + if errors == 'raise': + raise FtpPathError("Can't get address info (GAIerror) on '%s'" % host) + elif errors == "warn": + warnings.warn("Can't get address info (GAIerror) on '%s'" % host) + return False + else: + return False + else: + raise FtpPathError("Unknown protocol for an Argo GDAC host: %s" % split_protocol(path)[0]) + + # dacs = [ + # "aoml", + # "bodc", + # "coriolis", + # "csio", + # "csiro", + # "incois", + # "jma", + # "kma", + # "kordi", + # "meds", + # "nmdis", + # ] + + # Case 1: + check1 = ( + fs.exists(path) + and fs.exists(fs.sep.join([path, "dac"])) + # and np.any([fs.exists(fs.sep.join([path, "dac", dac])) for dac in dacs]) # Take too much time on http/ftp GDAC server + ) + if check1: + return True + elif errors == "raise": + raise FtpPathError("This path is not GDAC compliant (no `dac` folder with legitimate sub-folder):\n%s" % path) + + elif errors == "warn": + warnings.warn("This path is not GDAC compliant:\n%s" % path) + return False + else: + return False + diff --git a/argopy/utils/decorators.py b/argopy/utils/decorators.py new file mode 100644 index 00000000..6cff7e56 --- /dev/null +++ b/argopy/utils/decorators.py @@ -0,0 +1,154 @@ +from functools import wraps +import warnings + + +class DocInherit(object): + """Docstring inheriting method descriptor + + The class itself is also used as a decorator + + Usage: + + class Foo(object): + def foo(self): + "Frobber" + pass + + class Bar(Foo): + @doc_inherit + def foo(self): + pass + + Now, Bar.foo.__doc__ == Bar().foo.__doc__ == Foo.foo.__doc__ == "Frobber" + + src: https://code.activestate.com/recipes/576862/ + """ + + def __init__(self, mthd): + self.mthd = mthd + self.name = mthd.__name__ + + def __get__(self, obj, cls): + if obj: + return self.get_with_inst(obj, cls) + else: + 
return self.get_no_inst(cls) + + def get_with_inst(self, obj, cls): + + overridden = getattr(super(cls, obj), self.name, None) + + @wraps(self.mthd, assigned=('__name__', '__module__')) + def f(*args, **kwargs): + return self.mthd(obj, *args, **kwargs) + + return self.use_parent_doc(f, overridden) + + def get_no_inst(self, cls): + + for parent in cls.__mro__[1:]: + overridden = getattr(parent, self.name, None) + if overridden: + break + + @wraps(self.mthd, assigned=('__name__', '__module__')) + def f(*args, **kwargs): + return self.mthd(*args, **kwargs) + + return self.use_parent_doc(f, overridden) + + def use_parent_doc(self, func, source): + if source is None: + raise NameError("Can't find '%s' in parents" % self.name) + func.__doc__ = source.__doc__ + return func + + +doc_inherit = DocInherit + + +def deprecated(reason): + """Deprecation warning decorator. + + This is a decorator which can be used to mark functions + as deprecated. It will result in a warning being emitted + when the function is used. + + Parameters + ---------- + reason: {str, None} + Text message to send with deprecation warning + + Examples + -------- + The @deprecated can be used with a 'reason'. + + .. code-block:: python + + @deprecated("please, use another function") + def old_function(x, y): + pass + + or without: + + .. code-block:: python + + @deprecated + def old_function(x, y): + pass + + References + ---------- + https://stackoverflow.com/a/40301488 + """ + import inspect + + if isinstance(reason, str): + + def decorator(func1): + + if inspect.isclass(func1): + fmt1 = "Call to deprecated class {name} ({reason})." + else: + fmt1 = "Call to deprecated function {name} ({reason})." + + @wraps(func1) + def new_func1(*args, **kwargs): + warnings.simplefilter('always', DeprecationWarning) + warnings.warn( + fmt1.format(name=func1.__name__, reason=reason), + category=DeprecationWarning, + stacklevel=2 + ) + warnings.simplefilter('default', DeprecationWarning) + return func1(*args, **kwargs) + + return new_func1 + + return decorator + + elif inspect.isclass(reason) or inspect.isfunction(reason): + + func2 = reason + + if inspect.isclass(func2): + fmt2 = "Call to deprecated class {name}." + else: + fmt2 = "Call to deprecated function {name}." 
+ + @wraps(func2) + def new_func2(*args, **kwargs): + warnings.simplefilter('always', DeprecationWarning) + warnings.warn( + fmt2.format(name=func2.__name__), + category=DeprecationWarning, + stacklevel=2 + ) + warnings.simplefilter('default', DeprecationWarning) + return func2(*args, **kwargs) + + return new_func2 + + else: + raise TypeError(repr(type(reason))) + diff --git a/argopy/utils/lists.py b/argopy/utils/lists.py new file mode 100644 index 00000000..78567313 --- /dev/null +++ b/argopy/utils/lists.py @@ -0,0 +1,198 @@ +import sys +import warnings +from ..options import OPTIONS + + +def list_available_data_src(): + """ List all available data sources """ + sources = {} + try: + from ..data_fetchers import erddap_data as Erddap_Fetchers + # Ensure we're loading the erddap data fetcher with the current options: + Erddap_Fetchers.api_server_check = Erddap_Fetchers.api_server_check.replace(Erddap_Fetchers.api_server, OPTIONS['erddap']) + Erddap_Fetchers.api_server = OPTIONS['erddap'] + + sources["erddap"] = Erddap_Fetchers + except Exception: + warnings.warn( + "An error occurred while loading the ERDDAP data fetcher, " + "it will not be available !\n%s\n%s" + % (sys.exc_info()[0], sys.exc_info()[1]) + ) + pass + + try: + from ..data_fetchers import argovis_data as ArgoVis_Fetchers + + sources["argovis"] = ArgoVis_Fetchers + except Exception: + warnings.warn( + "An error occurred while loading the ArgoVis data fetcher, " + "it will not be available !\n%s\n%s" + % (sys.exc_info()[0], sys.exc_info()[1]) + ) + pass + + try: + from ..data_fetchers import gdacftp_data as GDAC_Fetchers + # Ensure we're loading the gdac data fetcher with the current options: + GDAC_Fetchers.api_server_check = OPTIONS['ftp'] + GDAC_Fetchers.api_server = OPTIONS['ftp'] + + sources["gdac"] = GDAC_Fetchers + except Exception: + warnings.warn( + "An error occurred while loading the GDAC data fetcher, " + "it will not be available !\n%s\n%s" + % (sys.exc_info()[0], sys.exc_info()[1]) + ) + pass + + # return dict(sorted(sources.items())) + return sources + + +def list_available_index_src(): + """ List all available index sources """ + sources = {} + try: + from ..data_fetchers import erddap_index as Erddap_Fetchers + # Ensure we're loading the erddap data fetcher with the current options: + Erddap_Fetchers.api_server_check = Erddap_Fetchers.api_server_check.replace(Erddap_Fetchers.api_server, OPTIONS['erddap']) + Erddap_Fetchers.api_server = OPTIONS['erddap'] + + sources["erddap"] = Erddap_Fetchers + except Exception: + warnings.warn( + "An error occurred while loading the ERDDAP index fetcher, " + "it will not be available !\n%s\n%s" + % (sys.exc_info()[0], sys.exc_info()[1]) + ) + pass + + try: + from ..data_fetchers import gdacftp_index as GDAC_Fetchers + # Ensure we're loading the gdac data fetcher with the current options: + GDAC_Fetchers.api_server_check = OPTIONS['ftp'] + GDAC_Fetchers.api_server = OPTIONS['ftp'] + + sources["gdac"] = GDAC_Fetchers + except Exception: + warnings.warn( + "An error occurred while loading the GDAC index fetcher, " + "it will not be available !\n%s\n%s" + % (sys.exc_info()[0], sys.exc_info()[1]) + ) + pass + + return sources + + +def list_standard_variables(): + """ List of variables for standard users """ + return [ + "DATA_MODE", + "LATITUDE", + "LONGITUDE", + "POSITION_QC", + "DIRECTION", + "PLATFORM_NUMBER", + "CYCLE_NUMBER", + "PRES", + "TEMP", + "PSAL", + "PRES_QC", + "TEMP_QC", + "PSAL_QC", + "PRES_ADJUSTED", + "TEMP_ADJUSTED", + "PSAL_ADJUSTED", + "PRES_ADJUSTED_QC", + 
"TEMP_ADJUSTED_QC", + "PSAL_ADJUSTED_QC", + "PRES_ADJUSTED_ERROR", + "TEMP_ADJUSTED_ERROR", + "PSAL_ADJUSTED_ERROR", + "PRES_ERROR", # can be created from PRES_ADJUSTED_ERROR after a filter_data_mode + "TEMP_ERROR", + "PSAL_ERROR", + "JULD", + "JULD_QC", + "TIME", + "TIME_QC", + # "CONFIG_MISSION_NUMBER", + ] + + +def list_multiprofile_file_variables(): + """ List of variables in a netcdf multiprofile file. + + This is for files created by GDAC under //_prof.nc + """ + return [ + "CONFIG_MISSION_NUMBER", + "CYCLE_NUMBER", + "DATA_CENTRE", + "DATA_MODE", + "DATA_STATE_INDICATOR", + "DATA_TYPE", + "DATE_CREATION", + "DATE_UPDATE", + "DC_REFERENCE", + "DIRECTION", + "FIRMWARE_VERSION", + "FLOAT_SERIAL_NO", + "FORMAT_VERSION", + "HANDBOOK_VERSION", + "HISTORY_ACTION", + "HISTORY_DATE", + "HISTORY_INSTITUTION", + "HISTORY_PARAMETER", + "HISTORY_PREVIOUS_VALUE", + "HISTORY_QCTEST", + "HISTORY_REFERENCE", + "HISTORY_SOFTWARE", + "HISTORY_SOFTWARE_RELEASE", + "HISTORY_START_PRES", + "HISTORY_STEP", + "HISTORY_STOP_PRES", + "JULD", + "JULD_LOCATION", + "JULD_QC", + "LATITUDE", + "LONGITUDE", + "PARAMETER", + "PI_NAME", + "PLATFORM_NUMBER", + "PLATFORM_TYPE", + "POSITIONING_SYSTEM", + "POSITION_QC", + "PRES", + "PRES_ADJUSTED", + "PRES_ADJUSTED_ERROR", + "PRES_ADJUSTED_QC", + "PRES_QC", + "PROFILE_PRES_QC", + "PROFILE_PSAL_QC", + "PROFILE_TEMP_QC", + "PROJECT_NAME", + "PSAL", + "PSAL_ADJUSTED", + "PSAL_ADJUSTED_ERROR", + "PSAL_ADJUSTED_QC", + "PSAL_QC", + "REFERENCE_DATE_TIME", + "SCIENTIFIC_CALIB_COEFFICIENT", + "SCIENTIFIC_CALIB_COMMENT", + "SCIENTIFIC_CALIB_DATE", + "SCIENTIFIC_CALIB_EQUATION", + "STATION_PARAMETERS", + "TEMP", + "TEMP_ADJUSTED", + "TEMP_ADJUSTED_ERROR", + "TEMP_ADJUSTED_QC", + "TEMP_QC", + "VERTICAL_SAMPLING_SCHEME", + "WMO_INST_TYPE", + ] + diff --git a/argopy/xarray.py b/argopy/xarray.py index 87ceddef..1ccc8f99 100644 --- a/argopy/xarray.py +++ b/argopy/xarray.py @@ -14,16 +14,20 @@ except ModuleNotFoundError: with_gsw = False -from argopy.utilities import ( +from .utilities import ( linear_interpolation_remap, - is_list_of_strings, toYearFraction, groupby_remap, + # log_argopy_callerstack, +) + +from .utils import ( + is_list_of_strings, + # is_list_equal, cast_Argo_variable_type, DATA_TYPES, - # log_argopy_callerstack, ) -from argopy.errors import InvalidDatasetStructure, DataNotFound, OptionValueError +from .errors import InvalidDatasetStructure, DataNotFound, OptionValueError log = logging.getLogger("argopy.xarray") From 9ac43a2ff695467edeb3315314d33ee859e1e959 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 8 Sep 2023 14:16:39 +0200 Subject: [PATCH 15/33] [skip-ci] --- argopy/data_fetchers/erddap_index.py | 7 +- argopy/data_fetchers/gdacftp_index.py | 15 --- argopy/related/__init__.py | 6 +- argopy/related/utils.py | 42 ++++++ argopy/stores/argo_index_proto.py | 2 +- argopy/tests/test_related.py | 15 ++- argopy/tests/test_utilities.py | 57 -------- argopy/tests/test_utils_checkers.py | 49 ++++++- argopy/tests/test_utils_lists.py | 7 + argopy/utilities.py | 181 -------------------------- argopy/utils/__init__.py | 3 + argopy/utils/checkers.py | 159 +++++++++++++++++++++- 12 files changed, 282 insertions(+), 261 deletions(-) create mode 100644 argopy/related/utils.py create mode 100644 argopy/tests/test_utils_lists.py diff --git a/argopy/data_fetchers/erddap_index.py b/argopy/data_fetchers/erddap_index.py index ebaff973..5cf84730 100644 --- a/argopy/data_fetchers/erddap_index.py +++ b/argopy/data_fetchers/erddap_index.py @@ -17,9 +17,10 @@ from abc import ABC, 
abstractmethod -from argopy.utilities import load_dict, mapp_dict, format_oneline -from argopy.stores import httpstore -from argopy.options import OPTIONS +from ..utilities import format_oneline +from ..related import load_dict, mapp_dict +from ..stores import httpstore +from ..options import OPTIONS log = logging.getLogger("argopy.fetchers.erddap_index") diff --git a/argopy/data_fetchers/gdacftp_index.py b/argopy/data_fetchers/gdacftp_index.py index c1a117d9..7b11f73d 100644 --- a/argopy/data_fetchers/gdacftp_index.py +++ b/argopy/data_fetchers/gdacftp_index.py @@ -161,21 +161,6 @@ def clear_cache(self): def to_dataframe(self): """ Filter index file and return a pandas dataframe """ df = self.indexfs.run().to_dataframe() - - # Post-processing of the filtered index is done at the indexstore level - # if 'wmo' not in df: - # df['wmo'] = df['file'].apply(lambda x: int(x.split('/')[1])) - # - # # institution & profiler mapping for all users - # # todo: may be we need to separate this for standard and expert users - # institution_dictionnary = load_dict('institutions') - # df['tmp1'] = df.institution.apply(lambda x: mapp_dict(institution_dictionnary, x)) - # df = df.rename(columns={"institution": "institution_code", "tmp1": "institution"}) - # - # profiler_dictionnary = load_dict('profilers') - # df['profiler'] = df.profiler_type.apply(lambda x: mapp_dict(profiler_dictionnary, int(x))) - # df = df.rename(columns={"profiler_type": "profiler_code"}) - return df def to_xarray(self): diff --git a/argopy/related/__init__.py b/argopy/related/__init__.py index 5c960c9d..68f16bd7 100644 --- a/argopy/related/__init__.py +++ b/argopy/related/__init__.py @@ -4,7 +4,7 @@ from .argo_documentation import ArgoDocs from .doi_snapshot import ArgoDOI from .euroargo_api import get_coriolis_profile_id, get_ea_profile_page - +from .utils import load_dict, mapp_dict # __all__ = ( @@ -18,4 +18,8 @@ # Functions: "get_coriolis_profile_id", "get_ea_profile_page", + + # Utilities: + "load_dict", + "mapp_dict", ) diff --git a/argopy/related/utils.py b/argopy/related/utils.py new file mode 100644 index 00000000..0463b102 --- /dev/null +++ b/argopy/related/utils.py @@ -0,0 +1,42 @@ +import importlib +import os +import json +from . 
import ArgoNVSReferenceTables + + +path2assets = importlib.util.find_spec('argopy.static.assets').submodule_search_locations[0] + + +def load_dict(ptype): + if ptype == "profilers": + try: + nvs = ArgoNVSReferenceTables(cache=True) + profilers = {} + for row in nvs.tbl(8).iterrows(): + profilers.update({int(row[1]['altLabel']): row[1]['prefLabel']}) + return profilers + except Exception: + with open(os.path.join(path2assets, "profilers.json"), "rb") as f: + loaded_dict = json.load(f)['data']['profilers'] + return loaded_dict + elif ptype == "institutions": + try: + nvs = ArgoNVSReferenceTables(cache=True) + institutions = {} + for row in nvs.tbl(4).iterrows(): + institutions.update({row[1]['altLabel']: row[1]['prefLabel']}) + return institutions + except Exception: + with open(os.path.join(path2assets, "institutions.json"), "rb") as f: + loaded_dict = json.load(f)['data']['institutions'] + return loaded_dict + else: + raise ValueError("Invalid dictionary name") + + +def mapp_dict(Adictionnary, Avalue): + if Avalue not in Adictionnary: + return "Unknown" + else: + return Adictionnary[Avalue] + diff --git a/argopy/stores/argo_index_proto.py b/argopy/stores/argo_index_proto.py index f7ed13d0..5a1f629a 100644 --- a/argopy/stores/argo_index_proto.py +++ b/argopy/stores/argo_index_proto.py @@ -505,7 +505,7 @@ def get_filename(s, index): else: log.debug("Converting [%s] to dataframe from scratch ..." % src) # Post-processing for user: - from argopy.utilities import load_dict, mapp_dict + from ..related import load_dict, mapp_dict if nrows is not None: df = df.loc[0: nrows - 1].copy() diff --git a/argopy/tests/test_related.py b/argopy/tests/test_related.py index 82f2028c..81d219d6 100644 --- a/argopy/tests/test_related.py +++ b/argopy/tests/test_related.py @@ -19,8 +19,9 @@ ArgoNVSReferenceTables, OceanOPSDeployments, ArgoDocs, + load_dict, mapp_dict ) -from argopy.utilities import ( +from argopy.utils.checkers import ( is_list_of_strings, ) @@ -301,3 +302,15 @@ def test_open_pdf(self, page, an_instance): else: with pytest.raises(ValueError): an_instance.show() + + + +def test_invalid_dictionnary(): + with pytest.raises(ValueError): + load_dict("invalid_dictionnary") + + +def test_invalid_dictionnary_key(): + d = load_dict("profilers") + assert mapp_dict(d, "invalid_key") == "Unknown" + diff --git a/argopy/tests/test_utilities.py b/argopy/tests/test_utilities.py index 4f691037..cecfb621 100644 --- a/argopy/tests/test_utilities.py +++ b/argopy/tests/test_utilities.py @@ -45,28 +45,6 @@ from mocked_http import mocked_httpserver, mocked_server_address -def test_invalid_dictionnary(): - with pytest.raises(ValueError): - load_dict("invalid_dictionnary") - - -def test_invalid_dictionnary_key(): - d = load_dict("profilers") - assert mapp_dict(d, "invalid_key") == "Unknown" - - -def test_list_multiprofile_file_variables(): - assert is_list_of_strings(list_multiprofile_file_variables()) - - -def test_check_gdac_path(): - assert check_gdac_path("dummy_path", errors='ignore') is False - with pytest.raises(FtpPathError): - check_gdac_path("dummy_path", errors='raise') - with pytest.warns(UserWarning): - assert check_gdac_path("dummy_path", errors='warn') is False - - @pytest.mark.parametrize("conda", [False, True], indirect=False, ids=["conda=%s" % str(p) for p in [False, True]]) @@ -76,41 +54,6 @@ def test_show_versions(conda): assert "SYSTEM" in f.getvalue() -def test_isconnected(mocked_httpserver): - assert isinstance(isconnected(host=mocked_server_address), bool) - assert 
isconnected(host="http://dummyhost") is False - - -def test_urlhaskeyword(mocked_httpserver): - url = "https://api.ifremer.fr/argopy/data/ARGO-FULL.json" - url.replace("https://api.ifremer.fr", mocked_server_address) - assert isinstance(urlhaskeyword(url, "label"), bool) - - -params = [mocked_server_address, - {"url": mocked_server_address + "/argopy/data/ARGO-FULL.json", "keyword": "label"} - ] -params_ids = ["url is a %s" % str(type(p)) for p in params] -@pytest.mark.parametrize("params", params, indirect=False, ids=params_ids) -def test_isalive(params, mocked_httpserver): - assert isinstance(isalive(params), bool) - - -@requires_erddap -@pytest.mark.parametrize("data", [True, False], indirect=False, ids=["data=%s" % t for t in [True, False]]) -def test_isAPIconnected(data, mocked_httpserver): - with argopy.set_options(erddap=mocked_server_address): - assert isinstance(isAPIconnected(src="erddap", data=data), bool) - - -def test_erddap_ds_exists(mocked_httpserver): - with argopy.set_options(erddap=mocked_server_address): - assert isinstance(erddap_ds_exists(ds="ArgoFloats"), bool) - assert erddap_ds_exists(ds="DummyDS") is False - -# todo : Implement tests for utilities functions: badge, fetch_status and monitor_status - - @requires_gdac def test_clear_cache(): ftproot, flist = argopy.tutorial.open_dataset("gdac") diff --git a/argopy/tests/test_utils_checkers.py b/argopy/tests/test_utils_checkers.py index 5ccc14af..987bd88a 100644 --- a/argopy/tests/test_utils_checkers.py +++ b/argopy/tests/test_utils_checkers.py @@ -1,10 +1,15 @@ import pytest import numpy as np +from mocked_http import mocked_httpserver, mocked_server_address -from argopy.utils import ( +import argopy +from argopy.errors import FtpPathError +from argopy.utils.checkers import ( is_box, is_indexbox, check_wmo, is_wmo, check_cyc, is_cyc, + check_gdac_path, + isconnected, urlhaskeyword, isAPIconnected, erddap_ds_exists, isalive ) @@ -177,3 +182,45 @@ def test_check_cyc(): assert check_cyc([12, 123]) == [12, 123] assert check_cyc(np.array((123, 1234), dtype='int')) == [123, 1234] + +def test_check_gdac_path(): + assert check_gdac_path("dummy_path", errors='ignore') is False + with pytest.raises(FtpPathError): + check_gdac_path("dummy_path", errors='raise') + with pytest.warns(UserWarning): + assert check_gdac_path("dummy_path", errors='warn') is False + + +def test_isconnected(mocked_httpserver): + assert isinstance(isconnected(host=mocked_server_address), bool) + assert isconnected(host="http://dummyhost") is False + + +def test_urlhaskeyword(mocked_httpserver): + url = "https://api.ifremer.fr/argopy/data/ARGO-FULL.json" + url.replace("https://api.ifremer.fr", mocked_server_address) + assert isinstance(urlhaskeyword(url, "label"), bool) + + +params = [mocked_server_address, + {"url": mocked_server_address + "/argopy/data/ARGO-FULL.json", "keyword": "label"} + ] +params_ids = ["url is a %s" % str(type(p)) for p in params] +@pytest.mark.parametrize("params", params, indirect=False, ids=params_ids) +def test_isalive(params, mocked_httpserver): + assert isinstance(isalive(params), bool) + + +@requires_erddap +@pytest.mark.parametrize("data", [True, False], indirect=False, ids=["data=%s" % t for t in [True, False]]) +def test_isAPIconnected(data, mocked_httpserver): + with argopy.set_options(erddap=mocked_server_address): + assert isinstance(isAPIconnected(src="erddap", data=data), bool) + + +def test_erddap_ds_exists(mocked_httpserver): + with argopy.set_options(erddap=mocked_server_address): + assert 
isinstance(erddap_ds_exists(ds="ArgoFloats"), bool) + assert erddap_ds_exists(ds="DummyDS") is False + +# todo : Implement tests for utilities functions: badge, fetch_status and monitor_status diff --git a/argopy/tests/test_utils_lists.py b/argopy/tests/test_utils_lists.py new file mode 100644 index 00000000..06aaa893 --- /dev/null +++ b/argopy/tests/test_utils_lists.py @@ -0,0 +1,7 @@ +# import pytest +from argopy.utils.checkers import is_list_of_strings +from argopy.utils.lists import list_multiprofile_file_variables + + +def test_list_multiprofile_file_variables(): + assert is_list_of_strings(list_multiprofile_file_variables()) diff --git a/argopy/utilities.py b/argopy/utilities.py index 955f3250..fd5e0479 100644 --- a/argopy/utilities.py +++ b/argopy/utilities.py @@ -193,40 +193,6 @@ def convert_size(size_bytes): return pd.DataFrame(listing) -def load_dict(ptype): - if ptype == "profilers": - try: - nvs = ArgoNVSReferenceTables(cache=True) - profilers = {} - for row in nvs.tbl(8).iterrows(): - profilers.update({int(row[1]['altLabel']): row[1]['prefLabel']}) - return profilers - except Exception: - with open(os.path.join(path2assets, "profilers.json"), "rb") as f: - loaded_dict = json.load(f)['data']['profilers'] - return loaded_dict - elif ptype == "institutions": - try: - nvs = ArgoNVSReferenceTables(cache=True) - institutions = {} - for row in nvs.tbl(4).iterrows(): - institutions.update({row[1]['altLabel']: row[1]['prefLabel']}) - return institutions - except Exception: - with open(os.path.join(path2assets, "institutions.json"), "rb") as f: - loaded_dict = json.load(f)['data']['institutions'] - return loaded_dict - else: - raise ValueError("Invalid dictionary name") - - -def mapp_dict(Adictionnary, Avalue): - if Avalue not in Adictionnary: - return "Unknown" - else: - return Adictionnary[Avalue] - - def get_sys_info(): """Returns system information as a dict""" @@ -428,153 +394,6 @@ def show_options(file=sys.stdout): # noqa: C901 print(f"{k}: {v}", file=file) - -def isconnected(host: str = "https://www.ifremer.fr", maxtry: int = 10): - """Check if an URL is alive - - Parameters - ---------- - host: str - URL to use, 'https://www.ifremer.fr' by default - maxtry: int, default: 10 - Maximum number of host connections to try before - - Returns - ------- - bool - """ - # log.debug("isconnected: %s" % host) - if split_protocol(host)[0] in ["http", "https", "ftp", "sftp"]: - it = 0 - while it < maxtry: - try: - # log.debug("Checking if %s is connected ..." % host) - urllib.request.urlopen(host, timeout=1) # nosec B310 because host protocol already checked - result, it = True, maxtry - except Exception: - result, it = False, it + 1 - return result - else: - return os.path.exists(host) - - -def urlhaskeyword(url: str = "", keyword: str = '', maxtry: int = 10): - """ Check if a keyword is in the content of a URL - - Parameters - ---------- - url: str - keyword: str - maxtry: int, default: 10 - Maximum number of host connections to try before returning False - - Returns - ------- - bool - """ - it = 0 - while it < maxtry: - try: - with fsspec.open(url) as f: - data = f.read() - result = keyword in str(data) - it = maxtry - except Exception: - result, it = False, it + 1 - return result - - -def isalive(api_server_check: Union[str, dict] = "") -> bool: - """Check if an API is alive or not - - 2 methods are available: - - - URL Ping - - keyword Check - - Parameters - ---------- - api_server_check - Url string or dictionary with [``url``, ``keyword``] keys. 
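Both accepted call forms, sketched with placeholder targets; the import path follows the move to argopy.utils.checkers made by this patch:

from argopy.utils.checkers import isalive

ok_ping = isalive("https://www.ifremer.fr")                                 # URL ping form
ok_word = isalive({"url": "https://www.ifremer.fr", "keyword": "Ifremer"})  # keyword-check form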
- - - For a string, uses: :class:`argopy.utilities.isconnected` - - For a dictionary, uses: :class:`argopy.utilities.urlhaskeyword` - - Returns - ------- - bool - """ - # log.debug("isalive: %s" % api_server_check) - if isinstance(api_server_check, dict): - return urlhaskeyword(url=api_server_check['url'], keyword=api_server_check['keyword']) - else: - return isconnected(api_server_check) - - -def isAPIconnected(src="erddap", data=True): - """ Check if a source API is alive or not - - The API is connected when it has a live URL or valid folder path. - - Parameters - ---------- - src: str - The data or index source name, 'erddap' default - data: bool - If True check the data fetcher (default), if False, check the index fetcher - - Returns - ------- - bool - """ - if data: - list_src = list_available_data_src() - else: - list_src = list_available_index_src() - - if src in list_src and getattr(list_src[src], "api_server_check", None): - return isalive(list_src[src].api_server_check) - else: - raise InvalidFetcher - - -def erddap_ds_exists( - ds: Union[list, str] = "ArgoFloats", - erddap: str = None, - maxtry: int = 2 -) -> bool: - """ Check if a dataset exists on a remote erddap server - - Parameter - --------- - ds: str, default='ArgoFloats' - Name of the erddap dataset to check - erddap: str, default=OPTIONS['erddap'] - Url of the erddap server - maxtry: int, default: 2 - Maximum number of host connections to try - - Return - ------ - bool - """ - if erddap is None: - erddap = OPTIONS['erddap'] - # log.debug("from erddap_ds_exists: %s" % erddap) - from .stores import httpstore - if isconnected(erddap, maxtry=maxtry): - with httpstore(timeout=OPTIONS['api_timeout']).open("".join([erddap, "/info/index.json"])) as of: - erddap_index = json.load(of) - if is_list_of_strings(ds): - return [this_ds in [row[-1] for row in erddap_index["table"]["rows"]] for this_ds in ds] - else: - return ds in [row[-1] for row in erddap_index["table"]["rows"]] - else: - log.debug("Cannot reach erddap server: %s" % erddap) - warnings.warn("Return False because we cannot reach the erddap server %s" % erddap) - return False - - def badge(label="label", message="message", color="green", insert=False): """ Return or insert shield.io badge image diff --git a/argopy/utils/__init__.py b/argopy/utils/__init__.py index acdfe1ce..8acfbd69 100644 --- a/argopy/utils/__init__.py +++ b/argopy/utils/__init__.py @@ -6,6 +6,8 @@ is_cyc, check_cyc, check_index_cols, check_gdac_path, + isconnected, urlhaskeyword, + isalive, isAPIconnected, erddap_ds_exists, ) from .casting import DATA_TYPES, cast_Argo_variable_type, to_list from .decorators import deprecated, doc_inherit @@ -29,6 +31,7 @@ "is_cyc", "check_cyc", "check_index_cols", "check_gdac_path", + "isconnected", "isalive", "isAPIconnected", "erddap_ds_exists", # Data type casting: "DATA_TYPES", diff --git a/argopy/utils/checkers.py b/argopy/utils/checkers.py index 2c8cfb53..b3f2f977 100644 --- a/argopy/utils/checkers.py +++ b/argopy/utils/checkers.py @@ -1,12 +1,24 @@ +import os import warnings import numpy as np import pandas as pd import xarray as xr +from typing import Union from fsspec.core import split_protocol import fsspec from socket import gaierror +import urllib +import json +import logging + +from ..options import OPTIONS +from ..stores import httpstore from ..utils import to_list -from ..errors import InvalidDatasetStructure, FtpPathError +from ..errors import InvalidDatasetStructure, FtpPathError, InvalidFetcher +from . 
import list_available_data_src, list_available_index_src + + +log = logging.getLogger("argopy.utils.checkers") def is_indexbox(box: list, errors="raise"): @@ -468,3 +480,148 @@ def check_gdac_path(path, errors='ignore'): # noqa: C901 else: return False + +def isconnected(host: str = "https://www.ifremer.fr", maxtry: int = 10): + """Check if an URL is alive + + Parameters + ---------- + host: str + URL to use, 'https://www.ifremer.fr' by default + maxtry: int, default: 10 + Maximum number of host connections to try before + + Returns + ------- + bool + """ + # log.debug("isconnected: %s" % host) + if split_protocol(host)[0] in ["http", "https", "ftp", "sftp"]: + it = 0 + while it < maxtry: + try: + # log.debug("Checking if %s is connected ..." % host) + urllib.request.urlopen(host, timeout=1) # nosec B310 because host protocol already checked + result, it = True, maxtry + except Exception: + result, it = False, it + 1 + return result + else: + return os.path.exists(host) + + +def urlhaskeyword(url: str = "", keyword: str = '', maxtry: int = 10): + """ Check if a keyword is in the content of a URL + + Parameters + ---------- + url: str + keyword: str + maxtry: int, default: 10 + Maximum number of host connections to try before returning False + + Returns + ------- + bool + """ + it = 0 + while it < maxtry: + try: + with fsspec.open(url) as f: + data = f.read() + result = keyword in str(data) + it = maxtry + except Exception: + result, it = False, it + 1 + return result + + +def isalive(api_server_check: Union[str, dict] = "") -> bool: + """Check if an API is alive or not + + 2 methods are available: + + - URL Ping + - keyword Check + + Parameters + ---------- + api_server_check + Url string or dictionary with [``url``, ``keyword``] keys. + + - For a string, uses: :class:`argopy.utilities.isconnected` + - For a dictionary, uses: :class:`argopy.utilities.urlhaskeyword` + + Returns + ------- + bool + """ + # log.debug("isalive: %s" % api_server_check) + if isinstance(api_server_check, dict): + return urlhaskeyword(url=api_server_check['url'], keyword=api_server_check['keyword']) + else: + return isconnected(api_server_check) + + +def isAPIconnected(src="erddap", data=True): + """ Check if a source API is alive or not + + The API is connected when it has a live URL or valid folder path. 
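A short sketch of the intended use, assuming the erddap source is registered and reachable from the running environment; an unknown source name raises InvalidFetcher:

from argopy.utils.checkers import isAPIconnected

if isAPIconnected(src="erddap", data=True):  # data=False would check the index fetcher instead
    print("erddap data fetcher is reachable")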
+ + Parameters + ---------- + src: str + The data or index source name, 'erddap' default + data: bool + If True check the data fetcher (default), if False, check the index fetcher + + Returns + ------- + bool + """ + if data: + list_src = list_available_data_src() + else: + list_src = list_available_index_src() + + if src in list_src and getattr(list_src[src], "api_server_check", None): + return isalive(list_src[src].api_server_check) + else: + raise InvalidFetcher + + +def erddap_ds_exists( + ds: Union[list, str] = "ArgoFloats", + erddap: str = None, + maxtry: int = 2 +) -> bool: + """ Check if a dataset exists on a remote erddap server + + Parameter + --------- + ds: str, default='ArgoFloats' + Name of the erddap dataset to check + erddap: str, default=OPTIONS['erddap'] + Url of the erddap server + maxtry: int, default: 2 + Maximum number of host connections to try + + Return + ------ + bool + """ + if erddap is None: + erddap = OPTIONS['erddap'] + # log.debug("from erddap_ds_exists: %s" % erddap) + if isconnected(erddap, maxtry=maxtry): + with httpstore(timeout=OPTIONS['api_timeout']).open("".join([erddap, "/info/index.json"])) as of: + erddap_index = json.load(of) + if is_list_of_strings(ds): + return [this_ds in [row[-1] for row in erddap_index["table"]["rows"]] for this_ds in ds] + else: + return ds in [row[-1] for row in erddap_index["table"]["rows"]] + else: + log.debug("Cannot reach erddap server: %s" % erddap) + warnings.warn("Return False because we cannot reach the erddap server %s" % erddap) + return False + From 33d242388cc0b55b6c77cfd1a2263609b4cc6fa5 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 8 Sep 2023 15:20:45 +0200 Subject: [PATCH 16/33] [skip-ci] --- argopy/data_fetchers/argovis_data.py | 11 +- argopy/data_fetchers/erddap_data.py | 7 +- argopy/data_fetchers/erddap_refdata.py | 8 +- argopy/data_fetchers/gdacftp_data.py | 2 +- argopy/data_fetchers/proto.py | 2 +- argopy/stores/argo_index_proto.py | 2 +- argopy/stores/filesystems.py | 3 +- argopy/tests/test_related.py | 15 +- argopy/tests/test_utilities.py | 316 ------------ argopy/tests/test_utils_accessories.py | 86 ++++ argopy/tests/test_utils_caching.py | 35 ++ argopy/tests/test_utils_checkers.py | 6 +- argopy/tests/test_utils_chunking.py | 196 ++++++++ argopy/utilities.py | 635 ------------------------- argopy/utils/__init__.py | 19 +- argopy/utils/accessories.py | 262 ++++++++++ argopy/utils/caching.py | 122 +++++ argopy/utils/chunking.py | 282 +++++++++++ 18 files changed, 1031 insertions(+), 978 deletions(-) create mode 100644 argopy/tests/test_utils_accessories.py create mode 100644 argopy/tests/test_utils_caching.py create mode 100644 argopy/tests/test_utils_chunking.py create mode 100644 argopy/utils/accessories.py create mode 100644 argopy/utils/caching.py create mode 100644 argopy/utils/chunking.py diff --git a/argopy/data_fetchers/argovis_data.py b/argopy/data_fetchers/argovis_data.py index 8bab0bca..f57a22c7 100644 --- a/argopy/data_fetchers/argovis_data.py +++ b/argopy/data_fetchers/argovis_data.py @@ -9,14 +9,15 @@ import xarray as xr import getpass import logging -from .proto import ArgoDataFetcherProto from abc import abstractmethod import warnings -from argopy.stores import httpstore -from argopy.options import OPTIONS -from argopy.utilities import format_oneline, Chunker -from argopy.errors import DataNotFound +from ..stores import httpstore +from ..options import OPTIONS +from ..utilities import format_oneline +from ..utils import Chunker +from ..errors import DataNotFound +from .proto 
import ArgoDataFetcherProto access_points = ["wmo", "box"] diff --git a/argopy/data_fetchers/erddap_data.py b/argopy/data_fetchers/erddap_data.py index c3d3ea58..e817f087 100644 --- a/argopy/data_fetchers/erddap_data.py +++ b/argopy/data_fetchers/erddap_data.py @@ -24,13 +24,14 @@ from aiohttp import ClientResponseError import logging -from .proto import ArgoDataFetcherProto from ..options import OPTIONS -from ..utilities import Chunker, format_oneline +from ..utilities import format_oneline from ..stores import httpstore from ..errors import ErddapServerError, DataNotFound from ..stores import indexstore_pd as ArgoIndex # make sure we work with the Pandas index store -from ..utils import is_list_of_strings, to_list +from ..utils import is_list_of_strings, to_list,Chunker +from .proto import ArgoDataFetcherProto + # Load erddapy according to available version (breaking changes in v0.8.0) try: diff --git a/argopy/data_fetchers/erddap_refdata.py b/argopy/data_fetchers/erddap_refdata.py index fc0c216a..1b07bf4e 100644 --- a/argopy/data_fetchers/erddap_refdata.py +++ b/argopy/data_fetchers/erddap_refdata.py @@ -2,11 +2,11 @@ Fetcher to retrieve CTD reference data from Ifremer erddap """ import xarray as xr -from .erddap_data import ErddapArgoDataFetcher -from argopy.options import OPTIONS -from argopy.utilities import Chunker -from argopy.stores import httpstore_erddap_auth import logging +from ..options import OPTIONS +from ..utils import Chunker +from ..stores import httpstore_erddap_auth +from .erddap_data import ErddapArgoDataFetcher # Load erddapy according to available version (breaking changes in v0.8.0) try: diff --git a/argopy/data_fetchers/gdacftp_data.py b/argopy/data_fetchers/gdacftp_data.py index 631d31d2..d6ee4599 100644 --- a/argopy/data_fetchers/gdacftp_data.py +++ b/argopy/data_fetchers/gdacftp_data.py @@ -12,11 +12,11 @@ import getpass import logging -from .proto import ArgoDataFetcherProto from ..utilities import format_oneline, argo_split_path from ..options import OPTIONS, check_gdac_path from ..errors import DataNotFound from ..stores import ArgoIndex +from .proto import ArgoDataFetcherProto log = logging.getLogger("argopy.gdacftp.data") access_points = ["wmo", "box"] diff --git a/argopy/data_fetchers/proto.py b/argopy/data_fetchers/proto.py index 28452de5..a3f0ded1 100644 --- a/argopy/data_fetchers/proto.py +++ b/argopy/data_fetchers/proto.py @@ -5,7 +5,7 @@ import hashlib import warnings from ..plot import dashboard -from ..utilities import list_standard_variables +from ..utils import list_standard_variables class ArgoDataFetcherProto(ABC): diff --git a/argopy/stores/argo_index_proto.py b/argopy/stores/argo_index_proto.py index 5a1f629a..48e82180 100644 --- a/argopy/stores/argo_index_proto.py +++ b/argopy/stores/argo_index_proto.py @@ -13,7 +13,7 @@ from ..options import OPTIONS from ..errors import FtpPathError, InvalidDataset, OptionValueError -from ..utilities import Registry, isconnected +from ..utils import Registry, isconnected from .filesystems import httpstore, memorystore, filestore, ftpstore try: diff --git a/argopy/stores/filesystems.py b/argopy/stores/filesystems.py index 3e08cd3d..42517f93 100644 --- a/argopy/stores/filesystems.py +++ b/argopy/stores/filesystems.py @@ -49,12 +49,11 @@ ) from abc import ABC, abstractmethod from ..utilities import ( - Registry, - # log_argopy_callerstack, drop_variables_not_in_all_datasets, fill_variables_not_in_all_datasets, ) from ..utils import MonitoredThreadPoolExecutor as MyExecutor +from ..utils import Registry log = 
logging.getLogger("argopy.stores") diff --git a/argopy/tests/test_related.py b/argopy/tests/test_related.py index 81d219d6..031d2710 100644 --- a/argopy/tests/test_related.py +++ b/argopy/tests/test_related.py @@ -14,12 +14,14 @@ has_cartopy, has_ipython, ) +import argopy from argopy.related import ( TopoFetcher, ArgoNVSReferenceTables, OceanOPSDeployments, ArgoDocs, - load_dict, mapp_dict + load_dict, mapp_dict, + get_coriolis_profile_id, get_ea_profile_page ) from argopy.utils.checkers import ( is_list_of_strings, @@ -304,7 +306,6 @@ def test_open_pdf(self, page, an_instance): an_instance.show() - def test_invalid_dictionnary(): with pytest.raises(ValueError): load_dict("invalid_dictionnary") @@ -314,3 +315,13 @@ def test_invalid_dictionnary_key(): d = load_dict("profilers") assert mapp_dict(d, "invalid_key") == "Unknown" + +@pytest.mark.parametrize("params", [[6901929, None], [6901929, 12]], indirect=False, ids=['float', 'profile']) +def test_get_coriolis_profile_id(params, mocked_httpserver): + with argopy.set_options(cachedir=tempfile.mkdtemp()): + assert isinstance(get_coriolis_profile_id(params[0], params[1], api_server=mocked_server_address), pd.core.frame.DataFrame) + +@pytest.mark.parametrize("params", [[6901929, None], [6901929, 12]], indirect=False, ids=['float', 'profile']) +def test_get_ea_profile_page(params, mocked_httpserver): + with argopy.set_options(cachedir=tempfile.mkdtemp()): + assert is_list_of_strings(get_ea_profile_page(params[0], params[1], api_server=mocked_server_address)) diff --git a/argopy/tests/test_utilities.py b/argopy/tests/test_utilities.py index cecfb621..8ccd8d46 100644 --- a/argopy/tests/test_utilities.py +++ b/argopy/tests/test_utilities.py @@ -9,17 +9,7 @@ import argopy from argopy.utilities import ( - load_dict, - mapp_dict, - list_multiprofile_file_variables, - check_gdac_path, - isconnected, - urlhaskeyword, - isalive, - isAPIconnected, - erddap_ds_exists, linear_interpolation_remap, - Chunker, format_oneline, wmo2box, modified_environ, @@ -54,33 +44,6 @@ def test_show_versions(conda): assert "SYSTEM" in f.getvalue() -@requires_gdac -def test_clear_cache(): - ftproot, flist = argopy.tutorial.open_dataset("gdac") - with tempfile.TemporaryDirectory() as cachedir: - with argopy.set_options(cachedir=cachedir): - loader = ArgoDataFetcher(src="gdac", ftp=ftproot, cache=True).profile(2902696, 12) - loader.to_xarray() - argopy.clear_cache() - assert os.path.exists(cachedir) is True - assert len(os.listdir(cachedir)) == 0 - - -@requires_gdac -def test_lscache(): - ftproot, flist = argopy.tutorial.open_dataset("gdac") - with tempfile.TemporaryDirectory() as cachedir: - with argopy.set_options(cachedir=cachedir): - loader = ArgoDataFetcher(src="gdac", ftp=ftproot, cache=True).profile(2902696, 12) - loader.to_xarray() - result = argopy.utilities.lscache(cache_path=cachedir, prt=True) - assert isinstance(result, str) - - result = argopy.utilities.lscache(cache_path=cachedir, prt=False) - assert isinstance(result, pd.DataFrame) - - - class Test_linear_interpolation_remap: @pytest.fixture(autouse=True) def create_data(self): @@ -150,193 +113,6 @@ def test_error_ds(self): ) -class Test_Chunker: - @pytest.fixture(autouse=True) - def create_data(self): - self.WMO = [ - 6902766, - 6902772, - 6902914, - 6902746, - 6902916, - 6902915, - 6902757, - 6902771, - ] - self.BOX3d = [0, 20, 40, 60, 0, 1000] - self.BOX4d = [0, 20, 40, 60, 0, 1000, "2001-01", "2001-6"] - - def test_InvalidFetcherAccessPoint(self): - with pytest.raises(InvalidFetcherAccessPoint): - 
Chunker({"invalid": self.WMO}) - - def test_invalid_chunks(self): - with pytest.raises(ValueError): - Chunker({"box": self.BOX3d}, chunks='toto') - - def test_invalid_chunksize(self): - with pytest.raises(ValueError): - Chunker({"box": self.BOX3d}, chunksize='toto') - - def test_chunk_wmo(self): - C = Chunker({"wmo": self.WMO}) - assert all( - [all(isinstance(x, int) for x in chunk) for chunk in C.fit_transform()] - ) - - C = Chunker({"wmo": self.WMO}, chunks="auto") - assert all( - [all(isinstance(x, int) for x in chunk) for chunk in C.fit_transform()] - ) - - C = Chunker({"wmo": self.WMO}, chunks={"wmo": 1}) - assert all( - [all(isinstance(x, int) for x in chunk) for chunk in C.fit_transform()] - ) - assert len(C.fit_transform()) == 1 - - with pytest.raises(ValueError): - Chunker({"wmo": self.WMO}, chunks=["wmo", 1]) - - C = Chunker({"wmo": self.WMO}) - assert isinstance(C.this_chunker, types.FunctionType) or isinstance( - C.this_chunker, types.MethodType - ) - - def test_chunk_box3d(self): - C = Chunker({"box": self.BOX3d}) - assert all([is_box(chunk) for chunk in C.fit_transform()]) - - C = Chunker({"box": self.BOX3d}, chunks="auto") - assert all([is_box(chunk) for chunk in C.fit_transform()]) - - C = Chunker({"box": self.BOX3d}, chunks={"lon": 12, "lat": 1, "dpt": 1}) - assert all([is_box(chunk) for chunk in C.fit_transform()]) - assert len(C.fit_transform()) == 12 - - C = Chunker( - {"box": self.BOX3d}, chunks={"lat": 1, "dpt": 1}, chunksize={"lon": 10} - ) - chunks = C.fit_transform() - assert all([is_box(chunk) for chunk in chunks]) - assert chunks[0][1] - chunks[0][0] == 10 - - C = Chunker({"box": self.BOX3d}, chunks={"lon": 1, "lat": 12, "dpt": 1}) - assert all([is_box(chunk) for chunk in C.fit_transform()]) - assert len(C.fit_transform()) == 12 - - C = Chunker( - {"box": self.BOX3d}, chunks={"lon": 1, "dpt": 1}, chunksize={"lat": 10} - ) - chunks = C.fit_transform() - assert all([is_box(chunk) for chunk in chunks]) - assert chunks[0][3] - chunks[0][2] == 10 - - C = Chunker({"box": self.BOX3d}, chunks={"lon": 1, "lat": 1, "dpt": 12}) - assert all([is_box(chunk) for chunk in C.fit_transform()]) - assert len(C.fit_transform()) == 12 - - C = Chunker( - {"box": self.BOX3d}, chunks={"lon": 1, "lat": 1}, chunksize={"dpt": 10} - ) - chunks = C.fit_transform() - assert all([is_box(chunk) for chunk in chunks]) - assert chunks[0][5] - chunks[0][4] == 10 - - C = Chunker({"box": self.BOX3d}, chunks={"lon": 4, "lat": 2, "dpt": 1}) - assert all([is_box(chunk) for chunk in C.fit_transform()]) - assert len(C.fit_transform()) == 2 * 4 - - C = Chunker({"box": self.BOX3d}, chunks={"lon": 2, "lat": 3, "dpt": 4}) - assert all([is_box(chunk) for chunk in C.fit_transform()]) - assert len(C.fit_transform()) == 2 * 3 * 4 - - with pytest.raises(ValueError): - Chunker({"box": self.BOX3d}, chunks=["lon", 1]) - - C = Chunker({"box": self.BOX3d}) - assert isinstance(C.this_chunker, types.FunctionType) or isinstance( - C.this_chunker, types.MethodType - ) - - def test_chunk_box4d(self): - C = Chunker({"box": self.BOX4d}) - assert all([is_box(chunk) for chunk in C.fit_transform()]) - - C = Chunker({"box": self.BOX4d}, chunks="auto") - assert all([is_box(chunk) for chunk in C.fit_transform()]) - - C = Chunker( - {"box": self.BOX4d}, chunks={"lon": 2, "lat": 1, "dpt": 1, "time": 1} - ) - assert all([is_box(chunk) for chunk in C.fit_transform()]) - assert len(C.fit_transform()) == 2 - - C = Chunker( - {"box": self.BOX4d}, - chunks={"lat": 1, "dpt": 1, "time": 1}, - chunksize={"lon": 10}, - ) - chunks = 
C.fit_transform() - assert all([is_box(chunk) for chunk in chunks]) - assert chunks[0][1] - chunks[0][0] == 10 - - C = Chunker( - {"box": self.BOX4d}, chunks={"lon": 1, "lat": 2, "dpt": 1, "time": 1} - ) - assert all([is_box(chunk) for chunk in C.fit_transform()]) - assert len(C.fit_transform()) == 2 - - C = Chunker( - {"box": self.BOX4d}, - chunks={"lon": 1, "dpt": 1, "time": 1}, - chunksize={"lat": 10}, - ) - chunks = C.fit_transform() - assert all([is_box(chunk) for chunk in chunks]) - assert chunks[0][3] - chunks[0][2] == 10 - - C = Chunker( - {"box": self.BOX4d}, chunks={"lon": 1, "lat": 1, "dpt": 2, "time": 1} - ) - assert all([is_box(chunk) for chunk in C.fit_transform()]) - assert len(C.fit_transform()) == 2 - - C = Chunker( - {"box": self.BOX4d}, - chunks={"lon": 1, "lat": 1, "time": 1}, - chunksize={"dpt": 10}, - ) - chunks = C.fit_transform() - assert all([is_box(chunk) for chunk in chunks]) - assert chunks[0][5] - chunks[0][4] == 10 - - C = Chunker( - {"box": self.BOX4d}, chunks={"lon": 1, "lat": 1, "dpt": 1, "time": 2} - ) - assert all([is_box(chunk) for chunk in C.fit_transform()]) - assert len(C.fit_transform()) == 2 - - C = Chunker( - {"box": self.BOX4d}, - chunks={"lon": 1, "lat": 1, "dpt": 1}, - chunksize={"time": 5}, - ) - chunks = C.fit_transform() - assert all([is_box(chunk) for chunk in chunks]) - assert np.timedelta64( - pd.to_datetime(chunks[0][7]) - pd.to_datetime(chunks[0][6]), "D" - ) <= np.timedelta64(5, "D") - - with pytest.raises(ValueError): - Chunker({"box": self.BOX4d}, chunks=["lon", 1]) - - C = Chunker({"box": self.BOX4d}) - assert isinstance(C.this_chunker, types.FunctionType) or isinstance( - C.this_chunker, types.MethodType - ) - - def test_format_oneline(): s = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore" assert isinstance(format_oneline(s), str) @@ -438,95 +214,3 @@ def test_argo_split_path(self, file): assert key in desc -class Test_float_wmo(): - - def test_init(self): - assert isinstance(float_wmo(2901746), float_wmo) - assert isinstance(float_wmo(float_wmo(2901746)), float_wmo) - - def test_isvalid(self): - assert float_wmo(2901746).isvalid - assert not float_wmo(12, errors='ignore').isvalid - - def test_ppt(self): - assert isinstance(str(float_wmo(2901746)), str) - assert isinstance(repr(float_wmo(2901746)), str) - - def test_comparisons(self): - assert float_wmo(2901746) == float_wmo(2901746) - assert float_wmo(2901746) != float_wmo(2901745) - assert float_wmo(2901746) >= float_wmo(2901746) - assert float_wmo(2901746) > float_wmo(2901745) - assert float_wmo(2901746) <= float_wmo(2901746) - assert float_wmo(2901746) < float_wmo(2901747) - - def test_hashable(self): - assert isinstance(hash(float_wmo(2901746)), int) - - -class Test_Registry(): - - opts = [(None, 'str'), (['hello', 'world'], str), (None, float_wmo), ([2901746, 4902252], float_wmo)] - opts_ids = ["%s, %s" % ((lambda x: 'iterlist' if x is not None else x)(opt[0]), repr(opt[1])) for opt in opts] - - @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_init(self, opts): - assert isinstance(Registry(opts[0], dtype=opts[1]), Registry) - - opts = [(['hello', 'world'], str), ([2901746, 4902252], float_wmo)] - opts_ids = ["%s, %s" % ((lambda x: 'iterlist' if x is not None else x)(opt[0]), repr(opt[1])) for opt in opts] - - @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_commit(self, opts): - R = Registry(dtype=opts[1]) - R.commit(opts[0]) - - 
@pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_append(self, opts): - R = Registry(dtype=opts[1]) - R.append(opts[0][0]) - - @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_extend(self, opts): - R = Registry(dtype=opts[1]) - R.append(opts[0]) - - @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_insert(self, opts): - R = Registry(opts[0][0], dtype=opts[1]) - R.insert(0, opts[0][-1]) - assert R[0] == opts[0][-1] - - @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_remove(self, opts): - R = Registry(opts[0], dtype=opts[1]) - R.remove(opts[0][0]) - assert opts[0][0] not in R - - @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_copy(self, opts): - R = Registry(opts[0], dtype=opts[1]) - assert R == R.copy() - - bad_opts = [(['hello', 12], str), ([2901746, 1], float_wmo)] - bad_opts_ids = ["%s, %s" % ((lambda x: 'iterlist' if x is not None else x)(opt[0]), repr(opt[1])) for opt in opts] - - @pytest.mark.parametrize("opts", bad_opts, indirect=False, ids=bad_opts_ids) - def test_invalid_dtype(self, opts): - with pytest.raises(ValueError): - Registry(opts[0][0], dtype=opts[1], invalid='raise').commit(opts[0][-1]) - with pytest.warns(UserWarning): - Registry(opts[0][0], dtype=opts[1], invalid='warn').commit(opts[0][-1]) - # Raise nothing: - Registry(opts[0][0], dtype=opts[1], invalid='ignore').commit(opts[0][-1]) - - -@pytest.mark.parametrize("params", [[6901929, None], [6901929, 12]], indirect=False, ids=['float', 'profile']) -def test_get_coriolis_profile_id(params, mocked_httpserver): - with argopy.set_options(cachedir=tempfile.mkdtemp()): - assert isinstance(get_coriolis_profile_id(params[0], params[1], api_server=mocked_server_address), pd.core.frame.DataFrame) - -@pytest.mark.parametrize("params", [[6901929, None], [6901929, 12]], indirect=False, ids=['float', 'profile']) -def test_get_ea_profile_page(params, mocked_httpserver): - with argopy.set_options(cachedir=tempfile.mkdtemp()): - assert is_list_of_strings(get_ea_profile_page(params[0], params[1], api_server=mocked_server_address)) diff --git a/argopy/tests/test_utils_accessories.py b/argopy/tests/test_utils_accessories.py new file mode 100644 index 00000000..f786d8c1 --- /dev/null +++ b/argopy/tests/test_utils_accessories.py @@ -0,0 +1,86 @@ +import pytest +from argopy.utils.accessories import float_wmo, Registry + + +class Test_float_wmo(): + + def test_init(self): + assert isinstance(float_wmo(2901746), float_wmo) + assert isinstance(float_wmo(float_wmo(2901746)), float_wmo) + + def test_isvalid(self): + assert float_wmo(2901746).isvalid + assert not float_wmo(12, errors='ignore').isvalid + + def test_ppt(self): + assert isinstance(str(float_wmo(2901746)), str) + assert isinstance(repr(float_wmo(2901746)), str) + + def test_comparisons(self): + assert float_wmo(2901746) == float_wmo(2901746) + assert float_wmo(2901746) != float_wmo(2901745) + assert float_wmo(2901746) >= float_wmo(2901746) + assert float_wmo(2901746) > float_wmo(2901745) + assert float_wmo(2901746) <= float_wmo(2901746) + assert float_wmo(2901746) < float_wmo(2901747) + + def test_hashable(self): + assert isinstance(hash(float_wmo(2901746)), int) + + +class Test_Registry(): + + opts = [(None, 'str'), (['hello', 'world'], str), (None, float_wmo), ([2901746, 4902252], float_wmo)] + opts_ids = ["%s, %s" % ((lambda x: 'iterlist' if x is not None else x)(opt[0]), repr(opt[1])) for opt in opts] + + 
@pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_init(self, opts):
+        assert isinstance(Registry(opts[0], dtype=opts[1]), Registry)
+
+    opts = [(['hello', 'world'], str), ([2901746, 4902252], float_wmo)]
+    opts_ids = ["%s, %s" % ((lambda x: 'iterlist' if x is not None else x)(opt[0]), repr(opt[1])) for opt in opts]
+
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_commit(self, opts):
+        R = Registry(dtype=opts[1])
+        R.commit(opts[0])
+
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_append(self, opts):
+        R = Registry(dtype=opts[1])
+        R.append(opts[0][0])
+
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_extend(self, opts):
+        R = Registry(dtype=opts[1])
+        R.extend(opts[0])
+
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_insert(self, opts):
+        R = Registry(opts[0][0], dtype=opts[1])
+        R.insert(0, opts[0][-1])
+        assert R[0] == opts[0][-1]
+
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_remove(self, opts):
+        R = Registry(opts[0], dtype=opts[1])
+        R.remove(opts[0][0])
+        assert opts[0][0] not in R
+
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_copy(self, opts):
+        R = Registry(opts[0], dtype=opts[1])
+        assert R == R.copy()
+
+    bad_opts = [(['hello', 12], str), ([2901746, 1], float_wmo)]
+    bad_opts_ids = ["%s, %s" % ((lambda x: 'iterlist' if x is not None else x)(opt[0]), repr(opt[1])) for opt in bad_opts]
+
+    @pytest.mark.parametrize("opts", bad_opts, indirect=False, ids=bad_opts_ids)
+    def test_invalid_dtype(self, opts):
+        with pytest.raises(ValueError):
+            Registry(opts[0][0], dtype=opts[1], invalid='raise').commit(opts[0][-1])
+        with pytest.warns(UserWarning):
+            Registry(opts[0][0], dtype=opts[1], invalid='warn').commit(opts[0][-1])
+        # Raise nothing:
+        Registry(opts[0][0], dtype=opts[1], invalid='ignore').commit(opts[0][-1])
+
diff --git a/argopy/tests/test_utils_caching.py b/argopy/tests/test_utils_caching.py
new file mode 100644
index 00000000..59472072
--- /dev/null
+++ b/argopy/tests/test_utils_caching.py
@@ -0,0 +1,35 @@
+import os
+import pandas as pd
+import argopy
+from argopy import DataFetcher as ArgoDataFetcher
+from utils import (
+    requires_gdac,
+)
+import tempfile
+
+
+@requires_gdac
+def test_clear_cache():
+    ftproot, flist = argopy.tutorial.open_dataset("gdac")
+    with tempfile.TemporaryDirectory() as cachedir:
+        with argopy.set_options(cachedir=cachedir):
+            loader = ArgoDataFetcher(src="gdac", ftp=ftproot, cache=True).profile(2902696, 12)
+            loader.to_xarray()
+            argopy.clear_cache()
+            assert os.path.exists(cachedir) is True
+            assert len(os.listdir(cachedir)) == 0
+
+
+@requires_gdac
+def test_lscache():
+    ftproot, flist = argopy.tutorial.open_dataset("gdac")
+    with tempfile.TemporaryDirectory() as cachedir:
+        with argopy.set_options(cachedir=cachedir):
+            loader = ArgoDataFetcher(src="gdac", ftp=ftproot, cache=True).profile(2902696, 12)
+            loader.to_xarray()
+            result = argopy.utilities.lscache(cache_path=cachedir, prt=True)
+            assert isinstance(result, str)
+
+            result = argopy.utilities.lscache(cache_path=cachedir, prt=False)
+            assert isinstance(result, pd.DataFrame)
+
diff --git a/argopy/tests/test_utils_checkers.py b/argopy/tests/test_utils_checkers.py
index 987bd88a..b8c2d53d 100644
--- a/argopy/tests/test_utils_checkers.py
+++ b/argopy/tests/test_utils_checkers.py
@@ -1,7 +1,9 @@
 import pytest
 import numpy as np
 from mocked_http import
mocked_httpserver, mocked_server_address - +from utils import ( + requires_erddap, +) import argopy from argopy.errors import FtpPathError from argopy.utils.checkers import ( @@ -222,5 +224,3 @@ def test_erddap_ds_exists(mocked_httpserver): with argopy.set_options(erddap=mocked_server_address): assert isinstance(erddap_ds_exists(ds="ArgoFloats"), bool) assert erddap_ds_exists(ds="DummyDS") is False - -# todo : Implement tests for utilities functions: badge, fetch_status and monitor_status diff --git a/argopy/tests/test_utils_chunking.py b/argopy/tests/test_utils_chunking.py new file mode 100644 index 00000000..3aee8c86 --- /dev/null +++ b/argopy/tests/test_utils_chunking.py @@ -0,0 +1,196 @@ +import pytest +import types +import numpy as np +import pandas as pd + +from argopy.errors import InvalidFetcherAccessPoint +from argopy.utils.chunking import Chunker +from argopy.utils.checkers import is_box + + +class Test_Chunker: + @pytest.fixture(autouse=True) + def create_data(self): + self.WMO = [ + 6902766, + 6902772, + 6902914, + 6902746, + 6902916, + 6902915, + 6902757, + 6902771, + ] + self.BOX3d = [0, 20, 40, 60, 0, 1000] + self.BOX4d = [0, 20, 40, 60, 0, 1000, "2001-01", "2001-6"] + + def test_InvalidFetcherAccessPoint(self): + with pytest.raises(InvalidFetcherAccessPoint): + Chunker({"invalid": self.WMO}) + + def test_invalid_chunks(self): + with pytest.raises(ValueError): + Chunker({"box": self.BOX3d}, chunks='toto') + + def test_invalid_chunksize(self): + with pytest.raises(ValueError): + Chunker({"box": self.BOX3d}, chunksize='toto') + + def test_chunk_wmo(self): + C = Chunker({"wmo": self.WMO}) + assert all( + [all(isinstance(x, int) for x in chunk) for chunk in C.fit_transform()] + ) + + C = Chunker({"wmo": self.WMO}, chunks="auto") + assert all( + [all(isinstance(x, int) for x in chunk) for chunk in C.fit_transform()] + ) + + C = Chunker({"wmo": self.WMO}, chunks={"wmo": 1}) + assert all( + [all(isinstance(x, int) for x in chunk) for chunk in C.fit_transform()] + ) + assert len(C.fit_transform()) == 1 + + with pytest.raises(ValueError): + Chunker({"wmo": self.WMO}, chunks=["wmo", 1]) + + C = Chunker({"wmo": self.WMO}) + assert isinstance(C.this_chunker, types.FunctionType) or isinstance( + C.this_chunker, types.MethodType + ) + + def test_chunk_box3d(self): + C = Chunker({"box": self.BOX3d}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + + C = Chunker({"box": self.BOX3d}, chunks="auto") + assert all([is_box(chunk) for chunk in C.fit_transform()]) + + C = Chunker({"box": self.BOX3d}, chunks={"lon": 12, "lat": 1, "dpt": 1}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 12 + + C = Chunker( + {"box": self.BOX3d}, chunks={"lat": 1, "dpt": 1}, chunksize={"lon": 10} + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert chunks[0][1] - chunks[0][0] == 10 + + C = Chunker({"box": self.BOX3d}, chunks={"lon": 1, "lat": 12, "dpt": 1}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 12 + + C = Chunker( + {"box": self.BOX3d}, chunks={"lon": 1, "dpt": 1}, chunksize={"lat": 10} + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert chunks[0][3] - chunks[0][2] == 10 + + C = Chunker({"box": self.BOX3d}, chunks={"lon": 1, "lat": 1, "dpt": 12}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 12 + + C = Chunker( + {"box": self.BOX3d}, chunks={"lon": 1, "lat": 1}, 
chunksize={"dpt": 10} + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert chunks[0][5] - chunks[0][4] == 10 + + C = Chunker({"box": self.BOX3d}, chunks={"lon": 4, "lat": 2, "dpt": 1}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 2 * 4 + + C = Chunker({"box": self.BOX3d}, chunks={"lon": 2, "lat": 3, "dpt": 4}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 2 * 3 * 4 + + with pytest.raises(ValueError): + Chunker({"box": self.BOX3d}, chunks=["lon", 1]) + + C = Chunker({"box": self.BOX3d}) + assert isinstance(C.this_chunker, types.FunctionType) or isinstance( + C.this_chunker, types.MethodType + ) + + def test_chunk_box4d(self): + C = Chunker({"box": self.BOX4d}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + + C = Chunker({"box": self.BOX4d}, chunks="auto") + assert all([is_box(chunk) for chunk in C.fit_transform()]) + + C = Chunker( + {"box": self.BOX4d}, chunks={"lon": 2, "lat": 1, "dpt": 1, "time": 1} + ) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 2 + + C = Chunker( + {"box": self.BOX4d}, + chunks={"lat": 1, "dpt": 1, "time": 1}, + chunksize={"lon": 10}, + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert chunks[0][1] - chunks[0][0] == 10 + + C = Chunker( + {"box": self.BOX4d}, chunks={"lon": 1, "lat": 2, "dpt": 1, "time": 1} + ) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 2 + + C = Chunker( + {"box": self.BOX4d}, + chunks={"lon": 1, "dpt": 1, "time": 1}, + chunksize={"lat": 10}, + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert chunks[0][3] - chunks[0][2] == 10 + + C = Chunker( + {"box": self.BOX4d}, chunks={"lon": 1, "lat": 1, "dpt": 2, "time": 1} + ) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 2 + + C = Chunker( + {"box": self.BOX4d}, + chunks={"lon": 1, "lat": 1, "time": 1}, + chunksize={"dpt": 10}, + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert chunks[0][5] - chunks[0][4] == 10 + + C = Chunker( + {"box": self.BOX4d}, chunks={"lon": 1, "lat": 1, "dpt": 1, "time": 2} + ) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 2 + + C = Chunker( + {"box": self.BOX4d}, + chunks={"lon": 1, "lat": 1, "dpt": 1}, + chunksize={"time": 5}, + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert np.timedelta64( + pd.to_datetime(chunks[0][7]) - pd.to_datetime(chunks[0][6]), "D" + ) <= np.timedelta64(5, "D") + + with pytest.raises(ValueError): + Chunker({"box": self.BOX4d}, chunks=["lon", 1]) + + C = Chunker({"box": self.BOX4d}) + assert isinstance(C.this_chunker, types.FunctionType) or isinstance( + C.this_chunker, types.MethodType + ) + diff --git a/argopy/utilities.py b/argopy/utilities.py index fd5e0479..2a8cf81a 100644 --- a/argopy/utilities.py +++ b/argopy/utilities.py @@ -12,12 +12,10 @@ import urllib import json import collections -from collections import UserList import copy from functools import reduce, wraps from packaging import version import logging -from abc import ABC, abstractmethod from urllib.parse import urlparse from typing import Union import inspect @@ -82,117 +80,6 @@ log = logging.getLogger("argopy.utilities") -def clear_cache(fs=None): - """ Delete argopy cache 
folder content """ - if os.path.exists(OPTIONS["cachedir"]): - # shutil.rmtree(OPTIONS["cachedir"]) - for filename in os.listdir(OPTIONS["cachedir"]): - file_path = os.path.join(OPTIONS["cachedir"], filename) - try: - if os.path.isfile(file_path) or os.path.islink(file_path): - os.unlink(file_path) - elif os.path.isdir(file_path): - shutil.rmtree(file_path) - except Exception as e: - print("Failed to delete %s. Reason: %s" % (file_path, e)) - if fs: - fs.clear_cache() - - -def lscache(cache_path: str = "", prt=True): - """ Decode and list cache folder content - - Parameters - ---------- - cache_path: str - prt: bool, default=True - Return a printable string or a :class:`pandas.DataFrame` - - Returns - ------- - str or :class:`pandas.DataFrame` - """ - from datetime import datetime - import math - summary = [] - - cache_path = OPTIONS['cachedir'] if cache_path == '' else cache_path - apath = os.path.abspath(cache_path) - log.debug("Listing cache content at: %s" % cache_path) - - def convert_size(size_bytes): - if size_bytes == 0: - return "0B" - size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") - i = int(math.floor(math.log(size_bytes, 1024))) - p = math.pow(1024, i) - s = round(size_bytes / p, 2) - return "%s %s" % (s, size_name[i]) - - cached_files = [] - fn = os.path.join(apath, "cache") - if os.path.exists(fn): - with open(fn, "rb") as f: - loaded_cached_files = pickle.load(f) # nosec B301 because files controlled internally - for c in loaded_cached_files.values(): - if isinstance(c["blocks"], list): - c["blocks"] = set(c["blocks"]) - cached_files.append(loaded_cached_files) - else: - raise FileSystemHasNoCache("No fsspec cache system at: %s" % apath) - - cached_files = cached_files or [{}] - cached_files = cached_files[-1] - - N_FILES = len(cached_files) - TOTAL_SIZE = 0 - for cfile in cached_files: - path = os.path.join(apath, cached_files[cfile]['fn']) - TOTAL_SIZE += os.path.getsize(path) - - summary.append("%s %s" % ("=" * 20, "%i files in fsspec cache folder (%s)" % (N_FILES, convert_size(TOTAL_SIZE)))) - summary.append("lscache %s" % os.path.sep.join([apath, ""])) - summary.append("=" * 20) - - listing = {'fn': [], 'size': [], 'time': [], 'original': [], 'uid': [], 'blocks': []} - for cfile in cached_files: - summary.append("- %s" % cached_files[cfile]['fn']) - listing['fn'].append(cached_files[cfile]['fn']) - - path = os.path.join(cache_path, cached_files[cfile]['fn']) - summary.append("\t%8s: %s" % ('SIZE', convert_size(os.path.getsize(path)))) - listing['size'].append(os.path.getsize(path)) - - key = 'time' - ts = cached_files[cfile][key] - tsf = pd.to_datetime(datetime.fromtimestamp(ts)).strftime("%c") - summary.append("\t%8s: %s (%s)" % (key, tsf, ts)) - listing['time'].append(pd.to_datetime(datetime.fromtimestamp(ts))) - - if version.parse(fsspec.__version__) > version.parse("0.8.7"): - key = 'original' - summary.append("\t%8s: %s" % (key, cached_files[cfile][key])) - listing[key].append(cached_files[cfile][key]) - - key = 'uid' - summary.append("\t%8s: %s" % (key, cached_files[cfile][key])) - listing[key].append(cached_files[cfile][key]) - - key = 'blocks' - summary.append("\t%8s: %s" % (key, cached_files[cfile][key])) - listing[key].append(cached_files[cfile][key]) - - summary.append("=" * 20) - summary = "\n".join(summary) - if prt: - # Return string to be printed: - return summary - else: - # Return dataframe listing: - # log.debug(summary) - return pd.DataFrame(listing) - - def get_sys_info(): """Returns system information as a dict""" @@ -621,276 
+508,6 @@ def _regular_interp(x, y, target_values): return remapped -class Chunker: - """ To chunk fetcher requests """ - - # Default maximum chunks size for all possible request parameters - default_chunksize = { - "box": { - "lon": 20, # degree - "lat": 20, # degree - "dpt": 500, # meters/db - "time": 3 * 30, - }, # Days - "wmo": {"wmo": 5, "cyc": 100}, # Nb of floats - } # Nb of cycles - - def __init__(self, request: dict, chunks: str = "auto", chunksize: dict = {}): - """ Create a request Chunker - - Allow to easily split an access point request into chunks - - Parameters - ---------- - request: dict - Access point request to be chunked. One of the following: - - - {'box': [lon_min, lon_max, lat_min, lat_max, dpt_min, dpt_max, time_min, time_max]} - - {'box': [lon_min, lon_max, lat_min, lat_max, dpt_min, dpt_max]} - - {'wmo': [wmo1, wmo2, ...], 'cyc': [0,1, ...]} - chunks: 'auto' or dict - Dictionary with request access point as keys and number of chunks to create as values. - - Eg: {'wmo':10} will create a maximum of 10 chunks along WMOs. - chunksize: dict, optional - Dictionary with request access point as keys and chunk size as values (used as maximum values in - 'auto' chunking). - - Eg: {'wmo': 5} will create chunks with as many as 5 WMOs each. - - """ - self.request = request - - if "box" in self.request: - is_box(self.request["box"]) - if len(self.request["box"]) == 8: - self.this_chunker = self._chunker_box4d - elif len(self.request["box"]) == 6: - self.this_chunker = self._chunker_box3d - elif "wmo" in self.request: - self.this_chunker = self._chunker_wmo - else: - raise InvalidFetcherAccessPoint( - "'%s' not valid access point" % ",".join(self.request.keys()) - ) - - default = self.default_chunksize[[k for k in self.request.keys()][0]] - if len(chunksize) == 0: # chunksize = {} - chunksize = default - if not isinstance(chunksize, collectionsAbc.Mapping): - raise ValueError("chunksize must be mappable") - else: # merge with default: - chunksize = {**default, **chunksize} - self.chunksize = collections.OrderedDict(sorted(chunksize.items())) - - default = {k: "auto" for k in self.chunksize.keys()} - if chunks == "auto": # auto for all - chunks = default - elif len(chunks) == 0: # chunks = {}, i.e. chunk=1 for all - chunks = {k: 1 for k in self.request} - if not isinstance(chunks, collectionsAbc.Mapping): - raise ValueError("chunks must be 'auto' or mappable") - chunks = {**default, **chunks} - self.chunks = collections.OrderedDict(sorted(chunks.items())) - - def _split(self, lst, n=1): - """Yield successive n-sized chunks from lst""" - for i in range(0, len(lst), n): - yield lst[i: i + n] - - def _split_list_bychunknb(self, lst, n=1): - """Split list in n-imposed chunks of similar size - The last chunk may contain less element than the others, depending on the size of the list. - """ - res = [] - s = int(np.floor_divide(len(lst), n)) - for i in self._split(lst, s): - res.append(i) - if len(res) > n: - res[n - 1::] = [reduce(lambda i, j: i + j, res[n - 1::])] - return res - - def _split_list_bychunksize(self, lst, max_size=1): - """Split list in chunks of imposed size - The last chunk may contain less element than the others, depending on the size of the list. 
- """ - res = [] - for i in self._split(lst, max_size): - res.append(i) - return res - - def _split_box(self, large_box, n=1, d="x"): # noqa: C901 - """Split a box domain in one direction in n-imposed equal chunks """ - if d == "x": - i_left, i_right = 0, 1 - if d == "y": - i_left, i_right = 2, 3 - if d == "z": - i_left, i_right = 4, 5 - if d == "t": - i_left, i_right = 6, 7 - if n == 1: - return [large_box] - boxes = [] - if d in ["x", "y", "z"]: - n += 1 # Required because we split in linspace - bins = np.linspace(large_box[i_left], large_box[i_right], n) - for ii, left in enumerate(bins): - if ii < len(bins) - 1: - right = bins[ii + 1] - this_box = large_box.copy() - this_box[i_left] = left - this_box[i_right] = right - boxes.append(this_box) - elif "t" in d: - dates = pd.to_datetime(large_box[i_left: i_right + 1]) - date_bounds = [ - d.strftime("%Y%m%d%H%M%S") - for d in pd.date_range(dates[0], dates[1], periods=n + 1) - ] - for i1, i2 in zip(np.arange(0, n), np.arange(1, n + 1)): - left, right = date_bounds[i1], date_bounds[i2] - this_box = large_box.copy() - this_box[i_left] = left - this_box[i_right] = right - boxes.append(this_box) - return boxes - - def _split_this_4Dbox(self, box, nx=1, ny=1, nz=1, nt=1): - box_list = [] - split_x = self._split_box(box, n=nx, d="x") - for bx in split_x: - split_y = self._split_box(bx, n=ny, d="y") - for bxy in split_y: - split_z = self._split_box(bxy, n=nz, d="z") - for bxyz in split_z: - split_t = self._split_box(bxyz, n=nt, d="t") - for bxyzt in split_t: - box_list.append(bxyzt) - return box_list - - def _split_this_3Dbox(self, box, nx=1, ny=1, nz=1): - box_list = [] - split_x = self._split_box(box, n=nx, d="x") - for bx in split_x: - split_y = self._split_box(bx, n=ny, d="y") - for bxy in split_y: - split_z = self._split_box(bxy, n=nz, d="z") - for bxyz in split_z: - box_list.append(bxyz) - return box_list - - def _chunker_box4d(self, request, chunks, chunks_maxsize): # noqa: C901 - BOX = request["box"] - n_chunks = chunks - for axis, n in n_chunks.items(): - if n == "auto": - if axis == "lon": - Lx = BOX[1] - BOX[0] - if Lx > chunks_maxsize["lon"]: # Max box size in longitude - n_chunks["lon"] = int( - np.ceil(np.divide(Lx, chunks_maxsize["lon"])) - ) - else: - n_chunks["lon"] = 1 - if axis == "lat": - Ly = BOX[3] - BOX[2] - if Ly > chunks_maxsize["lat"]: # Max box size in latitude - n_chunks["lat"] = int( - np.ceil(np.divide(Ly, chunks_maxsize["lat"])) - ) - else: - n_chunks["lat"] = 1 - if axis == "dpt": - Lz = BOX[5] - BOX[4] - if Lz > chunks_maxsize["dpt"]: # Max box size in depth - n_chunks["dpt"] = int( - np.ceil(np.divide(Lz, chunks_maxsize["dpt"])) - ) - else: - n_chunks["dpt"] = 1 - if axis == "time": - Lt = np.timedelta64( - pd.to_datetime(BOX[7]) - pd.to_datetime(BOX[6]), "D" - ) - MaxLen = np.timedelta64(chunks_maxsize["time"], "D") - if Lt > MaxLen: # Max box size in time - n_chunks["time"] = int(np.ceil(np.divide(Lt, MaxLen))) - else: - n_chunks["time"] = 1 - - boxes = self._split_this_4Dbox( - BOX, - nx=n_chunks["lon"], - ny=n_chunks["lat"], - nz=n_chunks["dpt"], - nt=n_chunks["time"], - ) - return {"chunks": sorted(n_chunks), "values": boxes} - - def _chunker_box3d(self, request, chunks, chunks_maxsize): - BOX = request["box"] - n_chunks = chunks - for axis, n in n_chunks.items(): - if n == "auto": - if axis == "lon": - Lx = BOX[1] - BOX[0] - if Lx > chunks_maxsize["lon"]: # Max box size in longitude - n_chunks["lon"] = int( - np.floor_divide(Lx, chunks_maxsize["lon"]) - ) - else: - n_chunks["lon"] = 1 - if axis == "lat": - Ly 
= BOX[3] - BOX[2] - if Ly > chunks_maxsize["lat"]: # Max box size in latitude - n_chunks["lat"] = int( - np.floor_divide(Ly, chunks_maxsize["lat"]) - ) - else: - n_chunks["lat"] = 1 - if axis == "dpt": - Lz = BOX[5] - BOX[4] - if Lz > chunks_maxsize["dpt"]: # Max box size in depth - n_chunks["dpt"] = int( - np.floor_divide(Lz, chunks_maxsize["dpt"]) - ) - else: - n_chunks["dpt"] = 1 - # if axis == 'time': - # Lt = np.timedelta64(pd.to_datetime(BOX[5]) - pd.to_datetime(BOX[4]), 'D') - # MaxLen = np.timedelta64(chunks_maxsize['time'], 'D') - # if Lt > MaxLen: # Max box size in time - # n_chunks['time'] = int(np.floor_divide(Lt, MaxLen)) - # else: - # n_chunks['time'] = 1 - boxes = self._split_this_3Dbox( - BOX, nx=n_chunks["lon"], ny=n_chunks["lat"], nz=n_chunks["dpt"] - ) - return {"chunks": sorted(n_chunks), "values": boxes} - - def _chunker_wmo(self, request, chunks, chunks_maxsize): - WMO = request["wmo"] - n_chunks = chunks - if n_chunks["wmo"] == "auto": - wmo_grps = self._split_list_bychunksize(WMO, max_size=chunks_maxsize["wmo"]) - else: - n = np.min([n_chunks["wmo"], len(WMO)]) - wmo_grps = self._split_list_bychunknb(WMO, n=n) - n_chunks["wmo"] = len(wmo_grps) - return {"chunks": sorted(n_chunks), "values": wmo_grps} - - def fit_transform(self): - """ Chunk a fetcher request - - Returns - ------- - list - """ - self._results = self.this_chunker(self.request, self.chunks, self.chunksize) - # self.chunks = self._results['chunks'] - return self._results["values"] - def format_oneline(s, max_width=65): """ Return a string formatted for a line print """ @@ -1367,258 +984,6 @@ def fix_localhost(host): return dict(sorted(output.items())) -class RegistryItem(ABC): - """Prototype for possible custom items in a Registry""" - @property - @abstractmethod - def value(self): - raise NotImplementedError("Not implemented") - - @property - @abstractmethod - def isvalid(self, item): - raise NotImplementedError("Not implemented") - - @abstractmethod - def __str__(self): - raise NotImplementedError("Not implemented") - - @abstractmethod - def __repr__(self): - raise NotImplementedError("Not implemented") - - -class float_wmo(RegistryItem): - """Argo float WMO number object""" - - def __init__(self, WMO_number, errors='raise'): - """Create an Argo float WMO number object - - Parameters - ---------- - WMO_number: object - Anything that could be casted as an integer - errors: {'raise', 'warn', 'ignore'} - Possibly raises a ValueError exception or UserWarning, otherwise fails silently if WMO_number is not valid - - Returns - ------- - :class:`argopy.utilities.float_wmo` - """ - self.errors = errors - if isinstance(WMO_number, float_wmo): - item = WMO_number.value - else: - item = check_wmo(WMO_number, errors=self.errors)[0] # This will automatically validate item - self.item = item - - @property - def isvalid(self): - """Check if WMO number is valid""" - return is_wmo(self.item, errors=self.errors) - # return True # Because it was checked at instantiation - - @property - def value(self): - """Return WMO number as in integer""" - return int(self.item) - - def __str__(self): - # return "%s" % check_wmo(self.item)[0] - return "%s" % self.item - - def __repr__(self): - return f"WMO({self.item})" - - def __check_other__(self, other): - return check_wmo(other)[0] if type(other) is not float_wmo else other.item - - def __eq__(self, other): - return self.item.__eq__(self.__check_other__(other)) - - def __ne__(self, other): - return self.item.__ne__(self.__check_other__(other)) - - def __gt__(self, other): - return 
self.item.__gt__(self.__check_other__(other)) - - def __lt__(self, other): - return self.item.__lt__(self.__check_other__(other)) - - def __ge__(self, other): - return self.item.__ge__(self.__check_other__(other)) - - def __le__(self, other): - return self.item.__le__(self.__check_other__(other)) - - def __hash__(self): - return hash(self.item) - - -class Registry(UserList): - """A list manager can that validate item type - - Examples - -------- - You can commit new entry to the registry, one by one: - - >>> R = Registry(name='file') - >>> R.commit('meds/4901105/profiles/D4901105_017.nc') - >>> R.commit('aoml/1900046/profiles/D1900046_179.nc') - - Or with a list: - - >>> R = Registry(name='My floats', dtype='wmo') - >>> R.commit([2901746, 4902252]) - - And also at instantiation time (name and dtype are optional): - - >>> R = Registry([2901746, 4902252], name='My floats', dtype=float_wmo) - - Registry can be used like a list. - - It is iterable: - - >>> for wmo in R: - >>> print(wmo) - - It has a ``len`` property: - - >>> len(R) - - It can be checked for values: - - >>> 4902252 in R - - You can also remove items from the registry, again one by one or with a list: - - >>> R.remove('2901746') - - """ - - def _complain(self, msg): - if self._invalid == 'raise': - raise ValueError(msg) - elif self._invalid == 'warn': - warnings.warn(msg) - else: - log.debug(msg) - - def _str(self, item): - is_valid = isinstance(item, str) - if not is_valid: - self._complain("%s is not a valid %s" % (str(item), self.dtype)) - return is_valid - - def _dict(self, item): - is_valid = isinstance(item, dict) - if not is_valid: - self._complain("%s is not a valid %s" % (str(item), self.dtype)) - return is_valid - - def _wmo(self, item): - return item.isvalid - - def __init__(self, initlist=None, name: str = 'unnamed', dtype='str', invalid='raise'): - """Create a registry, i.e. a controlled list - - Parameters - ---------- - initlist: list, optional - List of values to register - name: str, default: 'unnamed' - Name of the Registry - dtype: :class:`str` or dtype, default: :class:`str` - Data type of registry content. Supported values are: 'str', 'wmo', float_wmo - invalid: str, default: 'raise' - Define what do to when a new item is not valid. 
Can be 'raise' or 'ignore' - """ - self.name = name - self._invalid = invalid - if repr(dtype) == "" or dtype == 'str': - self._validator = self._str - self.dtype = str - elif dtype == float_wmo or str(dtype).lower() == 'wmo': - self._validator = self._wmo - self.dtype = float_wmo - elif repr(dtype) == "" or dtype == 'dict': - self._validator = self._dict - self.dtype = dict - elif hasattr(dtype, 'isvalid'): - self._validator = dtype.isvalid - self.dtype = dtype - else: - raise ValueError("Unrecognised Registry data type '%s'" % dtype) - - if initlist is not None: - initlist = self._process_items(initlist) - super().__init__(initlist) - - def __repr__(self): - summary = ["%s" % str(self.dtype)] - summary.append("Name: %s" % self.name) - N = len(self.data) - msg = "Nitems: %s" % N if N > 1 else "Nitem: %s" % N - summary.append(msg) - if N > 0: - items = [str(item) for item in self.data] - # msg = format_oneline("[%s]" % "; ".join(items), max_width=120) - msg = "[%s]" % "; ".join(items) - summary.append("Content: %s" % msg) - return "\n".join(summary) - - def _process_items(self, items): - if not isinstance(items, list): - items = [items] - if self.dtype == float_wmo: - items = [float_wmo(item, errors=self._invalid) for item in items] - return items - - def commit(self, values): - """R.commit(values) -- append values to the end of the registry if not already in""" - items = self._process_items(values) - for item in items: - if item not in self.data and self._validator(item): - super().append(item) - return self - - def append(self, value): - """R.append(value) -- append value to the end of the registry""" - items = self._process_items(value) - for item in items: - if self._validator(item): - super().append(item) - return self - - def extend(self, other): - """R.extend(iterable) -- extend registry by appending elements from the iterable""" - self.append(other) - return self - - def remove(self, values): - """R.remove(valueS) -- remove first occurrence of values.""" - items = self._process_items(values) - for item in items: - if item in self.data: - super().remove(item) - return self - - def insert(self, index, value): - """R.insert(index, value) -- insert value before index.""" - item = self._process_items(value)[0] - if self._validator(item): - super().insert(index, item) - return self - - def __copy__(self): - # Called with copy.copy(R) - return Registry(copy.copy(self.data), dtype=self.dtype) - - def copy(self): - """Return a shallow copy of the registry""" - return self.__copy__() - - def log_argopy_callerstack(level='debug'): """log the caller’s stack""" froot = str(pathlib.Path(__file__).parent.resolve()) diff --git a/argopy/utils/__init__.py b/argopy/utils/__init__.py index 8acfbd69..69f2bb23 100644 --- a/argopy/utils/__init__.py +++ b/argopy/utils/__init__.py @@ -1,4 +1,3 @@ -from .monitored_threadpool import MyThreadPoolExecutor as MonitoredThreadPoolExecutor from .checkers import ( is_box, is_indexbox, is_list_of_strings, is_list_of_dicts, is_list_of_datasets, is_list_equal, @@ -17,13 +16,13 @@ list_standard_variables, list_multiprofile_file_variables ) - +from .caching import clear_cache, lscache +from .monitored_threadpool import MyThreadPoolExecutor as MonitoredThreadPoolExecutor +from .chunking import Chunker +from .accessories import Registry, float_wmo __all__ = ( - # Classes: - "MonitoredThreadPoolExecutor", - # Checkers: "is_box", "is_indexbox", "is_list_of_strings", "is_list_of_dicts", "is_list_of_datasets", "is_list_equal", @@ -47,4 +46,14 @@ "list_available_index_src", 
"list_standard_variables", "list_multiprofile_file_variables", + + # Cache management: + "clear_cache", "lscache", + + # Computation and performances: + "MonitoredThreadPoolExecutor", + "Chunker", + + # Accessories classes (specific objects): + "Registry", "float_wmo" ) diff --git a/argopy/utils/accessories.py b/argopy/utils/accessories.py new file mode 100644 index 00000000..5be8c7b0 --- /dev/null +++ b/argopy/utils/accessories.py @@ -0,0 +1,262 @@ +from abc import ABC, abstractmethod +from collections import UserList +import warnings +import logging +import copy + +from .checkers import check_wmo, is_wmo + + +log = logging.getLogger("argopy.utils.accessories") + + +class RegistryItem(ABC): + """Prototype for possible custom items in a Registry""" + @property + @abstractmethod + def value(self): + raise NotImplementedError("Not implemented") + + @property + @abstractmethod + def isvalid(self, item): + raise NotImplementedError("Not implemented") + + @abstractmethod + def __str__(self): + raise NotImplementedError("Not implemented") + + @abstractmethod + def __repr__(self): + raise NotImplementedError("Not implemented") + + +class float_wmo(RegistryItem): + """Argo float WMO number object""" + + def __init__(self, WMO_number, errors='raise'): + """Create an Argo float WMO number object + + Parameters + ---------- + WMO_number: object + Anything that could be casted as an integer + errors: {'raise', 'warn', 'ignore'} + Possibly raises a ValueError exception or UserWarning, otherwise fails silently if WMO_number is not valid + + Returns + ------- + :class:`argopy.utilities.float_wmo` + """ + self.errors = errors + if isinstance(WMO_number, float_wmo): + item = WMO_number.value + else: + item = check_wmo(WMO_number, errors=self.errors)[0] # This will automatically validate item + self.item = item + + @property + def isvalid(self): + """Check if WMO number is valid""" + return is_wmo(self.item, errors=self.errors) + # return True # Because it was checked at instantiation + + @property + def value(self): + """Return WMO number as in integer""" + return int(self.item) + + def __str__(self): + # return "%s" % check_wmo(self.item)[0] + return "%s" % self.item + + def __repr__(self): + return f"WMO({self.item})" + + def __check_other__(self, other): + return check_wmo(other)[0] if type(other) is not float_wmo else other.item + + def __eq__(self, other): + return self.item.__eq__(self.__check_other__(other)) + + def __ne__(self, other): + return self.item.__ne__(self.__check_other__(other)) + + def __gt__(self, other): + return self.item.__gt__(self.__check_other__(other)) + + def __lt__(self, other): + return self.item.__lt__(self.__check_other__(other)) + + def __ge__(self, other): + return self.item.__ge__(self.__check_other__(other)) + + def __le__(self, other): + return self.item.__le__(self.__check_other__(other)) + + def __hash__(self): + return hash(self.item) + + +class Registry(UserList): + """A list manager can that validate item type + + Examples + -------- + You can commit new entry to the registry, one by one: + + >>> R = Registry(name='file') + >>> R.commit('meds/4901105/profiles/D4901105_017.nc') + >>> R.commit('aoml/1900046/profiles/D1900046_179.nc') + + Or with a list: + + >>> R = Registry(name='My floats', dtype='wmo') + >>> R.commit([2901746, 4902252]) + + And also at instantiation time (name and dtype are optional): + + >>> R = Registry([2901746, 4902252], name='My floats', dtype=float_wmo) + + Registry can be used like a list. 
+ + It is iterable: + + >>> for wmo in R: + >>> print(wmo) + + It has a ``len`` property: + + >>> len(R) + + It can be checked for values: + + >>> 4902252 in R + + You can also remove items from the registry, again one by one or with a list: + + >>> R.remove('2901746') + + """ + + def _complain(self, msg): + if self._invalid == 'raise': + raise ValueError(msg) + elif self._invalid == 'warn': + warnings.warn(msg) + else: + log.debug(msg) + + def _str(self, item): + is_valid = isinstance(item, str) + if not is_valid: + self._complain("%s is not a valid %s" % (str(item), self.dtype)) + return is_valid + + def _dict(self, item): + is_valid = isinstance(item, dict) + if not is_valid: + self._complain("%s is not a valid %s" % (str(item), self.dtype)) + return is_valid + + def _wmo(self, item): + return item.isvalid + + def __init__(self, initlist=None, name: str = 'unnamed', dtype='str', invalid='raise'): + """Create a registry, i.e. a controlled list + + Parameters + ---------- + initlist: list, optional + List of values to register + name: str, default: 'unnamed' + Name of the Registry + dtype: :class:`str` or dtype, default: :class:`str` + Data type of registry content. Supported values are: 'str', 'wmo', float_wmo + invalid: str, default: 'raise' + Define what do to when a new item is not valid. Can be 'raise' or 'ignore' + """ + self.name = name + self._invalid = invalid + if repr(dtype) == "" or dtype == 'str': + self._validator = self._str + self.dtype = str + elif dtype == float_wmo or str(dtype).lower() == 'wmo': + self._validator = self._wmo + self.dtype = float_wmo + elif repr(dtype) == "" or dtype == 'dict': + self._validator = self._dict + self.dtype = dict + elif hasattr(dtype, 'isvalid'): + self._validator = dtype.isvalid + self.dtype = dtype + else: + raise ValueError("Unrecognised Registry data type '%s'" % dtype) + + if initlist is not None: + initlist = self._process_items(initlist) + super().__init__(initlist) + + def __repr__(self): + summary = ["%s" % str(self.dtype)] + summary.append("Name: %s" % self.name) + N = len(self.data) + msg = "Nitems: %s" % N if N > 1 else "Nitem: %s" % N + summary.append(msg) + if N > 0: + items = [str(item) for item in self.data] + # msg = format_oneline("[%s]" % "; ".join(items), max_width=120) + msg = "[%s]" % "; ".join(items) + summary.append("Content: %s" % msg) + return "\n".join(summary) + + def _process_items(self, items): + if not isinstance(items, list): + items = [items] + if self.dtype == float_wmo: + items = [float_wmo(item, errors=self._invalid) for item in items] + return items + + def commit(self, values): + """R.commit(values) -- append values to the end of the registry if not already in""" + items = self._process_items(values) + for item in items: + if item not in self.data and self._validator(item): + super().append(item) + return self + + def append(self, value): + """R.append(value) -- append value to the end of the registry""" + items = self._process_items(value) + for item in items: + if self._validator(item): + super().append(item) + return self + + def extend(self, other): + """R.extend(iterable) -- extend registry by appending elements from the iterable""" + self.append(other) + return self + + def remove(self, values): + """R.remove(valueS) -- remove first occurrence of values.""" + items = self._process_items(values) + for item in items: + if item in self.data: + super().remove(item) + return self + + def insert(self, index, value): + """R.insert(index, value) -- insert value before index.""" + item = 
self._process_items(value)[0] + if self._validator(item): + super().insert(index, item) + return self + + def __copy__(self): + # Called with copy.copy(R) + return Registry(copy.copy(self.data), dtype=self.dtype) + + def copy(self): + """Return a shallow copy of the registry""" + return self.__copy__() diff --git a/argopy/utils/caching.py b/argopy/utils/caching.py new file mode 100644 index 00000000..7e257f23 --- /dev/null +++ b/argopy/utils/caching.py @@ -0,0 +1,122 @@ +import os +import shutil +import logging +import pickle +import fsspec +import pandas as pd +from packaging import version +from ..options import OPTIONS +from ..errors import FileSystemHasNoCache + +log = logging.getLogger("argopy.utils.caching") + + +def clear_cache(fs=None): + """ Delete argopy cache folder content """ + if os.path.exists(OPTIONS["cachedir"]): + # shutil.rmtree(OPTIONS["cachedir"]) + for filename in os.listdir(OPTIONS["cachedir"]): + file_path = os.path.join(OPTIONS["cachedir"], filename) + try: + if os.path.isfile(file_path) or os.path.islink(file_path): + os.unlink(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except Exception as e: + print("Failed to delete %s. Reason: %s" % (file_path, e)) + if fs: + fs.clear_cache() + + +def lscache(cache_path: str = "", prt=True): + """ Decode and list cache folder content + + Parameters + ---------- + cache_path: str + prt: bool, default=True + Return a printable string or a :class:`pandas.DataFrame` + + Returns + ------- + str or :class:`pandas.DataFrame` + """ + from datetime import datetime + import math + summary = [] + + cache_path = OPTIONS['cachedir'] if cache_path == '' else cache_path + apath = os.path.abspath(cache_path) + log.debug("Listing cache content at: %s" % cache_path) + + def convert_size(size_bytes): + if size_bytes == 0: + return "0B" + size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") + i = int(math.floor(math.log(size_bytes, 1024))) + p = math.pow(1024, i) + s = round(size_bytes / p, 2) + return "%s %s" % (s, size_name[i]) + + cached_files = [] + fn = os.path.join(apath, "cache") + if os.path.exists(fn): + with open(fn, "rb") as f: + loaded_cached_files = pickle.load(f) # nosec B301 because files controlled internally + for c in loaded_cached_files.values(): + if isinstance(c["blocks"], list): + c["blocks"] = set(c["blocks"]) + cached_files.append(loaded_cached_files) + else: + raise FileSystemHasNoCache("No fsspec cache system at: %s" % apath) + + cached_files = cached_files or [{}] + cached_files = cached_files[-1] + + N_FILES = len(cached_files) + TOTAL_SIZE = 0 + for cfile in cached_files: + path = os.path.join(apath, cached_files[cfile]['fn']) + TOTAL_SIZE += os.path.getsize(path) + + summary.append("%s %s" % ("=" * 20, "%i files in fsspec cache folder (%s)" % (N_FILES, convert_size(TOTAL_SIZE)))) + summary.append("lscache %s" % os.path.sep.join([apath, ""])) + summary.append("=" * 20) + + listing = {'fn': [], 'size': [], 'time': [], 'original': [], 'uid': [], 'blocks': []} + for cfile in cached_files: + summary.append("- %s" % cached_files[cfile]['fn']) + listing['fn'].append(cached_files[cfile]['fn']) + + path = os.path.join(cache_path, cached_files[cfile]['fn']) + summary.append("\t%8s: %s" % ('SIZE', convert_size(os.path.getsize(path)))) + listing['size'].append(os.path.getsize(path)) + + key = 'time' + ts = cached_files[cfile][key] + tsf = pd.to_datetime(datetime.fromtimestamp(ts)).strftime("%c") + summary.append("\t%8s: %s (%s)" % (key, tsf, ts)) + 
listing['time'].append(pd.to_datetime(datetime.fromtimestamp(ts))) + + if version.parse(fsspec.__version__) > version.parse("0.8.7"): + key = 'original' + summary.append("\t%8s: %s" % (key, cached_files[cfile][key])) + listing[key].append(cached_files[cfile][key]) + + key = 'uid' + summary.append("\t%8s: %s" % (key, cached_files[cfile][key])) + listing[key].append(cached_files[cfile][key]) + + key = 'blocks' + summary.append("\t%8s: %s" % (key, cached_files[cfile][key])) + listing[key].append(cached_files[cfile][key]) + + summary.append("=" * 20) + summary = "\n".join(summary) + if prt: + # Return string to be printed: + return summary + else: + # Return dataframe listing: + # log.debug(summary) + return pd.DataFrame(listing) diff --git a/argopy/utils/chunking.py b/argopy/utils/chunking.py new file mode 100644 index 00000000..4ff7459f --- /dev/null +++ b/argopy/utils/chunking.py @@ -0,0 +1,282 @@ +import numpy as np +import pandas as pd +from functools import reduce +from ..errors import InvalidFetcherAccessPoint +from . import is_box + +import collections +try: + collectionsAbc = collections.abc +except AttributeError: + collectionsAbc = collections + + +class Chunker: + """ To chunk fetcher requests """ + + # Default maximum chunks size for all possible request parameters + default_chunksize = { + "box": { + "lon": 20, # degree + "lat": 20, # degree + "dpt": 500, # meters/db + "time": 3 * 30, + }, # Days + "wmo": {"wmo": 5, "cyc": 100}, # Nb of floats + } # Nb of cycles + + def __init__(self, request: dict, chunks: str = "auto", chunksize: dict = {}): + """ Create a request Chunker + + Allow to easily split an access point request into chunks + + Parameters + ---------- + request: dict + Access point request to be chunked. One of the following: + + - {'box': [lon_min, lon_max, lat_min, lat_max, dpt_min, dpt_max, time_min, time_max]} + - {'box': [lon_min, lon_max, lat_min, lat_max, dpt_min, dpt_max]} + - {'wmo': [wmo1, wmo2, ...], 'cyc': [0,1, ...]} + chunks: 'auto' or dict + Dictionary with request access point as keys and number of chunks to create as values. + + Eg: {'wmo':10} will create a maximum of 10 chunks along WMOs. + chunksize: dict, optional + Dictionary with request access point as keys and chunk size as values (used as maximum values in + 'auto' chunking). + + Eg: {'wmo': 5} will create chunks with as many as 5 WMOs each. + + """ + self.request = request + + if "box" in self.request: + is_box(self.request["box"]) + if len(self.request["box"]) == 8: + self.this_chunker = self._chunker_box4d + elif len(self.request["box"]) == 6: + self.this_chunker = self._chunker_box3d + elif "wmo" in self.request: + self.this_chunker = self._chunker_wmo + else: + raise InvalidFetcherAccessPoint( + "'%s' not valid access point" % ",".join(self.request.keys()) + ) + + default = self.default_chunksize[[k for k in self.request.keys()][0]] + if len(chunksize) == 0: # chunksize = {} + chunksize = default + if not isinstance(chunksize, collectionsAbc.Mapping): + raise ValueError("chunksize must be mappable") + else: # merge with default: + chunksize = {**default, **chunksize} + self.chunksize = collections.OrderedDict(sorted(chunksize.items())) + + default = {k: "auto" for k in self.chunksize.keys()} + if chunks == "auto": # auto for all + chunks = default + elif len(chunks) == 0: # chunks = {}, i.e. 
chunk=1 for all + chunks = {k: 1 for k in self.request} + if not isinstance(chunks, collectionsAbc.Mapping): + raise ValueError("chunks must be 'auto' or mappable") + chunks = {**default, **chunks} + self.chunks = collections.OrderedDict(sorted(chunks.items())) + + def _split(self, lst, n=1): + """Yield successive n-sized chunks from lst""" + for i in range(0, len(lst), n): + yield lst[i: i + n] + + def _split_list_bychunknb(self, lst, n=1): + """Split list in n-imposed chunks of similar size + The last chunk may contain less element than the others, depending on the size of the list. + """ + res = [] + s = int(np.floor_divide(len(lst), n)) + for i in self._split(lst, s): + res.append(i) + if len(res) > n: + res[n - 1::] = [reduce(lambda i, j: i + j, res[n - 1::])] + return res + + def _split_list_bychunksize(self, lst, max_size=1): + """Split list in chunks of imposed size + The last chunk may contain less element than the others, depending on the size of the list. + """ + res = [] + for i in self._split(lst, max_size): + res.append(i) + return res + + def _split_box(self, large_box, n=1, d="x"): # noqa: C901 + """Split a box domain in one direction in n-imposed equal chunks """ + if d == "x": + i_left, i_right = 0, 1 + if d == "y": + i_left, i_right = 2, 3 + if d == "z": + i_left, i_right = 4, 5 + if d == "t": + i_left, i_right = 6, 7 + if n == 1: + return [large_box] + boxes = [] + if d in ["x", "y", "z"]: + n += 1 # Required because we split in linspace + bins = np.linspace(large_box[i_left], large_box[i_right], n) + for ii, left in enumerate(bins): + if ii < len(bins) - 1: + right = bins[ii + 1] + this_box = large_box.copy() + this_box[i_left] = left + this_box[i_right] = right + boxes.append(this_box) + elif "t" in d: + dates = pd.to_datetime(large_box[i_left: i_right + 1]) + date_bounds = [ + d.strftime("%Y%m%d%H%M%S") + for d in pd.date_range(dates[0], dates[1], periods=n + 1) + ] + for i1, i2 in zip(np.arange(0, n), np.arange(1, n + 1)): + left, right = date_bounds[i1], date_bounds[i2] + this_box = large_box.copy() + this_box[i_left] = left + this_box[i_right] = right + boxes.append(this_box) + return boxes + + def _split_this_4Dbox(self, box, nx=1, ny=1, nz=1, nt=1): + box_list = [] + split_x = self._split_box(box, n=nx, d="x") + for bx in split_x: + split_y = self._split_box(bx, n=ny, d="y") + for bxy in split_y: + split_z = self._split_box(bxy, n=nz, d="z") + for bxyz in split_z: + split_t = self._split_box(bxyz, n=nt, d="t") + for bxyzt in split_t: + box_list.append(bxyzt) + return box_list + + def _split_this_3Dbox(self, box, nx=1, ny=1, nz=1): + box_list = [] + split_x = self._split_box(box, n=nx, d="x") + for bx in split_x: + split_y = self._split_box(bx, n=ny, d="y") + for bxy in split_y: + split_z = self._split_box(bxy, n=nz, d="z") + for bxyz in split_z: + box_list.append(bxyz) + return box_list + + def _chunker_box4d(self, request, chunks, chunks_maxsize): # noqa: C901 + BOX = request["box"] + n_chunks = chunks + for axis, n in n_chunks.items(): + if n == "auto": + if axis == "lon": + Lx = BOX[1] - BOX[0] + if Lx > chunks_maxsize["lon"]: # Max box size in longitude + n_chunks["lon"] = int( + np.ceil(np.divide(Lx, chunks_maxsize["lon"])) + ) + else: + n_chunks["lon"] = 1 + if axis == "lat": + Ly = BOX[3] - BOX[2] + if Ly > chunks_maxsize["lat"]: # Max box size in latitude + n_chunks["lat"] = int( + np.ceil(np.divide(Ly, chunks_maxsize["lat"])) + ) + else: + n_chunks["lat"] = 1 + if axis == "dpt": + Lz = BOX[5] - BOX[4] + if Lz > chunks_maxsize["dpt"]: # Max box size in 
depth + n_chunks["dpt"] = int( + np.ceil(np.divide(Lz, chunks_maxsize["dpt"])) + ) + else: + n_chunks["dpt"] = 1 + if axis == "time": + Lt = np.timedelta64( + pd.to_datetime(BOX[7]) - pd.to_datetime(BOX[6]), "D" + ) + MaxLen = np.timedelta64(chunks_maxsize["time"], "D") + if Lt > MaxLen: # Max box size in time + n_chunks["time"] = int(np.ceil(np.divide(Lt, MaxLen))) + else: + n_chunks["time"] = 1 + + boxes = self._split_this_4Dbox( + BOX, + nx=n_chunks["lon"], + ny=n_chunks["lat"], + nz=n_chunks["dpt"], + nt=n_chunks["time"], + ) + return {"chunks": sorted(n_chunks), "values": boxes} + + def _chunker_box3d(self, request, chunks, chunks_maxsize): + BOX = request["box"] + n_chunks = chunks + for axis, n in n_chunks.items(): + if n == "auto": + if axis == "lon": + Lx = BOX[1] - BOX[0] + if Lx > chunks_maxsize["lon"]: # Max box size in longitude + n_chunks["lon"] = int( + np.floor_divide(Lx, chunks_maxsize["lon"]) + ) + else: + n_chunks["lon"] = 1 + if axis == "lat": + Ly = BOX[3] - BOX[2] + if Ly > chunks_maxsize["lat"]: # Max box size in latitude + n_chunks["lat"] = int( + np.floor_divide(Ly, chunks_maxsize["lat"]) + ) + else: + n_chunks["lat"] = 1 + if axis == "dpt": + Lz = BOX[5] - BOX[4] + if Lz > chunks_maxsize["dpt"]: # Max box size in depth + n_chunks["dpt"] = int( + np.floor_divide(Lz, chunks_maxsize["dpt"]) + ) + else: + n_chunks["dpt"] = 1 + # if axis == 'time': + # Lt = np.timedelta64(pd.to_datetime(BOX[5]) - pd.to_datetime(BOX[4]), 'D') + # MaxLen = np.timedelta64(chunks_maxsize['time'], 'D') + # if Lt > MaxLen: # Max box size in time + # n_chunks['time'] = int(np.floor_divide(Lt, MaxLen)) + # else: + # n_chunks['time'] = 1 + boxes = self._split_this_3Dbox( + BOX, nx=n_chunks["lon"], ny=n_chunks["lat"], nz=n_chunks["dpt"] + ) + return {"chunks": sorted(n_chunks), "values": boxes} + + def _chunker_wmo(self, request, chunks, chunks_maxsize): + WMO = request["wmo"] + n_chunks = chunks + if n_chunks["wmo"] == "auto": + wmo_grps = self._split_list_bychunksize(WMO, max_size=chunks_maxsize["wmo"]) + else: + n = np.min([n_chunks["wmo"], len(WMO)]) + wmo_grps = self._split_list_bychunknb(WMO, n=n) + n_chunks["wmo"] = len(wmo_grps) + return {"chunks": sorted(n_chunks), "values": wmo_grps} + + def fit_transform(self): + """ Chunk a fetcher request + + Returns + ------- + list + """ + self._results = self.this_chunker(self.request, self.chunks, self.chunksize) + # self.chunks = self._results['chunks'] + return self._results["values"] From 28f77027b59da43eccb8b449ca026466f0cc24ab Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 8 Sep 2023 16:23:04 +0200 Subject: [PATCH 17/33] let's try this --- argopy/__init__.py | 7 +- argopy/stores/filesystems.py | 2 +- argopy/tests/test_utilities.py | 181 ----- argopy/tests/test_utils_compute.py | 75 ++ argopy/tests/test_utils_format.py | 60 ++ argopy/tests/test_utils_geo.py | 42 ++ argopy/tests/test_utils_locals.py | 22 + argopy/utilities.py | 1123 ---------------------------- argopy/utils/__init__.py | 34 +- argopy/utils/compute.py | 193 +++++ argopy/utils/format.py | 185 +++++ argopy/utils/geo.py | 149 ++++ argopy/utils/locals.py | 244 ++++++ argopy/utils/loggers.py | 44 ++ argopy/utils/manip.py | 126 ++++ argopy/utils/monitors.py | 169 +++++ argopy/xarray.py | 11 +- 17 files changed, 1350 insertions(+), 1317 deletions(-) create mode 100644 argopy/tests/test_utils_compute.py create mode 100644 argopy/tests/test_utils_format.py create mode 100644 argopy/tests/test_utils_geo.py create mode 100644 argopy/tests/test_utils_locals.py delete mode 100644 
argopy/utilities.py create mode 100644 argopy/utils/compute.py create mode 100644 argopy/utils/format.py create mode 100644 argopy/utils/geo.py create mode 100644 argopy/utils/locals.py create mode 100644 argopy/utils/loggers.py create mode 100644 argopy/utils/manip.py create mode 100644 argopy/utils/monitors.py diff --git a/argopy/__init__.py b/argopy/__init__.py index 8648d332..6ab892b1 100644 --- a/argopy/__init__.py +++ b/argopy/__init__.py @@ -29,17 +29,18 @@ from . import tutorial # noqa: E402 # Other Import -from . import utilities # noqa: E402 +from . import utils as utilities # noqa: E402 from . import stores # noqa: E402 from . import errors # noqa: E402 from . import plot # noqa: E402 from .plot import dashboard, ArgoColors # noqa: E402 -from .utilities import show_versions, show_options, clear_cache, lscache # noqa: E402 -from .utilities import monitor_status as status # noqa: E402 from .options import set_options, reset_options # noqa: E402 from .data_fetchers import CTDRefDataFetcher # noqa: E402 from .stores import ArgoIndex # noqa: E402 +from .utils import show_versions, show_options # noqa: E402 +from .utils import clear_cache, lscache # noqa: E402 from .utils import MonitoredThreadPoolExecutor # noqa: E402, F401 +from .utils import monitor_status as status # noqa: E402 from .related import TopoFetcher, OceanOPSDeployments, ArgoNVSReferenceTables, ArgoDocs, ArgoDOI # noqa: E402 diff --git a/argopy/stores/filesystems.py b/argopy/stores/filesystems.py index 42517f93..0bdbe997 100644 --- a/argopy/stores/filesystems.py +++ b/argopy/stores/filesystems.py @@ -48,7 +48,7 @@ ErddapHTTPNotFound, ) from abc import ABC, abstractmethod -from ..utilities import ( +from ..utils import ( drop_variables_not_in_all_datasets, fill_variables_not_in_all_datasets, ) diff --git a/argopy/tests/test_utilities.py b/argopy/tests/test_utilities.py index 8ccd8d46..508c617f 100644 --- a/argopy/tests/test_utilities.py +++ b/argopy/tests/test_utilities.py @@ -1,5 +1,4 @@ import os -import io import pytest import tempfile import xarray as xr @@ -12,12 +11,9 @@ linear_interpolation_remap, format_oneline, wmo2box, - modified_environ, wrap_longitude, toYearFraction, YearFraction_to_datetime, argo_split_path, - Registry, - float_wmo, get_coriolis_profile_id, get_ea_profile_page, ) @@ -35,182 +31,5 @@ from mocked_http import mocked_httpserver, mocked_server_address -@pytest.mark.parametrize("conda", [False, True], - indirect=False, - ids=["conda=%s" % str(p) for p in [False, True]]) -def test_show_versions(conda): - f = io.StringIO() - argopy.show_versions(file=f, conda=conda) - assert "SYSTEM" in f.getvalue() - - -class Test_linear_interpolation_remap: - @pytest.fixture(autouse=True) - def create_data(self): - # create fake data to test interpolation: - temp = np.random.rand(200, 100) - pres = np.sort( - np.floor( - np.zeros([200, 100]) - + np.linspace(50, 950, 100) - + np.random.randint(-5, 5, [200, 100]) - ) - ) - self.dsfake = xr.Dataset( - { - "TEMP": (["N_PROF", "N_LEVELS"], temp), - "PRES": (["N_PROF", "N_LEVELS"], pres), - }, - coords={ - "N_PROF": ("N_PROF", range(200)), - "N_LEVELS": ("N_LEVELS", range(100)), - "Z_LEVELS": ("Z_LEVELS", np.arange(100, 900, 20)), - }, - ) - - def test_interpolation(self): - # Run it with success: - dsi = linear_interpolation_remap( - self.dsfake["PRES"], - self.dsfake["TEMP"], - self.dsfake["Z_LEVELS"], - z_dim="N_LEVELS", - z_regridded_dim="Z_LEVELS", - ) - assert "remapped" in dsi.dims - - def test_interpolation_1d(self): - # Run it with success: - dsi = 
linear_interpolation_remap( - self.dsfake["PRES"].isel(N_PROF=0), - self.dsfake["TEMP"].isel(N_PROF=0), - self.dsfake["Z_LEVELS"], - z_regridded_dim="Z_LEVELS", - ) - assert "remapped" in dsi.dims - - def test_error_zdim(self): - # Test error: - # catches error from _regular_interp linked to z_dim - with pytest.raises(RuntimeError): - linear_interpolation_remap( - self.dsfake["PRES"], - self.dsfake["TEMP"], - self.dsfake["Z_LEVELS"], - z_regridded_dim="Z_LEVELS", - ) - - def test_error_ds(self): - # Test error: - # catches error from linear_interpolation_remap linked to datatype - with pytest.raises(ValueError): - linear_interpolation_remap( - self.dsfake["PRES"], - self.dsfake, - self.dsfake["Z_LEVELS"], - z_dim="N_LEVELS", - z_regridded_dim="Z_LEVELS", - ) - - -def test_format_oneline(): - s = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore" - assert isinstance(format_oneline(s), str) - assert isinstance(format_oneline(s[0:5]), str) - s = format_oneline(s, max_width=12) - assert isinstance(s, str) and len(s) == 12 - - -def test_modified_environ(): - os.environ["DUMMY_ENV_ARGOPY"] = 'initial' - with modified_environ(DUMMY_ENV_ARGOPY='toto'): - assert os.environ['DUMMY_ENV_ARGOPY'] == 'toto' - assert os.environ['DUMMY_ENV_ARGOPY'] == 'initial' - os.environ.pop('DUMMY_ENV_ARGOPY') - - -def test_wmo2box(): - with pytest.raises(ValueError): - wmo2box(12) - with pytest.raises(ValueError): - wmo2box(8000) - with pytest.raises(ValueError): - wmo2box(2000) - - def complete_box(b): - b2 = b.copy() - b2.insert(4, 0.) - b2.insert(5, 10000.) - return b2 - - assert is_box(complete_box(wmo2box(1212))) - assert is_box(complete_box(wmo2box(3324))) - assert is_box(complete_box(wmo2box(5402))) - assert is_box(complete_box(wmo2box(7501))) - - -def test_wrap_longitude(): - assert wrap_longitude(np.array([-20])) == 340 - assert wrap_longitude(np.array([40])) == 40 - assert np.all(np.equal(wrap_longitude(np.array([340, 20])), np.array([340, 380]))) - - -def test_toYearFraction(): - assert toYearFraction(pd.to_datetime('202001010000')) == 2020 - assert toYearFraction(pd.to_datetime('202001010000', utc=True)) == 2020 - assert toYearFraction(pd.to_datetime('202001010000')+pd.offsets.DateOffset(years=1)) == 2021 - - -def test_YearFraction_to_datetime(): - assert YearFraction_to_datetime(2020) == pd.to_datetime('202001010000') - assert YearFraction_to_datetime(2020+1) == pd.to_datetime('202101010000') - - -class Test_argo_split_path: - ############# - # UTILITIES # - ############# - # src = "https://data-argo.ifremer.fr/dac" - src = argopy.tutorial.open_dataset("gdac")[0] + "/dac" - list_of_files = [ - src + "/bodc/6901929/6901929_prof.nc", # core / multi-profile - src + "/coriolis/3902131/3902131_Sprof.nc", # bgc / synthetic multi-profile - - src + "/meds/4901079/profiles/D4901079_110.nc", # core / mono-profile / Delayed - src + "/aoml/13857/profiles/R13857_001.nc", # core / mono-profile / Real - - src + "/coriolis/3902131/profiles/SD3902131_001.nc", # bgc / synthetic mono-profile / Delayed - src + "/coriolis/3902131/profiles/SD3902131_001D.nc", # bgc / synthetic mono-profile / Delayed / Descent - src + "/coriolis/6903247/profiles/SR6903247_134.nc", # bgc / synthetic mono-profile / Real - src + "/coriolis/6903247/profiles/SR6903247_134D.nc", # bgc / synthetic mono-profile / Real / Descent - - src + "/coriolis/3902131/profiles/BR3902131_001.nc", # bgc / mono-profile / Real - src + "/coriolis/3902131/profiles/BR3902131_001D.nc", # bgc / mono-profile / 
Real / Descent - - src + "/aoml/5900446/5900446_Dtraj.nc", # traj / Delayed - src + "/csio/2902696/2902696_Rtraj.nc", # traj / Real - - src + "/coriolis/3902131/3902131_BRtraj.nc", # bgc / traj / Real - # src + "/coriolis/6903247/6903247_BRtraj.nc", # bgc / traj / Real - - src + "/incois/2902269/2902269_tech.nc", # technical - # src + "/nmdis/2901623/2901623_tech.nc", # technical - - src + "/jma/4902252/4902252_meta.nc", # meta-data - # src + "/coriolis/1900857/1900857_meta.nc", # meta-data - ] - list_of_files = [f.replace("/", os.path.sep) for f in list_of_files] - - ######### - # TESTS # - ######### - - @pytest.mark.parametrize("file", list_of_files, - indirect=False) - def test_argo_split_path(self, file): - desc = argo_split_path(file) - assert isinstance(desc, dict) - for key in ['origin', 'path', 'name', 'type', 'extension', 'wmo', 'dac']: - assert key in desc diff --git a/argopy/tests/test_utils_compute.py b/argopy/tests/test_utils_compute.py new file mode 100644 index 00000000..2806fd14 --- /dev/null +++ b/argopy/tests/test_utils_compute.py @@ -0,0 +1,75 @@ +import pytest +import numpy as np +import xarray as xr + +from argopy.utils.compute import linear_interpolation_remap + + +class Test_linear_interpolation_remap: + @pytest.fixture(autouse=True) + def create_data(self): + # create fake data to test interpolation: + temp = np.random.rand(200, 100) + pres = np.sort( + np.floor( + np.zeros([200, 100]) + + np.linspace(50, 950, 100) + + np.random.randint(-5, 5, [200, 100]) + ) + ) + self.dsfake = xr.Dataset( + { + "TEMP": (["N_PROF", "N_LEVELS"], temp), + "PRES": (["N_PROF", "N_LEVELS"], pres), + }, + coords={ + "N_PROF": ("N_PROF", range(200)), + "N_LEVELS": ("N_LEVELS", range(100)), + "Z_LEVELS": ("Z_LEVELS", np.arange(100, 900, 20)), + }, + ) + + def test_interpolation(self): + # Run it with success: + dsi = linear_interpolation_remap( + self.dsfake["PRES"], + self.dsfake["TEMP"], + self.dsfake["Z_LEVELS"], + z_dim="N_LEVELS", + z_regridded_dim="Z_LEVELS", + ) + assert "remapped" in dsi.dims + + def test_interpolation_1d(self): + # Run it with success: + dsi = linear_interpolation_remap( + self.dsfake["PRES"].isel(N_PROF=0), + self.dsfake["TEMP"].isel(N_PROF=0), + self.dsfake["Z_LEVELS"], + z_regridded_dim="Z_LEVELS", + ) + assert "remapped" in dsi.dims + + def test_error_zdim(self): + # Test error: + # catches error from _regular_interp linked to z_dim + with pytest.raises(RuntimeError): + linear_interpolation_remap( + self.dsfake["PRES"], + self.dsfake["TEMP"], + self.dsfake["Z_LEVELS"], + z_regridded_dim="Z_LEVELS", + ) + + def test_error_ds(self): + # Test error: + # catches error from linear_interpolation_remap linked to datatype + with pytest.raises(ValueError): + linear_interpolation_remap( + self.dsfake["PRES"], + self.dsfake, + self.dsfake["Z_LEVELS"], + z_dim="N_LEVELS", + z_regridded_dim="Z_LEVELS", + ) + diff --git a/argopy/tests/test_utils_format.py b/argopy/tests/test_utils_format.py new file mode 100644 index 00000000..6d3c161c --- /dev/null +++ b/argopy/tests/test_utils_format.py @@ -0,0 +1,60 @@ +import os +import pytest +import argopy +from argopy.utils.format import format_oneline, argo_split_path + + +def test_format_oneline(): + s = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore" + assert isinstance(format_oneline(s), str) + assert isinstance(format_oneline(s[0:5]), str) + s = format_oneline(s, max_width=12) + assert isinstance(s, str) and len(s) == 12 + + +class Test_argo_split_path: + 
############# + # UTILITIES # + ############# + # src = "https://data-argo.ifremer.fr/dac" + src = argopy.tutorial.open_dataset("gdac")[0] + "/dac" + list_of_files = [ + src + "/bodc/6901929/6901929_prof.nc", # core / multi-profile + src + "/coriolis/3902131/3902131_Sprof.nc", # bgc / synthetic multi-profile + + src + "/meds/4901079/profiles/D4901079_110.nc", # core / mono-profile / Delayed + src + "/aoml/13857/profiles/R13857_001.nc", # core / mono-profile / Real + + src + "/coriolis/3902131/profiles/SD3902131_001.nc", # bgc / synthetic mono-profile / Delayed + src + "/coriolis/3902131/profiles/SD3902131_001D.nc", # bgc / synthetic mono-profile / Delayed / Descent + src + "/coriolis/6903247/profiles/SR6903247_134.nc", # bgc / synthetic mono-profile / Real + src + "/coriolis/6903247/profiles/SR6903247_134D.nc", # bgc / synthetic mono-profile / Real / Descent + + src + "/coriolis/3902131/profiles/BR3902131_001.nc", # bgc / mono-profile / Real + src + "/coriolis/3902131/profiles/BR3902131_001D.nc", # bgc / mono-profile / Real / Descent + + src + "/aoml/5900446/5900446_Dtraj.nc", # traj / Delayed + src + "/csio/2902696/2902696_Rtraj.nc", # traj / Real + + src + "/coriolis/3902131/3902131_BRtraj.nc", # bgc / traj / Real + # src + "/coriolis/6903247/6903247_BRtraj.nc", # bgc / traj / Real + + src + "/incois/2902269/2902269_tech.nc", # technical + # src + "/nmdis/2901623/2901623_tech.nc", # technical + + src + "/jma/4902252/4902252_meta.nc", # meta-data + # src + "/coriolis/1900857/1900857_meta.nc", # meta-data + ] + list_of_files = [f.replace("/", os.path.sep) for f in list_of_files] + + ######### + # TESTS # + ######### + + @pytest.mark.parametrize("file", list_of_files, + indirect=False) + def test_argo_split_path(self, file): + desc = argo_split_path(file) + assert isinstance(desc, dict) + for key in ['origin', 'path', 'name', 'type', 'extension', 'wmo', 'dac']: + assert key in desc diff --git a/argopy/tests/test_utils_geo.py b/argopy/tests/test_utils_geo.py new file mode 100644 index 00000000..609242c9 --- /dev/null +++ b/argopy/tests/test_utils_geo.py @@ -0,0 +1,42 @@ +import pytest +import numpy as np +import pandas as pd +from argopy.utils.geo import wmo2box, wrap_longitude, toYearFraction, YearFraction_to_datetime +from argopy.utils.checkers import is_box + + +def test_wmo2box(): + with pytest.raises(ValueError): + wmo2box(12) + with pytest.raises(ValueError): + wmo2box(8000) + with pytest.raises(ValueError): + wmo2box(2000) + + def complete_box(b): + b2 = b.copy() + b2.insert(4, 0.) + b2.insert(5, 10000.) 
+ return b2 + + assert is_box(complete_box(wmo2box(1212))) + assert is_box(complete_box(wmo2box(3324))) + assert is_box(complete_box(wmo2box(5402))) + assert is_box(complete_box(wmo2box(7501))) + + +def test_wrap_longitude(): + assert wrap_longitude(np.array([-20])) == 340 + assert wrap_longitude(np.array([40])) == 40 + assert np.all(np.equal(wrap_longitude(np.array([340, 20])), np.array([340, 380]))) + + +def test_toYearFraction(): + assert toYearFraction(pd.to_datetime('202001010000')) == 2020 + assert toYearFraction(pd.to_datetime('202001010000', utc=True)) == 2020 + assert toYearFraction(pd.to_datetime('202001010000')+pd.offsets.DateOffset(years=1)) == 2021 + + +def test_YearFraction_to_datetime(): + assert YearFraction_to_datetime(2020) == pd.to_datetime('202001010000') + assert YearFraction_to_datetime(2020+1) == pd.to_datetime('202101010000') diff --git a/argopy/tests/test_utils_locals.py b/argopy/tests/test_utils_locals.py new file mode 100644 index 00000000..fa04418d --- /dev/null +++ b/argopy/tests/test_utils_locals.py @@ -0,0 +1,22 @@ +import os +import pytest +import io +import argopy +from ..utils.locals import modified_environ + + +@pytest.mark.parametrize("conda", [False, True], + indirect=False, + ids=["conda=%s" % str(p) for p in [False, True]]) +def test_show_versions(conda): + f = io.StringIO() + argopy.show_versions(file=f, conda=conda) + assert "SYSTEM" in f.getvalue() + + +def test_modified_environ(): + os.environ["DUMMY_ENV_ARGOPY"] = 'initial' + with modified_environ(DUMMY_ENV_ARGOPY='toto'): + assert os.environ['DUMMY_ENV_ARGOPY'] == 'toto' + assert os.environ['DUMMY_ENV_ARGOPY'] == 'initial' + os.environ.pop('DUMMY_ENV_ARGOPY') diff --git a/argopy/utilities.py b/argopy/utilities.py deleted file mode 100644 index 2a8cf81a..00000000 --- a/argopy/utilities.py +++ /dev/null @@ -1,1123 +0,0 @@ -#!/bin/env python -# -*coding: UTF-8 -*- -# -# Disclaimer: -# Functions get_sys_info, netcdf_and_hdf5_versions and show_versions are from: -# xarray/util/print_versions.py -# - -import os -import sys -import warnings -import urllib -import json -import collections -import copy -from functools import reduce, wraps -from packaging import version -import logging -from urllib.parse import urlparse -from typing import Union -import inspect -import pathlib -import importlib -import locale -import platform -import struct -import subprocess # nosec B404 only used without user inputs -import contextlib -from fsspec.core import split_protocol -import fsspec -from functools import lru_cache - -import xarray as xr -import pandas as pd -import numpy as np -from scipy import interpolate - -import pickle # nosec B403 only used with internal files/assets -import shutil - -import threading -from socket import gaierror - -import time -import setuptools # noqa: F401 - -from .options import OPTIONS -from .errors import ( - FtpPathError, - InvalidFetcher, - InvalidFetcherAccessPoint, - InvalidOption, - InvalidDatasetStructure, - FileSystemHasNoCache, - DataNotFound, -) -from .utils import ( - is_box, - is_list_of_strings, - is_wmo, check_wmo, - check_cyc, -) -from .related import ( - ArgoNVSReferenceTables, -) - -try: - collectionsAbc = collections.abc -except AttributeError: - collectionsAbc = collections - -try: - importlib.import_module('matplotlib') # noqa: E402 - from matplotlib.colors import to_hex -except ImportError: - pass - -path2assets = importlib.util.find_spec('argopy.static.assets').submodule_search_locations[0] - -log = logging.getLogger("argopy.utilities") - - -def get_sys_info(): - 
"""Returns system information as a dict""" - - blob = [] - - # get full commit hash - commit = None - if os.path.isdir(".git") and os.path.isdir("argopy"): - try: - pipe = subprocess.Popen( # nosec No user provided input to control here - 'git log --format="%H" -n 1'.split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - so, serr = pipe.communicate() - except Exception: - pass - else: - if pipe.returncode == 0: - commit = so - try: - commit = so.decode("utf-8") - except ValueError: - pass - commit = commit.strip().strip('"') - - blob.append(("commit", commit)) - - try: - (sysname, nodename, release, version_, machine, processor) = platform.uname() - blob.extend( - [ - ("python", sys.version), - ("python-bits", struct.calcsize("P") * 8), - ("OS", "%s" % (sysname)), - ("OS-release", "%s" % (release)), - ("machine", "%s" % (machine)), - ("processor", "%s" % (processor)), - ("byteorder", "%s" % sys.byteorder), - ("LC_ALL", "%s" % os.environ.get("LC_ALL", "None")), - ("LANG", "%s" % os.environ.get("LANG", "None")), - ("LOCALE", "%s.%s" % locale.getlocale()), - ] - ) - except Exception: - pass - - return blob - - -def netcdf_and_hdf5_versions(): - libhdf5_version = None - libnetcdf_version = None - try: - import netCDF4 - - libhdf5_version = netCDF4.__hdf5libversion__ - libnetcdf_version = netCDF4.__netcdf4libversion__ - except ImportError: - try: - import h5py - - libhdf5_version = h5py.version.hdf5_version - except ImportError: - pass - return [("libhdf5", libhdf5_version), ("libnetcdf", libnetcdf_version)] - - -def show_versions(file=sys.stdout, conda=False): # noqa: C901 - """ Print the versions of argopy and its dependencies - - Parameters - ---------- - file : file-like, optional - print to the given file-like object. Defaults to sys.stdout. - conda: bool, optional - format versions to be copy/pasted on a conda environment file (default, False) - """ - sys_info = get_sys_info() - - try: - sys_info.extend(netcdf_and_hdf5_versions()) - except Exception as e: - print(f"Error collecting netcdf / hdf5 version: {e}") - - DEPS = { - 'core': sorted([ - ("argopy", lambda mod: mod.__version__), - - ("xarray", lambda mod: mod.__version__), - ("scipy", lambda mod: mod.__version__), - ("netCDF4", lambda mod: mod.__version__), - ("erddapy", lambda mod: mod.__version__), # This could go away from requirements ? 
- ("fsspec", lambda mod: mod.__version__), - ("aiohttp", lambda mod: mod.__version__), - ("packaging", lambda mod: mod.__version__), # will come with xarray, Using 'version' to make API compatible with several fsspec releases - ("requests", lambda mod: mod.__version__), - ("toolz", lambda mod: mod.__version__), - ]), - 'ext.util': sorted([ - ("gsw", lambda mod: mod.__version__), # Used by xarray accessor to compute new variables - ("tqdm", lambda mod: mod.__version__), - ("zarr", lambda mod: mod.__version__), - ]), - 'ext.perf': sorted([ - ("dask", lambda mod: mod.__version__), - ("distributed", lambda mod: mod.__version__), - ("pyarrow", lambda mod: mod.__version__), - ]), - 'ext.plot': sorted([ - ("matplotlib", lambda mod: mod.__version__), - ("cartopy", lambda mod: mod.__version__), - ("seaborn", lambda mod: mod.__version__), - ("IPython", lambda mod: mod.__version__), - ("ipywidgets", lambda mod: mod.__version__), - ("ipykernel", lambda mod: mod.__version__), - ]), - 'dev': sorted([ - - ("bottleneck", lambda mod: mod.__version__), - ("cftime", lambda mod: mod.__version__), - ("cfgrib", lambda mod: mod.__version__), - ("conda", lambda mod: mod.__version__), - ("nc_time_axis", lambda mod: mod.__version__), - - ("numpy", lambda mod: mod.__version__), # will come with xarray and pandas - ("pandas", lambda mod: mod.__version__), # will come with xarray - - ("pip", lambda mod: mod.__version__), - ("black", lambda mod: mod.__version__), - ("flake8", lambda mod: mod.__version__), - ("pytest", lambda mod: mod.__version__), # will come with pandas - ("pytest_env", lambda mod: mod.__version__), # will come with pandas - ("pytest_cov", lambda mod: mod.__version__), # will come with pandas - ("pytest_localftpserver", lambda mod: mod.__version__), # will come with pandas - ("pytest_reportlog", lambda mod: mod.__version__), # will come with pandas - ("setuptools", lambda mod: mod.__version__), - ("aiofiles", lambda mod: mod.__version__), - ("sphinx", lambda mod: mod.__version__), - ]), - } - - DEPS_blob = {} - for level in DEPS.keys(): - deps = DEPS[level] - deps_blob = list() - for (modname, ver_f) in deps: - try: - if modname in sys.modules: - mod = sys.modules[modname] - else: - mod = importlib.import_module(modname) - except Exception: - deps_blob.append((modname, '-')) - else: - try: - ver = ver_f(mod) - deps_blob.append((modname, ver)) - except Exception: - deps_blob.append((modname, "installed")) - DEPS_blob[level] = deps_blob - - print("\nSYSTEM", file=file) - print("------", file=file) - for k, stat in sys_info: - print(f"{k}: {stat}", file=file) - - for level in DEPS_blob: - if conda: - print("\n# %s:" % level.upper(), file=file) - else: - title = "INSTALLED VERSIONS: %s" % level.upper() - print("\n%s" % title, file=file) - print("-" * len(title), file=file) - deps_blob = DEPS_blob[level] - for k, stat in deps_blob: - if conda: - if k != 'argopy': - kf = k.replace("_", "-") - comment = ' ' if stat != '-' else '# ' - print(f"{comment} - {kf} = {stat}", file=file) # Format like a conda env line, useful to update ci/requirements - else: - print("{:<12}: {:<12}".format(k, stat), file=file) - - -def show_options(file=sys.stdout): # noqa: C901 - """ Print options of argopy - - Parameters - ---------- - file : file-like, optional - print to the given file-like object. Defaults to sys.stdout. 
- """ - print("\nARGOPY OPTIONS", file=file) - print("--------------", file=file) - opts = copy.deepcopy(OPTIONS) - opts = dict(sorted(opts.items())) - for k, v in opts.items(): - print(f"{k}: {v}", file=file) - - -def badge(label="label", message="message", color="green", insert=False): - """ Return or insert shield.io badge image - - Use the shields.io service to create a badge image - - https://img.shields.io/static/v1?label=