diff --git a/argopy/__init__.py b/argopy/__init__.py index 636cdfaf..1817a206 100644 --- a/argopy/__init__.py +++ b/argopy/__init__.py @@ -26,22 +26,23 @@ from .fetchers import ArgoIndexFetcher as IndexFetcher # noqa: E402 from .xarray import ArgoAccessor # noqa: E402 -from . import tutorial # noqa: E402 # Other Import -from . import utilities # noqa: E402 +# from . import utils as utilities # noqa: E402 +from . import utilities # noqa: E402 # being deprecated until 0.1.15, then remove from . import stores # noqa: E402 from . import errors # noqa: E402 from . import plot # noqa: E402 +from . import tutorial # noqa: E402 from .plot import dashboard, ArgoColors # noqa: E402 -from .utilities import show_versions, show_options, clear_cache, lscache # noqa: E402 -from .utilities import TopoFetcher, ArgoNVSReferenceTables, OceanOPSDeployments, ArgoDocs # noqa: E402 -from .utilities import monitor_status as status # noqa: E402 from .options import set_options, reset_options # noqa: E402 from .data_fetchers import CTDRefDataFetcher # noqa: E402 from .stores import ArgoIndex # noqa: E402 -from .related import ArgoDOI # noqa: E402 -from .utils import compute # noqa: E402, F401 +from .utils import show_versions, show_options # noqa: E402 +from .utils import clear_cache, lscache # noqa: E402 +from .utils import MonitoredThreadPoolExecutor # noqa: E402, F401 +from .utils import monitor_status as status # noqa: E402 +from .related import TopoFetcher, OceanOPSDeployments, ArgoNVSReferenceTables, ArgoDocs, ArgoDOI # noqa: E402 # @@ -71,11 +72,10 @@ "ArgoDOI", # Class # Submodules: - "utilities", + "utilities", # being deprecated until 0.1.15, then remove "errors", "plot", "ArgoColors", # Class - # "plotters", # Deprec, to be removed after 0.1.13 "stores", "tutorial", # Constants diff --git a/argopy/data_fetchers/argovis_data.py b/argopy/data_fetchers/argovis_data.py index 8bab0bca..536a4862 100644 --- a/argopy/data_fetchers/argovis_data.py +++ b/argopy/data_fetchers/argovis_data.py @@ -9,14 +9,15 @@ import xarray as xr import getpass import logging -from .proto import ArgoDataFetcherProto from abc import abstractmethod import warnings -from argopy.stores import httpstore -from argopy.options import OPTIONS -from argopy.utilities import format_oneline, Chunker -from argopy.errors import DataNotFound +from ..stores import httpstore +from ..options import OPTIONS +from ..utils.format import format_oneline +from ..utils.chunking import Chunker +from ..errors import DataNotFound +from .proto import ArgoDataFetcherProto access_points = ["wmo", "box"] diff --git a/argopy/data_fetchers/erddap_data.py b/argopy/data_fetchers/erddap_data.py index 9ac5d3b4..a9869ab8 100644 --- a/argopy/data_fetchers/erddap_data.py +++ b/argopy/data_fetchers/erddap_data.py @@ -10,7 +10,6 @@ """ -import argopy.utilities import xarray as xr import pandas as pd import numpy as np @@ -21,18 +20,17 @@ import getpass from typing import Union import fnmatch +from aiohttp import ClientResponseError +import logging -from .proto import ArgoDataFetcherProto -from argopy.options import OPTIONS -from argopy.utilities import Chunker, format_oneline, to_list -from argopy.stores import httpstore +from ..options import OPTIONS +from ..utils.format import format_oneline +from ..stores import httpstore from ..errors import ErddapServerError, DataNotFound -from ..stores import ( - indexstore_pd as ArgoIndex, -) # make sure to work with the Pandas index store +from ..stores import indexstore_pd as ArgoIndex # make sure we work with the Pandas index store 
+from ..utils import is_list_of_strings, to_list,Chunker +from .proto import ArgoDataFetcherProto -from aiohttp import ClientResponseError -import logging # Load erddapy according to available version (breaking changes in v0.8.0) try: @@ -201,7 +199,7 @@ def __init__( # noqa: C901 raise ValueError() elif params[0] == "all": params = self._bgc_vlist_avail - elif not argopy.utilities.is_list_of_strings(params): + elif not is_list_of_strings(params): raise ValueError("'params' argument must be a list of strings") # raise ValueError("'params' argument must be a list of strings (possibly with a * wildcard)") self._bgc_vlist_requested = [p.upper() for p in params] @@ -222,7 +220,7 @@ def __init__( # noqa: C901 measured = [] elif self._bgc_measured[0] == "all": measured = self._bgc_vlist_requested - elif not argopy.utilities.is_list_of_strings(self._bgc_measured): + elif not is_list_of_strings(self._bgc_measured): raise ValueError("'measured' argument must be a list of strings") # raise ValueError("'measured' argument must be a list of strings (possibly with a * wildcard)") self._bgc_vlist_measured = [m.upper() for m in measured] diff --git a/argopy/data_fetchers/erddap_index.py b/argopy/data_fetchers/erddap_index.py index ebaff973..e224f690 100644 --- a/argopy/data_fetchers/erddap_index.py +++ b/argopy/data_fetchers/erddap_index.py @@ -17,9 +17,10 @@ from abc import ABC, abstractmethod -from argopy.utilities import load_dict, mapp_dict, format_oneline -from argopy.stores import httpstore -from argopy.options import OPTIONS +from ..utils.format import format_oneline +from ..related import load_dict, mapp_dict +from ..stores import httpstore +from ..options import OPTIONS log = logging.getLogger("argopy.fetchers.erddap_index") diff --git a/argopy/data_fetchers/erddap_refdata.py b/argopy/data_fetchers/erddap_refdata.py index fc0c216a..1aef04e8 100644 --- a/argopy/data_fetchers/erddap_refdata.py +++ b/argopy/data_fetchers/erddap_refdata.py @@ -2,11 +2,11 @@ Fetcher to retrieve CTD reference data from Ifremer erddap """ import xarray as xr -from .erddap_data import ErddapArgoDataFetcher -from argopy.options import OPTIONS -from argopy.utilities import Chunker -from argopy.stores import httpstore_erddap_auth import logging +from ..options import OPTIONS +from ..utils.chunking import Chunker +from ..stores import httpstore_erddap_auth +from .erddap_data import ErddapArgoDataFetcher # Load erddapy according to available version (breaking changes in v0.8.0) try: diff --git a/argopy/data_fetchers/gdacftp_data.py b/argopy/data_fetchers/gdacftp_data.py index 631d31d2..ef45e74d 100644 --- a/argopy/data_fetchers/gdacftp_data.py +++ b/argopy/data_fetchers/gdacftp_data.py @@ -12,11 +12,11 @@ import getpass import logging -from .proto import ArgoDataFetcherProto -from ..utilities import format_oneline, argo_split_path +from ..utils.format import format_oneline, argo_split_path from ..options import OPTIONS, check_gdac_path from ..errors import DataNotFound from ..stores import ArgoIndex +from .proto import ArgoDataFetcherProto log = logging.getLogger("argopy.gdacftp.data") access_points = ["wmo", "box"] diff --git a/argopy/data_fetchers/gdacftp_index.py b/argopy/data_fetchers/gdacftp_index.py index c1a117d9..7b0400a6 100644 --- a/argopy/data_fetchers/gdacftp_index.py +++ b/argopy/data_fetchers/gdacftp_index.py @@ -11,7 +11,7 @@ import logging import importlib -from ..utilities import format_oneline +from ..utils.format import format_oneline from ..options import OPTIONS, check_gdac_path from ..plot import 
dashboard @@ -161,21 +161,6 @@ def clear_cache(self): def to_dataframe(self): """ Filter index file and return a pandas dataframe """ df = self.indexfs.run().to_dataframe() - - # Post-processing of the filtered index is done at the indexstore level - # if 'wmo' not in df: - # df['wmo'] = df['file'].apply(lambda x: int(x.split('/')[1])) - # - # # institution & profiler mapping for all users - # # todo: may be we need to separate this for standard and expert users - # institution_dictionnary = load_dict('institutions') - # df['tmp1'] = df.institution.apply(lambda x: mapp_dict(institution_dictionnary, x)) - # df = df.rename(columns={"institution": "institution_code", "tmp1": "institution"}) - # - # profiler_dictionnary = load_dict('profilers') - # df['profiler'] = df.profiler_type.apply(lambda x: mapp_dict(profiler_dictionnary, int(x))) - # df = df.rename(columns={"profiler_type": "profiler_code"}) - return df def to_xarray(self): diff --git a/argopy/data_fetchers/proto.py b/argopy/data_fetchers/proto.py index 28452de5..412a01e2 100644 --- a/argopy/data_fetchers/proto.py +++ b/argopy/data_fetchers/proto.py @@ -5,7 +5,7 @@ import hashlib import warnings from ..plot import dashboard -from ..utilities import list_standard_variables +from ..utils.lists import list_standard_variables class ArgoDataFetcherProto(ABC): diff --git a/argopy/fetchers.py b/argopy/fetchers.py index da3143a7..18d57af6 100755 --- a/argopy/fetchers.py +++ b/argopy/fetchers.py @@ -15,17 +15,20 @@ import numpy as np import logging -from argopy.options import OPTIONS, _VALIDATORS +from .options import OPTIONS, _VALIDATORS from .errors import InvalidFetcherAccessPoint, InvalidFetcher, OptionValueError - -from .utilities import ( - list_available_data_src, - list_available_index_src, +from .related import ( + get_coriolis_profile_id, +) +from .utils.checkers import ( is_box, is_indexbox, check_wmo, - check_cyc, - get_coriolis_profile_id, + check_cyc +) +from .utils.lists import ( + list_available_data_src, + list_available_index_src, ) from .plot import plot_trajectory, bar_plot, open_sat_altim_report diff --git a/argopy/options.py b/argopy/options.py index 6bf99773..0e6de5cb 100644 --- a/argopy/options.py +++ b/argopy/options.py @@ -5,13 +5,13 @@ # https://github.com/pydata/xarray/blob/cafab46aac8f7a073a32ec5aa47e213a9810ed54/xarray/core/options.py """ import os -from argopy.errors import OptionValueError, FtpPathError, ErddapPathError import warnings import logging import fsspec from fsspec.core import split_protocol from socket import gaierror from urllib.parse import urlparse +from .errors import OptionValueError, FtpPathError, ErddapPathError # Define a logger diff --git a/argopy/plot/argo_colors.py b/argopy/plot/argo_colors.py index b7a3062e..33f9c4e1 100644 --- a/argopy/plot/argo_colors.py +++ b/argopy/plot/argo_colors.py @@ -1,7 +1,7 @@ import numpy as np from packaging import version from .utils import has_mpl, has_seaborn -from ..utilities import warnUnless +from ..utils.loggers import warnUnless if has_mpl: from .utils import mpl, cm, mcolors, plt diff --git a/argopy/plot/dashboards.py b/argopy/plot/dashboards.py index 6b513e1f..5fbf31b7 100644 --- a/argopy/plot/dashboards.py +++ b/argopy/plot/dashboards.py @@ -11,7 +11,9 @@ from packaging import version from .utils import has_ipython -from ..utilities import warnUnless, check_wmo, check_cyc, get_ea_profile_page +from ..utils.loggers import warnUnless +from ..related.euroargo_api import get_ea_profile_page +from ..utils import check_wmo, check_cyc from ..errors 
import InvalidDashboard from .. import __version__ as argopy_version diff --git a/argopy/plot/plot.py b/argopy/plot/plot.py index d9e52c12..7e8c1469 100644 --- a/argopy/plot/plot.py +++ b/argopy/plot/plot.py @@ -18,7 +18,8 @@ from .utils import axes_style, latlongrid, land_feature from .argo_colors import ArgoColors -from ..utilities import warnUnless, check_wmo +from ..utils.loggers import warnUnless +from ..utils.checkers import check_wmo from ..errors import InvalidDatasetStructure if has_mpl: diff --git a/argopy/plot/utils.py b/argopy/plot/utils.py index d4ca7577..234d7710 100644 --- a/argopy/plot/utils.py +++ b/argopy/plot/utils.py @@ -1,7 +1,7 @@ import numpy as np from contextlib import contextmanager import importlib -from ..utilities import deprecated +from ..utils.decorators import deprecated def _importorskip(modname): diff --git a/argopy/plotters.py b/argopy/plotters.py deleted file mode 100644 index 08b12669..00000000 --- a/argopy/plotters.py +++ /dev/null @@ -1,40 +0,0 @@ -import warnings -warnings.filterwarnings("default", category=DeprecationWarning, module=__name__) - - -def deprecation_of_plotters(): - warnings.warn( - "The 'argopy.plotters' has been replaced by 'argopy.plot'. After 0.1.13, importing 'plotters' " - "will raise an error. You're seeing this message because you called this function through " - "the argopy 'plotters' module.", - category=DeprecationWarning, - stacklevel=2, - ) - - -def open_dashboard(*args, **kwargs): - deprecation_of_plotters() - from .plot import dashboard - return dashboard(*args, **kwargs) - - -def open_sat_altim_report(*args, **kwargs): - deprecation_of_plotters() - from .plot import open_sat_altim_report - return open_sat_altim_report(*args, **kwargs) - - -def plot_trajectory(*args, **kwargs): - deprecation_of_plotters() - from .plot import plot_trajectory - return plot_trajectory(*args, **kwargs) - - -def bar_plot(*args, **kwargs): - deprecation_of_plotters() - from .plot import bar_plot - return bar_plot(*args, **kwargs) - - -if __name__ == "argopy.plotters": - deprecation_of_plotters() diff --git a/argopy/related/__init__.py b/argopy/related/__init__.py index e73ca216..68f16bd7 100644 --- a/argopy/related/__init__.py +++ b/argopy/related/__init__.py @@ -1,7 +1,25 @@ -from .gdac_snapshot import ArgoDOI +from .topography import TopoFetcher +from .ocean_ops_deployments import OceanOPSDeployments +from .reference_tables import ArgoNVSReferenceTables +from .argo_documentation import ArgoDocs +from .doi_snapshot import ArgoDOI +from .euroargo_api import get_coriolis_profile_id, get_ea_profile_page +from .utils import load_dict, mapp_dict # __all__ = ( # Classes: + "TopoFetcher", + "OceanOPSDeployments", + "ArgoNVSReferenceTables", + "ArgoDocs", "ArgoDOI", + + # Functions: + "get_coriolis_profile_id", + "get_ea_profile_page", + + # Utilities: + "load_dict", + "mapp_dict", ) diff --git a/argopy/related/argo_documentation.py b/argopy/related/argo_documentation.py new file mode 100644 index 00000000..a182e035 --- /dev/null +++ b/argopy/related/argo_documentation.py @@ -0,0 +1,223 @@ +import os +import json +import pandas as pd +from functools import lru_cache +from ..stores import httpstore +from ..options import OPTIONS +from .utils import path2assets + + +# Load the ADMT documentation catalogue: +with open(os.path.join(path2assets, "admt_documentation_catalogue.json"), "rb") as f: + ADMT_CATALOGUE = json.load(f)['data']['catalogue'] + + +class ArgoDocs: + """ADMT documentation helper class + + Examples + -------- + >>> ArgoDocs().list + >>> 
ArgoDocs().search("CDOM") + >>> ArgoDocs().search("CDOM", where='abstract') + + >>> ArgoDocs(35385) + >>> ArgoDocs(35385).ris + >>> ArgoDocs(35385).abstract + >>> ArgoDocs(35385).show() + >>> ArgoDocs(35385).open_pdf() + >>> ArgoDocs(35385).open_pdf(page=12) + + """ + _catalogue = ADMT_CATALOGUE + + class RIS: + """RIS file structure from TXT file""" + + def __init__(self, file=None, fs=None): + self.record = None + self.fs = fs + if file: + self.parse(file) + + def parse(self, file): + """Parse input file""" + # log.debug(file) + + with self.fs.open(file, 'r', encoding="utf-8") as f: + TXTlines = f.readlines() + lines = [] + # Eliminate blank lines + for line in TXTlines: + line = line.strip() + if len(line) > 0: + lines.append(line) + TXTlines = lines + + # + record = {} + for line in TXTlines: + # print("\n>", line) + if len(line) > 2: + if line[2] == " ": + tag = line[0:2] + field = line[3:] + # print("ok", {tag: field}) + record[tag] = [field] + else: + # print("-", line) + record[tag].append(line) + elif len(line) == 2: + record[line] = [] + # else: + # print("*", line) + + for key in record.keys(): + record[key] = "; ".join(record[key]) + + self.record = record + + @lru_cache + def __init__(self, docid=None, cache=False): + self.docid = None + self._ris = None + self._risfile = None + self._fs = httpstore(cache=cache, cachedir=OPTIONS['cachedir']) + self._doiserver = "https://dx.doi.org" + self._archimer = "https://archimer.ifremer.fr" + + if isinstance(docid, int): + if docid in [doc['id'] for doc in self._catalogue]: + self.docid = docid + else: + raise ValueError("Unknown document id") + elif isinstance(docid, str): + start_with = lambda f, x: f[0:len(x)] == x if len(x) <= len(f) else False # noqa: E731 + if start_with(docid, '10.13155/') and docid in [doc['doi'] for doc in self._catalogue]: + self.docid = [doc['id'] for doc in self._catalogue if docid == doc['doi']][0] + else: + raise ValueError("'docid' must be an integer or a valid Argo DOI") + + def __repr__(self): + summary = [""] + if self.docid is not None: + doc = [doc for doc in self._catalogue if doc['id'] == self.docid][0] + summary.append("Title: %s" % doc['title']) + summary.append("DOI: %s" % doc['doi']) + summary.append("url: https://dx.doi.org/%s" % doc['doi']) + summary.append("last pdf: %s" % self.pdf) + if 'AF' in self.ris: + summary.append("Authors: %s" % self.ris['AF']) + summary.append("Abstract: %s" % self.ris['AB']) + else: + summary.append("- %i documents with a DOI are available in the catalogue" % len(self._catalogue)) + summary.append("- Use the method 'search' to find a document id") + summary.append("- Use the property 'list' to check out the catalogue") + return "\n".join(summary) + + @property + def list(self): + """List of all available documents as a :class:`pandas.DataFrame`""" + return pd.DataFrame(self._catalogue) + + @property + def js(self): + """Internal json record for a document""" + if self.docid is not None: + return [doc for doc in self._catalogue if doc['id'] == self.docid][0] + else: + raise ValueError("Select a document first !") + + @property + def ris(self): + """RIS record of a document""" + if self.docid is not None: + if self._ris is None: + # Fetch RIS metadata for this document: + import re + file = self._fs.download_url("%s/%s" % (self._doiserver, self.js['doi'])) + x = re.search(r'<a href="([^>]*)rel="nofollow">TXT<\/a>', + str(file)) + export_txt_url = x[1].replace("https://archimer.ifremer.fr", self._archimer) + self._risfile = export_txt_url + self._ris = self.RIS(export_txt_url, 
fs=self._fs).record + return self._ris + else: + raise ValueError("Select a document first !") + + @property + def abstract(self): + """Abstract of a document""" + if self.docid is not None: + return self.ris['AB'] + else: + raise ValueError("Select a document first !") + + @property + def pdf(self): + """Link to the online pdf version of a document""" + if self.docid is not None: + return self.ris['UR'] + else: + raise ValueError("Select a document first !") + + def show(self, height=800): + """Insert the document pdf in a notebook cell + + Parameters + ---------- + height: int + Height in pixels of the cell + """ + if self.docid is not None: + from IPython.core.display import HTML + return HTML( + '<iframe src="%s" width="100%%" height="%i"></iframe>' % (self.ris['UR'], height)) + else: + raise ValueError("Select a document first !") + + def open_pdf(self, page=None, url_only=False): + """Open the document in a new browser tab + + Parameters + ---------- + page: int, optional + Open directly at a specific page number + """ + url = self.pdf + url += '#view=FitV&pagemode=thumbs' + if page: + url += '&page=%i' % page + if self.docid is not None: + if not url_only: + import webbrowser + webbrowser.open_new(url) + else: + return url + else: + raise ValueError("Select a document first !") + + def search(self, txt, where='title'): + """Search for a string in all document titles or abstracts + + Parameters + ---------- + txt: str + where: str, default='title' + Where to search, can be 'title' or 'abstract' + + Returns + ------- + list + + """ + results = [] + for doc in self.list.iterrows(): + docid = doc[1]['id'] + if where == 'title': + if txt.lower() in ArgoDocs(docid).js['title'].lower(): + results.append(docid) + elif where == 'abstract': + if txt.lower() in ArgoDocs(docid).abstract.lower(): + results.append(docid) + return results diff --git a/argopy/related/gdac_snapshot.py b/argopy/related/doi_snapshot.py similarity index 100% rename from argopy/related/gdac_snapshot.py rename to argopy/related/doi_snapshot.py diff --git a/argopy/related/euroargo_api.py b/argopy/related/euroargo_api.py new file mode 100644 index 00000000..a1bbf173 --- /dev/null +++ b/argopy/related/euroargo_api.py @@ -0,0 +1,101 @@ +import pandas as pd +from ..options import OPTIONS +from ..utils.checkers import check_wmo, check_cyc +from ..stores import httpstore + + +def get_coriolis_profile_id(WMO, CYC=None, **kwargs): + """ Return a :class:`pandas.DataFrame` with CORIOLIS IDs of WMO/CYC profile pairs + + This method gets IDs by requesting the dataselection.euro-argo.eu trajectory API. + + Parameters + ---------- + WMO: int, list(int) + Define the list of Argo floats. This is a list of integers with WMO float identifiers. + WMO is the World Meteorological Organization. + CYC: int, list(int) + Define the list of cycle numbers to load IDs for each Argo float listed in ``WMO``. 
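+ If None (default), profile IDs are returned for all cycles of each float.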
+ + Returns + ------- + :class:`pandas.DataFrame` + """ + WMO_list = check_wmo(WMO) + if CYC is not None: + CYC_list = check_cyc(CYC) + if 'api_server' in kwargs: + api_server = kwargs['api_server'] + elif OPTIONS['server'] is not None: + api_server = OPTIONS['server'] + else: + api_server = "https://dataselection.euro-argo.eu/api" + URIs = [api_server + "/trajectory/%i" % wmo for wmo in WMO_list] + + def prec(data, url): + # Transform trajectory json to dataframe + # See: https://dataselection.euro-argo.eu/swagger-ui.html#!/cycle-controller/getCyclesByPlatformCodeUsingGET + WMO = check_wmo(url.split("/")[-1])[0] + rows = [] + for profile in data: + keys = [x for x in profile.keys() if x not in ["coordinate"]] + meta_row = dict((key, profile[key]) for key in keys) + for row in profile["coordinate"]: + meta_row[row] = profile["coordinate"][row] + meta_row["WMO"] = WMO + rows.append(meta_row) + return pd.DataFrame(rows) + + fs = httpstore(cache=True, cachedir=OPTIONS['cachedir']) + data = fs.open_mfjson(URIs, preprocess=prec, errors="raise", url_follow=True) + + # Merge results (list of dataframe): + key_map = { + "id": "ID", + "lat": "LATITUDE", + "lon": "LONGITUDE", + "cvNumber": "CYCLE_NUMBER", + "level": "level", + "WMO": "PLATFORM_NUMBER", + } + for i, df in enumerate(data): + df = df.reset_index() + df = df.rename(columns=key_map) + df = df[[value for value in key_map.values() if value in df.columns]] + data[i] = df + df = pd.concat(data, ignore_index=True) + df.sort_values(by=["PLATFORM_NUMBER", "CYCLE_NUMBER"], inplace=True) + df = df.reset_index(drop=True) + # df = df.set_index(["PLATFORM_NUMBER", "CYCLE_NUMBER"]) + df = df.astype({"ID": int}) + if CYC is not None: + df = pd.concat([df[df["CYCLE_NUMBER"] == cyc] for cyc in CYC_list]).reset_index( + drop=True + ) + return df[ + ["PLATFORM_NUMBER", "CYCLE_NUMBER", "ID", "LATITUDE", "LONGITUDE", "level"] + ] + + +def get_ea_profile_page(WMO, CYC=None, **kwargs): + """ Return a list of URL + + Parameters + ---------- + WMO: int, list(int) + WMO must be an integer or an iterable with elements that can be casted as integers + CYC: int, list(int), default (None) + CYC must be an integer or an iterable with elements that can be casted as positive integers + + Returns + ------- + list(str) + + See also + -------- + get_coriolis_profile_id + """ + df = get_coriolis_profile_id(WMO, CYC, **kwargs) + url = "https://dataselection.euro-argo.eu/cycle/{}" + return [url.format(this_id) for this_id in sorted(df["ID"])] + diff --git a/argopy/related/ocean_ops_deployments.py b/argopy/related/ocean_ops_deployments.py new file mode 100644 index 00000000..3350ebd6 --- /dev/null +++ b/argopy/related/ocean_ops_deployments.py @@ -0,0 +1,440 @@ +import pandas as pd +import numpy as np +from ..stores import httpstore +from ..errors import DataNotFound +from ..plot import scatter_map + + +class OceanOPSDeployments: + """Use the OceanOPS API for metadata access to retrieve Argo floats deployment information. + + The API is documented here: https://www.ocean-ops.org/api/swagger/?url=https://www.ocean-ops.org/api/1/oceanops-api.yaml + + Description of deployment status name: + + =========== == ==== + Status Id Description + =========== == ==== + PROBABLE 0 Starting status for some platforms, when there is only a few metadata available, like rough deployment location and date. The platform may be deployed + CONFIRMED 1 Automatically set when a ship is attached to the deployment information. 
The platform is ready to be deployed, deployment is planned + REGISTERED 2 Starting status for most of the networks, when deployment planning is not done. The deployment is certain, and a notification has been sent via the OceanOPS system + OPERATIONAL 6 Automatically set when the platform is emitting a pulse and observations are distributed within a certain time interval + INACTIVE 4 The platform is not emitting a pulse since a certain time + CLOSED 5 The platform is not emitting a pulse since a long time, it is considered as dead + =========== == ==== + + Examples + -------- + + Import the class: + + >>> from argopy.related import OceanOPSDeployments + >>> from argopy import OceanOPSDeployments + + Possibly define the space/time box to work with: + + >>> box = [-20, 0, 42, 51] + >>> box = [-20, 0, 42, 51, '2020-01', '2021-01'] + >>> box = [-180, 180, -90, 90, '2020-01', None] + + Instantiate the metadata fetcher: + + >>> deployment = OceanOPSDeployments() + >>> deployment = OceanOPSDeployments(box) + >>> deployment = OceanOPSDeployments(box, deployed_only=True) # Remove planification + + Load information: + + >>> df = deployment.to_dataframe() + >>> data = deployment.to_json() + + Useful attributes and methods: + + >>> deployment.uri + >>> deployment.uri_decoded + >>> deployment.status_code + >>> fig, ax = deployment.plot_status() + >>> plan_virtualfleet = deployment.plan + + """ + + api = "https://www.ocean-ops.org" + """URL to the API""" + + model = "api/1/data/platform" + """This model represents a Platform entity and is used to retrieve a platform information (schema model + named 'Ptf').""" + + api_server_check = "https://www.ocean-ops.org/api/1/oceanops-api.yaml" + """URL to check if the API is alive""" + + def __init__(self, box: list = None, deployed_only: bool = False): + """ + + Parameters + ---------- + box: list, optional, default=None + Define the domain to load the Argo deployment plan for. By default, **box** is set to None to work with the + global deployment plan starting from the current date. + The list expects one of the following format: + + - [lon_min, lon_max, lat_min, lat_max] + - [lon_min, lon_max, lat_min, lat_max, date_min] + - [lon_min, lon_max, lat_min, lat_max, date_min, date_max] + + Longitude and latitude values must be floats. Dates are strings. + If **box** is provided with a regional domain definition (only 4 values given), then ``date_min`` will be + set to the current date. + + deployed_only: bool, optional, default=False + Return only floats already deployed. If set to False (default), will return the full + deployment plan (floats with all possible status). If set to True, will return only floats with one of the + following status: ``OPERATIONAL``, ``INACTIVE``, and ``CLOSED``. 
+ """ + if box is None: + box = [ + None, + None, + None, + None, + pd.to_datetime("now", utc=True).strftime("%Y-%m-%d"), + None, + ] + elif len(box) == 4: + box.append(pd.to_datetime("now", utc=True).strftime("%Y-%m-%d")) + box.append(None) + elif len(box) == 5: + box.append(None) + + if len(box) != 6: + raise ValueError( + "The 'box' argument must be: None or of lengths 4 or 5 or 6\n%s" + % str(box) + ) + + self.box = box + self.deployed_only = deployed_only + self.data = None + + self.fs = httpstore(cache=False) + + def __format(self, x, typ: str) -> str: + """string formatting helper""" + if typ == "lon": + return str(x) if x is not None else "-" + elif typ == "lat": + return str(x) if x is not None else "-" + elif typ == "tim": + return pd.to_datetime(x).strftime("%Y-%m-%d") if x is not None else "-" + else: + return str(x) + + def __repr__(self): + summary = [""] + summary.append("API: %s/%s" % (self.api, self.model)) + summary.append("Domain: %s" % self.box_name) + summary.append("Deployed only: %s" % self.deployed_only) + if self.data is not None: + summary.append("Nb of floats in the deployment plan: %s" % self.size) + else: + summary.append( + "Nb of floats in the deployment plan: - [Data not retrieved yet]" + ) + return "\n".join(summary) + + def __encode_inc(self, inc): + """Return encoded uri expression for 'include' parameter + + Parameters + ---------- + inc: str + + Returns + ------- + str + """ + return inc.replace('"', "%22").replace("[", "%5B").replace("]", "%5D") + + def __encode_exp(self, exp): + """Return encoded uri expression for 'exp' parameter + + Parameters + ---------- + exp: str + + Returns + ------- + str + """ + return ( + exp.replace('"', "%22") + .replace("'", "%27") + .replace(" ", "%20") + .replace(">", "%3E") + .replace("<", "%3C") + ) + + def __get_uri(self, encoded=False): + uri = "exp=%s&include=%s" % ( + self.exp(encoded=encoded), + self.include(encoded=encoded), + ) + url = "%s/%s?%s" % (self.api, self.model, uri) + return url + + def include(self, encoded=False): + """Return an Ocean-Ops API 'include' expression + + This is used to determine which variables the API call should return + + Parameters + ---------- + encoded: bool, default=False + + Returns + ------- + str + """ + # inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus", "wmos"] + # inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus.id", "ptfStatus.name", "wmos"] + # inc = ["ref", "ptfDepl.lat", "ptfDepl.lon", "ptfDepl.deplDate", "ptfStatus.id", "ptfStatus.name"] + inc = [ + "ref", + "ptfDepl.lat", + "ptfDepl.lon", + "ptfDepl.deplDate", + "ptfStatus.id", + "ptfStatus.name", + "ptfStatus.description", + "program.nameShort", + "program.country.nameShort", + "ptfModel.nameShort", + "ptfDepl.noSite", + ] + inc = "[%s]" % ",".join(['"%s"' % v for v in inc]) + return inc if not encoded else self.__encode_inc(inc) + + def exp(self, encoded=False): + """Return an Ocean-Ops API deployment search expression for an argopy region box definition + + Parameters + ---------- + encoded: bool, default=False + + Returns + ------- + str + """ + exp, arg = "networkPtfs.network.name='Argo'", [] + if self.box[0] is not None: + exp += " and ptfDepl.lon>=$var%i" % (len(arg) + 1) + arg.append(str(self.box[0])) + if self.box[1] is not None: + exp += " and ptfDepl.lon<=$var%i" % (len(arg) + 1) + arg.append(str(self.box[1])) + if self.box[2] is not None: + exp += " and ptfDepl.lat>=$var%i" % (len(arg) + 1) + arg.append(str(self.box[2])) + if self.box[3] is not None: + 
exp += " and ptfDepl.lat<=$var%i" % (len(arg) + 1) + arg.append(str(self.box[3])) + if len(self.box) > 4: + if self.box[4] is not None: + exp += " and ptfDepl.deplDate>=$var%i" % (len(arg) + 1) + arg.append( + '"%s"' % pd.to_datetime(self.box[4]).strftime("%Y-%m-%d %H:%M:%S") + ) + if self.box[5] is not None: + exp += " and ptfDepl.deplDate<=$var%i" % (len(arg) + 1) + arg.append( + '"%s"' % pd.to_datetime(self.box[5]).strftime("%Y-%m-%d %H:%M:%S") + ) + + if self.deployed_only: + exp += " and ptfStatus>=$var%i" % (len(arg) + 1) + arg.append(str(4)) # Allow for: 4, 5 or 6 + + exp = '["%s", %s]' % (exp, ", ".join(arg)) + return exp if not encoded else self.__encode_exp(exp) + + @property + def size(self): + return len(self.data["data"]) if self.data is not None else None + + @property + def status_code(self): + """Return a :class:`pandas.DataFrame` with the definition of status""" + status = { + "status_code": [0, 1, 2, 6, 4, 5], + "status_name": [ + "PROBABLE", + "CONFIRMED", + "REGISTERED", + "OPERATIONAL", + "INACTIVE", + "CLOSED", + ], + "description": [ + "Starting status for some platforms, when there is only a few metadata available, like rough deployment location and date. The platform may be deployed", + "Automatically set when a ship is attached to the deployment information. The platform is ready to be deployed, deployment is planned", + "Starting status for most of the networks, when deployment planning is not done. The deployment is certain, and a notification has been sent via the OceanOPS system", + "Automatically set when the platform is emitting a pulse and observations are distributed within a certain time interval", + "The platform is not emitting a pulse since a certain time", + "The platform is not emitting a pulse since a long time, it is considered as dead", + ], + } + return pd.DataFrame(status).set_index("status_code") + + @property + def box_name(self): + """Return a string to print the box property""" + BOX = self.box + cname = ("[lon=%s/%s; lat=%s/%s]") % ( + self.__format(BOX[0], "lon"), + self.__format(BOX[1], "lon"), + self.__format(BOX[2], "lat"), + self.__format(BOX[3], "lat"), + ) + if len(BOX) == 6: + cname = ("[lon=%s/%s; lat=%s/%s; t=%s/%s]") % ( + self.__format(BOX[0], "lon"), + self.__format(BOX[1], "lon"), + self.__format(BOX[2], "lat"), + self.__format(BOX[3], "lat"), + self.__format(BOX[4], "tim"), + self.__format(BOX[5], "tim"), + ) + return cname + + @property + def uri(self): + """Return encoded URL to post an Ocean-Ops API request + + Returns + ------- + str + """ + return self.__get_uri(encoded=True) + + @property + def uri_decoded(self): + """Return decoded URL to post an Ocean-Ops API request + + Returns + ------- + str + """ + return self.__get_uri(encoded=False) + + @property + def plan(self): + """Return a dictionary to be used as argument in a :class:`virtualargofleet.VirtualFleet` + + This method is for dev, but will be moved to the VirtualFleet software utilities + """ + df = self.to_dataframe() + plan = ( + df[["lon", "lat", "date"]] + .rename(columns={"date": "time"}) + .to_dict("series") + ) + for key in plan.keys(): + plan[key] = plan[key].to_list() + plan["time"] = np.array(plan["time"], dtype="datetime64") + return plan + + def to_json(self): + """Return OceanOPS API request response as a json object""" + if self.data is None: + self.data = self.fs.open_json(self.uri) + return self.data + + def to_dataframe(self): + """Return the deployment plan as :class:`pandas.DataFrame` + + Returns + ------- + :class:`pandas.DataFrame` + """ + 
data = self.to_json() + if data["total"] == 0: + raise DataNotFound("Your search matches no results") + + # res = {'date': [], 'lat': [], 'lon': [], 'wmo': [], 'status_name': [], 'status_code': []} + # res = {'date': [], 'lat': [], 'lon': [], 'wmo': [], 'status_name': [], 'status_code': [], 'ship_name': []} + res = { + "date": [], + "lat": [], + "lon": [], + "wmo": [], + "status_name": [], + "status_code": [], + "program": [], + "country": [], + "model": [], + } + # status = {'REGISTERED': None, 'OPERATIONAL': None, 'INACTIVE': None, 'CLOSED': None, + # 'CONFIRMED': None, 'OPERATIONAL': None, 'PROBABLE': None, 'REGISTERED': None} + + for irow, ptf in enumerate(data["data"]): + # if irow == 0: + # print(ptf) + res["lat"].append(ptf["ptfDepl"]["lat"]) + res["lon"].append(ptf["ptfDepl"]["lon"]) + res["date"].append(ptf["ptfDepl"]["deplDate"]) + res["wmo"].append(ptf["ref"]) + # res['wmo'].append(ptf['wmos'][-1]['wmo']) + # res['wmo'].append(float_wmo(ptf['ref'])) # will not work for some CONFIRMED, PROBABLE or REGISTERED floats + # res['wmo'].append(float_wmo(ptf['wmos'][-1]['wmo'])) + res["status_code"].append(ptf["ptfStatus"]["id"]) + res["status_name"].append(ptf["ptfStatus"]["name"]) + + # res['ship_name'].append(ptf['ptfDepl']['shipName']) + program = ( + ptf["program"]["nameShort"].replace("_", " ") + if ptf["program"]["nameShort"] + else ptf["program"]["nameShort"] + ) + res["program"].append(program) + res["country"].append(ptf["program"]["country"]["nameShort"]) + res["model"].append(ptf["ptfModel"]["nameShort"]) + + # if status[ptf['ptfStatus']['name']] is None: + # status[ptf['ptfStatus']['name']] = ptf['ptfStatus']['description'] + + df = pd.DataFrame(res) + df = df.astype({"date": "datetime64[s]"}) + df = df.sort_values(by="date").reset_index(drop=True) + # df = df[ (df['status_name'] == 'CLOSED') | (df['status_name'] == 'OPERATIONAL')] # Select only floats that have been deployed and returned data + # print(status) + return df + + def plot_status(self, **kwargs): + """Quick plot of the deployment plan + + Named arguments are passed to :class:`plot.scatter_map` + + Returns + ------- + fig: :class:`matplotlib.figure.Figure` + ax: :class:`matplotlib.axes.Axes` + """ + df = self.to_dataframe() + fig, ax = scatter_map( + df, + x="lon", + y="lat", + hue="status_code", + traj=False, + cmap="deployment_status", + **kwargs + ) + ax.set_title( + "Argo network deployment plan\n%s\nSource: OceanOPS API as of %s" + % ( + self.box_name, + pd.to_datetime("now", utc=True).strftime("%Y-%m-%d %H:%M:%S"), + ), + fontsize=12, + ) + return fig, ax diff --git a/argopy/related/reference_tables.py b/argopy/related/reference_tables.py new file mode 100644 index 00000000..622eee7f --- /dev/null +++ b/argopy/related/reference_tables.py @@ -0,0 +1,245 @@ +import pandas as pd +from functools import lru_cache +import collections +from ..stores import httpstore +from ..options import OPTIONS + + +class ArgoNVSReferenceTables: + """Argo Reference Tables + + Utility function to retrieve Argo Reference Tables from a NVS server. 
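+ NVS stands for the NERC Vocabulary Server.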
+ + By default, this relies on: https://vocab.nerc.ac.uk/collection + + Examples + -------- + Methods: + + >>> R = ArgoNVSReferenceTables() + >>> R.search('sensor') + >>> R.tbl(3) + >>> R.tbl('R09') + + Properties: + + >>> R.all_tbl_name + >>> R.all_tbl + >>> R.valid_ref + + """ + + valid_ref = [ + "R01", + "RR2", + "RD2", + "RP2", + "R03", + "R04", + "R05", + "R06", + "R07", + "R08", + "R09", + "R10", + "R11", + "R12", + "R13", + "R15", + "RMC", + "RTV", + "R16", + # "R18", + "R19", + "R20", + "R21", + "R22", + "R23", + "R24", + "R25", + "R26", + "R27", + # "R28", + # "R29", + # "R30", + "R40", + ] + """List of all available Reference Tables""" + + def __init__( + self, + nvs="https://vocab.nerc.ac.uk/collection", + cache: bool = True, + cachedir: str = "", + ): + """Argo Reference Tables from NVS""" + + cachedir = OPTIONS["cachedir"] if cachedir == "" else cachedir + self.fs = httpstore(cache=cache, cachedir=cachedir) + self.nvs = nvs + + def _valid_ref(self, rtid): + if rtid not in self.valid_ref: + rtid = "R%0.2d" % rtid + if rtid not in self.valid_ref: + raise ValueError( + "Invalid Argo Reference Table, should be one in: %s" + % ", ".join(self.valid_ref) + ) + return rtid + + def _jsConcept2df(self, data): + """Return all skos:Concept as class:`pandas.DataFrame`""" + content = { + "altLabel": [], + "prefLabel": [], + "definition": [], + "deprecated": [], + "id": [], + } + for k in data["@graph"]: + if k["@type"] == "skos:Collection": + Collection_name = k["alternative"] + elif k["@type"] == "skos:Concept": + content["altLabel"].append(k["altLabel"]) + content["prefLabel"].append(k["prefLabel"]["@value"]) + content["definition"].append(k["definition"]["@value"]) + content["deprecated"].append(k["deprecated"]) + content["id"].append(k["@id"]) + df = pd.DataFrame.from_dict(content) + df.name = Collection_name + return df + + def _jsCollection(self, data): + """Return last skos:Collection information as data""" + for k in data["@graph"]: + if k["@type"] == "skos:Collection": + name = k["alternative"] + desc = k["description"] + rtid = k["@id"] + return (name, desc, rtid) + + def get_url(self, rtid, fmt="ld+json"): + """Return URL toward a given reference table for a given format + + Parameters + ---------- + rtid: {str, int} + Name or number of the reference table to retrieve. Eg: 'R01', 12 + fmt: str, default: "ld+json" + Format of the NVS server response. Can be: "ld+json", "rdf+xml" or "text/turtle". + + Returns + ------- + str + """ + rtid = self._valid_ref(rtid) + if fmt == "ld+json": + fmt_ext = "?_profile=nvs&_mediatype=application/ld+json" + elif fmt == "rdf+xml": + fmt_ext = "?_profile=nvs&_mediatype=application/rdf+xml" + elif fmt == "text/turtle": + fmt_ext = "?_profile=nvs&_mediatype=text/turtle" + else: + raise ValueError( + "Invalid format. Must be in: 'ld+json', 'rdf+xml' or 'text/turtle'." + ) + url = "{}/{}/current/{}".format + return url(self.nvs, rtid, fmt_ext) + + @lru_cache + def tbl(self, rtid): + """Return an Argo Reference table + + Parameters + ---------- + rtid: {str, int} + Name or number of the reference table to retrieve. Eg: 'R01', 12 + + Returns + ------- + class:`pandas.DataFrame` + """ + rtid = self._valid_ref(rtid) + js = self.fs.open_json(self.get_url(rtid)) + df = self._jsConcept2df(js) + return df + + def tbl_name(self, rtid): + """Return name of an Argo Reference table + + Parameters + ---------- + rtid: {str, int} + Name or number of the reference table to retrieve. 
Eg: 'R01', 12 + + Returns + ------- + tuple('short name', 'description', 'NVS id link') + """ + rtid = self._valid_ref(rtid) + js = self.fs.open_json(self.get_url(rtid)) + return self._jsCollection(js) + + def search(self, txt, where="all"): + """Search for string in tables title and/or description + + Parameters + ---------- + txt: str + where: str, default='all' + Where to search, can be: 'title', 'description', 'all' + + Returns + ------- + list of table id matching the search + """ + results = [] + for tbl_id in self.all_tbl_name: + title = self.tbl_name(tbl_id)[0] + description = self.tbl_name(tbl_id)[1] + if where == "title": + if txt.lower() in title.lower(): + results.append(tbl_id) + elif where == "description": + if txt.lower() in description.lower(): + results.append(tbl_id) + elif where == "all": + if txt.lower() in description.lower() or txt.lower() in title.lower(): + results.append(tbl_id) + return results + + @property + def all_tbl(self): + """Return all Argo Reference tables + + Returns + ------- + OrderedDict + Dictionary with all table short names as key and table content as class:`pandas.DataFrame` + """ + URLs = [self.get_url(rtid) for rtid in self.valid_ref] + df_list = self.fs.open_mfjson(URLs, preprocess=self._jsConcept2df) + all_tables = {} + [all_tables.update({t.name: t}) for t in df_list] + all_tables = collections.OrderedDict(sorted(all_tables.items())) + return all_tables + + @property + def all_tbl_name(self): + """Return names of all Argo Reference tables + + Returns + ------- + OrderedDict + Dictionary with all table short names as key and table names as tuple('short name', 'description', 'NVS id link') + """ + URLs = [self.get_url(rtid) for rtid in self.valid_ref] + name_list = self.fs.open_mfjson(URLs, preprocess=self._jsCollection) + all_tables = {} + [ + all_tables.update({rtid.split("/")[-3]: (name, desc, rtid)}) + for name, desc, rtid in name_list + ] + all_tables = collections.OrderedDict(sorted(all_tables.items())) + return all_tables diff --git a/argopy/related/topography.py b/argopy/related/topography.py new file mode 100644 index 00000000..c03b7618 --- /dev/null +++ b/argopy/related/topography.py @@ -0,0 +1,207 @@ +from typing import Union +from ..options import OPTIONS +from ..stores import httpstore +from ..utils.format import format_oneline + + +class TopoFetcher: + """Fetch topographic data through an ERDDAP server for an ocean rectangle + + Example: + >>> from argopy import TopoFetcher + >>> box = [-75, -45, 20, 30] # Lon_min, lon_max, lat_min, lat_max + >>> ds = TopoFetcher(box).to_xarray() + >>> ds = TopoFetcher(box, ds='gebco', stride=[10, 10], cache=True).to_xarray() + + """ + + class ERDDAP: + def __init__(self, server: str, protocol: str = "tabledap"): + self.server = server + self.protocol = protocol + self.response = "nc" + self.dataset_id = "" + self.constraints = "" + + def __init__( + self, + box: list, + ds: str = "gebco", + cache: bool = False, + cachedir: str = "", + api_timeout: int = 0, + stride: list = [1, 1], + server: Union[str] = None, + **kwargs, + ): + """Instantiate an ERDDAP topo data fetcher + + Parameters + ---------- + ds: str (optional), default: 'gebco' + Dataset to load: + + - 'gebco' will load the GEBCO_2020 Grid, a continuous terrain model for oceans and land at 15 arc-second intervals + stride: list, default [1, 1] + Strides along longitude and latitude. 
This allows to change the output resolution + cache: bool (optional) + Cache data or not (default: False) + cachedir: str (optional) + Path to cache folder + api_timeout: int (optional) + Erddap request time out in seconds. Set to OPTIONS['api_timeout'] by default. + """ + timeout = OPTIONS["api_timeout"] if api_timeout == 0 else api_timeout + self.fs = httpstore( + cache=cache, cachedir=cachedir, timeout=timeout, size_policy="head" + ) + self.definition = "Erddap topographic data fetcher" + + self.BOX = box + self.stride = stride + if ds == "gebco": + self.definition = "NOAA erddap gebco data fetcher for a space region" + self.server = ( + server + if server is not None + else "https://coastwatch.pfeg.noaa.gov/erddap" + ) + self.server_name = "NOAA" + self.dataset_id = "gebco" + + self._init_erddap() + + def _init_erddap(self): + # Init erddap + self.erddap = self.ERDDAP(server=self.server, protocol="griddap") + self.erddap.response = "nc" + + if self.dataset_id == "gebco": + self.erddap.dataset_id = "GEBCO_2020" + else: + raise ValueError( + "Invalid database short name for %s erddap" % self.server_name + ) + return self + + def _cname(self) -> str: + """Fetcher one line string definition helper""" + cname = "?" + + if hasattr(self, "BOX"): + BOX = self.BOX + cname = ("[x=%0.2f/%0.2f; y=%0.2f/%0.2f]") % ( + BOX[0], + BOX[1], + BOX[2], + BOX[3], + ) + return cname + + def __repr__(self): + summary = [""] + summary.append("Name: %s" % self.definition) + summary.append("API: %s" % self.server) + summary.append("Domain: %s" % format_oneline(self.cname())) + return "\n".join(summary) + + def cname(self): + """Return a unique string defining the constraints""" + return self._cname() + + @property + def cachepath(self): + """Return path to cached file(s) for this request + + Returns + ------- + list(str) + """ + return [self.fs.cachepath(uri) for uri in self.uri] + + def define_constraints(self): + """Define request constraints""" + # Eg: https://coastwatch.pfeg.noaa.gov/erddap/griddap/GEBCO_2020.nc?elevation%5B(34):5:(42)%5D%5B(-21):7:(-12)%5D + self.erddap.constraints = "%s(%0.2f):%i:(%0.2f)%s%s(%0.2f):%i:(%0.2f)%s" % ( + "%5B", + self.BOX[2], + self.stride[1], + self.BOX[3], + "%5D", + "%5B", + self.BOX[0], + self.stride[0], + self.BOX[1], + "%5D", + ) + return None + + # @property + # def _minimal_vlist(self): + # """ Return the minimal list of variables to retrieve """ + # vlist = list() + # vlist.append("latitude") + # vlist.append("longitude") + # vlist.append("elevation") + # return vlist + + def url_encode(self, url): + """Return safely encoded list of urls + + This is necessary because fsspec cannot handle in cache paths/urls with a '[' character + """ + + # return urls + def safe_for_fsspec_cache(url): + url = url.replace("[", "%5B") # This is the one really necessary + url = url.replace("]", "%5D") # For consistency + return url + + return safe_for_fsspec_cache(url) + + def get_url(self): + """Return the URL to download data requested + + Returns + ------- + str + """ + # First part of the URL: + protocol = self.erddap.protocol + dataset_id = self.erddap.dataset_id + response = self.erddap.response + url = f"{self.erddap.server}/{protocol}/{dataset_id}.{response}?" 
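+ # For the default GEBCO_2020 setup, the final URL looks like (illustrative placeholders): + # https://coastwatch.pfeg.noaa.gov/erddap/griddap/GEBCO_2020.nc?elevation%5B(lat_min):lat_stride:(lat_max)%5D%5B(lon_min):lon_stride:(lon_max)%5D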
+ + # Add variables to retrieve: + variables = ["elevation"] + variables = ",".join(variables) + url += f"{variables}" + + # Add constraints: + self.define_constraints() # Define constraint to select this box of data (affect self.erddap.constraints) + url += f"{self.erddap.constraints}" + + return self.url_encode(url) + + @property + def uri(self): + """List of files to load for a request + + Returns + ------- + list(str) + """ + return [self.get_url()] + + def to_xarray(self, errors: str = "ignore"): + """Load Topographic data and return a xarray.DataSet""" + + # Download data + if len(self.uri) == 1: + ds = self.fs.open_dataset(self.uri[0]) + + return ds + + def load(self, errors: str = "ignore"): + """Load Topographic data and return a xarray.DataSet""" + return self.to_xarray(errors=errors) diff --git a/argopy/related/utils.py b/argopy/related/utils.py new file mode 100644 index 00000000..0463b102 --- /dev/null +++ b/argopy/related/utils.py @@ -0,0 +1,42 @@ +import importlib +import os +import json +from . import ArgoNVSReferenceTables + + +path2assets = importlib.util.find_spec('argopy.static.assets').submodule_search_locations[0] + + +def load_dict(ptype): + if ptype == "profilers": + try: + nvs = ArgoNVSReferenceTables(cache=True) + profilers = {} + for row in nvs.tbl(8).iterrows(): + profilers.update({int(row[1]['altLabel']): row[1]['prefLabel']}) + return profilers + except Exception: + with open(os.path.join(path2assets, "profilers.json"), "rb") as f: + loaded_dict = json.load(f)['data']['profilers'] + return loaded_dict + elif ptype == "institutions": + try: + nvs = ArgoNVSReferenceTables(cache=True) + institutions = {} + for row in nvs.tbl(4).iterrows(): + institutions.update({row[1]['altLabel']: row[1]['prefLabel']}) + return institutions + except Exception: + with open(os.path.join(path2assets, "institutions.json"), "rb") as f: + loaded_dict = json.load(f)['data']['institutions'] + return loaded_dict + else: + raise ValueError("Invalid dictionary name") + + +def mapp_dict(Adictionnary, Avalue): + if Avalue not in Adictionnary: + return "Unknown" + else: + return Adictionnary[Avalue] + diff --git a/argopy/static/assets/admt_documentation_catalogue.json b/argopy/static/assets/admt_documentation_catalogue.json new file mode 100644 index 00000000..fa0b28b5 --- /dev/null +++ b/argopy/static/assets/admt_documentation_catalogue.json @@ -0,0 +1,153 @@ +{ + "name": "ADMT documentation catalogue", + "long_name": "Titles and DOIs of all the official ADMT documentation", + "last_update": "2023-09-18T09:14:50.015167+00:00", + "data": { + "catalogue": [ + { + "category": "Argo data formats", + "title": "Argo user's manual", + "doi": "10.13155/29825", + "id": 29825 + }, + { + "category": "Quality control", + "title": "Argo Quality Control Manual for CTD and Trajectory Data", + "doi": "10.13155/33951", + "id": 33951 + }, + { + "category": "Quality control", + "title": "Argo quality control manual for dissolved oxygen concentration", + "doi": "10.13155/46542", + "id": 46542 + }, + { + "category": "Quality control", + "title": "Argo quality control manual for biogeochemical data", + "doi": "10.13155/40879", + "id": 40879 + }, + { + "category": "Quality control", + "title": "BGC-Argo quality control manual for the Chlorophyll-A concentration", + "doi": "10.13155/35385", + "id": 35385 + }, + { + "category": "Quality control", + "title": "BGC-Argo quality control manual for nitrate concentration", + "doi": "10.13155/84370", + "id": 84370 + }, + { + "category": "Quality control", + "title": 
"Quality control for BGC-Argo radiometry", + "doi": "10.13155/62466", + "id": 62466 + }, + { + "category": "Cookbooks", + "title": "Argo DAC profile cookbook", + "doi": "10.13155/41151", + "id": 41151 + }, + { + "category": "Cookbooks", + "title": "Argo DAC trajectory cookbook", + "doi": "10.13155/29824", + "id": 29824 + }, + { + "category": "Cookbooks", + "title": "DMQC Cookbook for Core Argo parameters", + "doi": "10.13155/78994", + "id": 78994 + }, + { + "category": "Cookbooks", + "title": "Processing Argo oxygen data at the DAC level", + "doi": "10.13155/39795", + "id": 39795 + }, + { + "category": "Cookbooks", + "title": "Processing Bio-Argo particle backscattering at the DAC level", + "doi": "10.13155/39459", + "id": 39459 + }, + { + "category": "Cookbooks", + "title": "Processing BGC-Argo chlorophyll-A concentration at the DAC level", + "doi": "10.13155/39468", + "id": 39468 + }, + { + "category": "Cookbooks", + "title": "Processing Argo measurement timing information at the DAC level", + "doi": "10.13155/47998", + "id": 47998 + }, + { + "category": "Cookbooks", + "title": "Processing BGC-Argo CDOM concentration at the DAC level", + "doi": "10.13155/54541", + "id": 54541 + }, + { + "category": "Cookbooks", + "title": "Processing Bio-Argo nitrate concentration at the DAC Level", + "doi": "10.13155/46121", + "id": 46121 + }, + { + "category": "Cookbooks", + "title": "Processing BGC-Argo Radiometric data at the DAC level", + "doi": "10.13155/51541", + "id": 51541 + }, + { + "category": "Cookbooks", + "title": "Processing BGC-Argo pH data at the DAC level", + "doi": "10.13155/57195", + "id": 57195 + }, + { + "category": "Cookbooks", + "title": "Description of the Argo GDAC File Checks: Data Format and Consistency Checks", + "doi": "10.13155/46120", + "id": 46120 + }, + { + "category": "Cookbooks", + "title": "Description of the Argo GDAC File Merge Process", + "doi": "10.13155/52154", + "id": 52154 + }, + { + "category": "Cookbooks", + "title": "BGC-Argo synthetic profile file processing and format on Coriolis GDAC", + "doi": "10.13155/55637", + "id": 55637 + }, + { + "category": "Cookbooks", + "title": "Argo GDAC cookbook", + "doi": "10.13155/46202", + "id": 46202 + }, + { + "category": "Cookbooks", + "title": "Processing BGC-Argo pH data at the DAC level", + "doi": "10.13155/57195", + "id": 57195 + }, + { + "category": "Cookbooks", + "title": "Processing BGC-Argo nitrate concentration at the DAC Level", + "doi": "10.13155/46121", + "id": 46121 + } + ] + } +} \ No newline at end of file diff --git a/argopy/stores/__init__.py b/argopy/stores/__init__.py index d5f892d9..498210e4 100644 --- a/argopy/stores/__init__.py +++ b/argopy/stores/__init__.py @@ -1,4 +1,4 @@ -from .argo_index_deprec import indexstore, indexfilter_wmo, indexfilter_box +# from .argo_index_deprec import indexstore, indexfilter_wmo, indexfilter_box from .filesystems import filestore, httpstore, memorystore, ftpstore from .filesystems import httpstore_erddap, httpstore_erddap_auth diff --git a/argopy/stores/argo_index_pa.py b/argopy/stores/argo_index_pa.py index 83d10ea4..239034b9 100644 --- a/argopy/stores/argo_index_pa.py +++ b/argopy/stores/argo_index_pa.py @@ -11,9 +11,6 @@ import gzip from packaging import version -from ..errors import DataNotFound, InvalidDatasetStructure -from ..utilities import check_index_cols, is_indexbox, check_wmo, check_cyc, to_list -from .argo_index_proto import ArgoIndexStoreProto try: import pyarrow.csv as csv # noqa: F401 import pyarrow as pa @@ -22,6 +19,11 @@ except 
ModuleNotFoundError: pass +from ..errors import DataNotFound, InvalidDatasetStructure +from ..utils.checkers import check_index_cols, is_indexbox, check_wmo, check_cyc +from ..utils.casting import to_list +from .argo_index_proto import ArgoIndexStoreProto + log = logging.getLogger("argopy.stores.index.pa") diff --git a/argopy/stores/argo_index_pd.py b/argopy/stores/argo_index_pd.py index 5e69daed..37d316db 100644 --- a/argopy/stores/argo_index_pd.py +++ b/argopy/stores/argo_index_pd.py @@ -10,7 +10,8 @@ import gzip from ..errors import DataNotFound, InvalidDatasetStructure -from ..utilities import check_index_cols, is_indexbox, check_wmo, check_cyc, to_list +from ..utils.checkers import check_index_cols, is_indexbox, check_wmo, check_cyc +from ..utils.casting import to_list from .argo_index_proto import ArgoIndexStoreProto diff --git a/argopy/stores/argo_index_proto.py b/argopy/stores/argo_index_proto.py index f7ed13d0..8257683f 100644 --- a/argopy/stores/argo_index_proto.py +++ b/argopy/stores/argo_index_proto.py @@ -13,7 +13,8 @@ from ..options import OPTIONS from ..errors import FtpPathError, InvalidDataset, OptionValueError -from ..utilities import Registry, isconnected +from ..utils.checkers import isconnected +from ..utils.accessories import Registry from .filesystems import httpstore, memorystore, filestore, ftpstore try: @@ -505,7 +506,7 @@ def get_filename(s, index): else: log.debug("Converting [%s] to dataframe from scratch ..." % src) # Post-processing for user: - from argopy.utilities import load_dict, mapp_dict + from ..related import load_dict, mapp_dict if nrows is not None: df = df.loc[0: nrows - 1].copy() diff --git a/argopy/stores/filesystems.py b/argopy/stores/filesystems.py index b37aedd9..a74d321b 100644 --- a/argopy/stores/filesystems.py +++ b/argopy/stores/filesystems.py @@ -34,7 +34,7 @@ from typing import Union from urllib.parse import urlparse, parse_qs from functools import lru_cache - +from abc import ABC, abstractmethod import concurrent.futures import multiprocessing @@ -47,14 +47,12 @@ ErddapHTTPUnauthorized, ErddapHTTPNotFound, ) -from abc import ABC, abstractmethod -from ..utilities import ( - Registry, - # log_argopy_callerstack, +from ..utils.transform import ( drop_variables_not_in_all_datasets, fill_variables_not_in_all_datasets, ) -from ..utils.compute import MyThreadPoolExecutor as MyExecutor +from ..utils.monitored_threadpool import MyThreadPoolExecutor as MyExecutor +from ..utils.accessories import Registry log = logging.getLogger("argopy.stores") @@ -916,7 +914,7 @@ def open_mfdataset( - :class:`distributed.client.Client`: Experimental, expect this method to fail ! - ``seq``: open data sequentially, no parallelization applied - ``erddap``: use a pool of at most ``max_workers`` threads, comes with a nice dashboard dedicated - to erddap server requests. + to erddap server requests. 
progress: bool, default: False Display a progress bar concat: bool, default: True diff --git a/argopy/tests/helpers/utils.py b/argopy/tests/helpers/utils.py index 648b3fdf..e688c589 100644 --- a/argopy/tests/helpers/utils.py +++ b/argopy/tests/helpers/utils.py @@ -19,16 +19,19 @@ import asyncio from packaging import version import warnings +import logging + from argopy.options import set_options from argopy.errors import ErddapServerError, ArgovisServerError, DataNotFound, FtpPathError -from argopy.utilities import ( +from argopy.utils.lists import ( list_available_data_src, list_available_index_src, +) +from argopy.utils.checkers import ( isconnected, erddap_ds_exists, isAPIconnected, ) -import logging from mocked_http import mocked_server_address, serve_mocked_httpserver diff --git a/argopy/tests/test_fetchers_data_argovis.py b/argopy/tests/test_fetchers_data_argovis.py index 6a083837..d6f6f119 100644 --- a/argopy/tests/test_fetchers_data_argovis.py +++ b/argopy/tests/test_fetchers_data_argovis.py @@ -13,7 +13,7 @@ CacheFileNotFound, FileSystemHasNoCache, ) -from argopy.utilities import is_list_of_strings +from argopy.utils.checkers import is_list_of_strings from utils import requires_connected_argovis, safe_to_server_errors diff --git a/argopy/tests/test_fetchers_data_erddap.py b/argopy/tests/test_fetchers_data_erddap.py index a0f299e6..d57375ca 100644 --- a/argopy/tests/test_fetchers_data_erddap.py +++ b/argopy/tests/test_fetchers_data_erddap.py @@ -1,7 +1,7 @@ import logging from argopy import DataFetcher as ArgoDataFetcher -from argopy.utilities import is_list_of_strings +from argopy.utils.checkers import is_list_of_strings import pytest import xarray as xr diff --git a/argopy/tests/test_fetchers_data_erddap_bgc.py b/argopy/tests/test_fetchers_data_erddap_bgc.py index c01f230e..ad0769b6 100644 --- a/argopy/tests/test_fetchers_data_erddap_bgc.py +++ b/argopy/tests/test_fetchers_data_erddap_bgc.py @@ -2,7 +2,7 @@ import numpy as np from argopy import DataFetcher as ArgoDataFetcher -from argopy.utilities import is_list_of_strings +from argopy.utils.checkers import is_list_of_strings from argopy.stores import indexstore_pd as ArgoIndex # make sure to work with the Pandas index store with erddap-bgc import pytest diff --git a/argopy/tests/test_fetchers_data_gdac.py b/argopy/tests/test_fetchers_data_gdac.py index 4fd6c184..9bae3ee1 100644 --- a/argopy/tests/test_fetchers_data_gdac.py +++ b/argopy/tests/test_fetchers_data_gdac.py @@ -11,6 +11,7 @@ import shutil from urllib.parse import urlparse import logging +from collections import ChainMap import argopy from argopy import DataFetcher as ArgoDataFetcher @@ -19,10 +20,9 @@ FileSystemHasNoCache, FtpPathError, ) -from argopy.utilities import is_list_of_strings, isconnected +from argopy.utils.checkers import isconnected, is_list_of_strings from utils import requires_gdac from mocked_http import mocked_httpserver, mocked_server_address -from collections import ChainMap log = logging.getLogger("argopy.tests.data.gdac") diff --git a/argopy/tests/test_fetchers_facade_data.py b/argopy/tests/test_fetchers_facade_data.py index 944a1611..17e0646c 100644 --- a/argopy/tests/test_fetchers_facade_data.py +++ b/argopy/tests/test_fetchers_facade_data.py @@ -10,7 +10,7 @@ InvalidFetcher, OptionValueError, ) -from argopy.utilities import is_list_of_strings +from argopy.utils import is_list_of_strings from utils import ( requires_fetcher, requires_connection, diff --git a/argopy/tests/test_fetchers_index_gdac.py b/argopy/tests/test_fetchers_index_gdac.py 
index efd73afc..02e71e92 100644 --- a/argopy/tests/test_fetchers_index_gdac.py +++ b/argopy/tests/test_fetchers_index_gdac.py @@ -13,7 +13,7 @@ FileSystemHasNoCache, FtpPathError ) -from argopy.utilities import is_list_of_strings, isconnected +from argopy.utils.checkers import isconnected, is_list_of_strings from utils import requires_gdac from mocked_http import mocked_httpserver, mocked_server_address diff --git a/argopy/tests/test_fetchers_proto.py b/argopy/tests/test_fetchers_proto.py index 0912cad7..24f669ff 100644 --- a/argopy/tests/test_fetchers_proto.py +++ b/argopy/tests/test_fetchers_proto.py @@ -1,7 +1,7 @@ import pytest import xarray from argopy.data_fetchers.proto import ArgoDataFetcherProto -from argopy.utilities import to_list +from argopy.utils import to_list class Fetcher(ArgoDataFetcherProto): diff --git a/argopy/tests/test_plot_argo_colors.py b/argopy/tests/test_plot_argo_colors.py index 99f8bad1..5613ff2b 100644 --- a/argopy/tests/test_plot_argo_colors.py +++ b/argopy/tests/test_plot_argo_colors.py @@ -4,7 +4,6 @@ """ import pytest import logging -import warnings from utils import ( requires_matplotlib, @@ -12,7 +11,7 @@ has_matplotlib, has_seaborn, ) -from ..plot import ArgoColors +from argopy.plot import ArgoColors if has_matplotlib: import matplotlib as mpl diff --git a/argopy/tests/test_plot_plot.py b/argopy/tests/test_plot_plot.py index a3db3903..f05bdfe1 100644 --- a/argopy/tests/test_plot_plot.py +++ b/argopy/tests/test_plot_plot.py @@ -19,7 +19,7 @@ has_ipywidgets, ) -from ..plot import bar_plot, plot_trajectory, open_sat_altim_report, scatter_map +from argopy.plot import bar_plot, plot_trajectory, open_sat_altim_report, scatter_map from argopy.errors import InvalidDatasetStructure from argopy import DataFetcher as ArgoDataFetcher from mocked_http import mocked_server_address diff --git a/argopy/tests/test_plotters.py b/argopy/tests/test_plotters.py deleted file mode 100644 index 8d662357..00000000 --- a/argopy/tests/test_plotters.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -This file covers the plotters module -We test plotting functions from IndexFetcher and DataFetcher -""" -import pytest -import logging -from typing import Callable - -import argopy -from argopy.errors import InvalidDashboard -from utils import ( - requires_gdac, - requires_connection, - requires_matplotlib, - requires_ipython, - requires_cartopy, - has_matplotlib, - has_seaborn, - has_cartopy, - has_ipython, - has_ipywidgets, -) -from ..plot import bar_plot, plot_trajectory, open_sat_altim_report, scatter_map -from argopy import DataFetcher as ArgoDataFetcher - -if has_matplotlib: - import matplotlib as mpl - -if has_cartopy: - import cartopy - -if has_ipython: - import IPython - -log = logging.getLogger("argopy.tests.plot") diff --git a/argopy/tests/test_related.py b/argopy/tests/test_related.py new file mode 100644 index 00000000..031d2710 --- /dev/null +++ b/argopy/tests/test_related.py @@ -0,0 +1,327 @@ +import pytest +import tempfile +import xarray as xr +import pandas as pd +from collections import ChainMap, OrderedDict +import shutil + +from mocked_http import mocked_httpserver, mocked_server_address +from utils import ( + requires_matplotlib, + requires_cartopy, + requires_oops, + has_matplotlib, + has_cartopy, + has_ipython, +) +import argopy +from argopy.related import ( + TopoFetcher, + ArgoNVSReferenceTables, + OceanOPSDeployments, + ArgoDocs, + load_dict, mapp_dict, + get_coriolis_profile_id, get_ea_profile_page +) +from argopy.utils.checkers import ( + is_list_of_strings, +) + +if 
has_matplotlib:
+    import matplotlib as mpl
+
+if has_cartopy:
+    import cartopy
+
+if has_ipython:
+    import IPython
+
+
+class Test_TopoFetcher():
+    box = [81, 123, -67, -54]
+
+    def setup_class(self):
+        """setup any state specific to the execution of the given class"""
+        # Create the cache folder here, so that it's not the same for the pandas and pyarrow tests
+        self.cachedir = tempfile.mkdtemp()
+
+    def teardown_class(self):
+        """Cleanup once we are finished."""
+        def remove_test_dir():
+            shutil.rmtree(self.cachedir)
+        remove_test_dir()
+
+    def make_a_fetcher(self, cached=False):
+        opts = {'ds': 'gebco', 'stride': [10, 10], 'server': mocked_server_address}
+        if cached:
+            opts = ChainMap(opts, {'cache': True, 'cachedir': self.cachedir})
+        return TopoFetcher(self.box, **opts)
+
+    def assert_fetcher(self, f):
+        ds = f.to_xarray()
+        assert isinstance(ds, xr.Dataset)
+        assert 'elevation' in ds.data_vars
+
+    def test_load_mocked_server(self, mocked_httpserver):
+        """This will easily ensure that the module scope fixture is available to all methods !"""
+        assert True
+
+    params = [True, False]
+    ids_params = ["cached=%s" % p for p in params]
+    @pytest.mark.parametrize("params", params, indirect=False, ids=ids_params)
+    def test_fetching(self, params):
+        fetcher = self.make_a_fetcher(cached=params)
+        self.assert_fetcher(fetcher)
+
+
+class Test_ArgoNVSReferenceTables:
+
+    def setup_class(self):
+        """setup any state specific to the execution of the given class"""
+        # Create the cache folder here, so that it's not the same for the pandas and pyarrow tests
+        self.cachedir = tempfile.mkdtemp()
+        self.nvs = ArgoNVSReferenceTables(cache=True, cachedir=self.cachedir, nvs=mocked_server_address)
+
+    def teardown_class(self):
+        """Cleanup once we are finished."""
+        def remove_test_dir():
+            shutil.rmtree(self.cachedir)
+        remove_test_dir()
+
+    def test_load_mocked_server(self, mocked_httpserver):
+        """This will easily ensure that the module scope fixture is available to all methods !"""
+        assert True
+
+    def test_valid_ref(self):
+        assert is_list_of_strings(self.nvs.valid_ref)
+
+    opts = [3, 'R09']
+    opts_ids = ["rtid is a %s" % type(o) for o in opts]
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_tbl(self, opts):
+        assert isinstance(self.nvs.tbl(opts), pd.DataFrame)
+
+    opts = [3, 'R09']
+    opts_ids = ["rtid is a %s" % type(o) for o in opts]
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_tbl_name(self, opts):
+        names = self.nvs.tbl_name(opts)
+        assert isinstance(names, tuple)
+        assert isinstance(names[0], str)
+        assert isinstance(names[1], str)
+        assert isinstance(names[2], str)
+
+    def test_all_tbl(self):
+        all = self.nvs.all_tbl
+        assert isinstance(all, OrderedDict)
+        assert isinstance(all[list(all.keys())[0]], pd.DataFrame)
+
+    def test_all_tbl_name(self):
+        all = self.nvs.all_tbl_name
+        assert isinstance(all, OrderedDict)
+        assert isinstance(all[list(all.keys())[0]], tuple)
+
+    opts = ["ld+json", "rdf+xml", "text/turtle", "invalid"]
+    opts_ids = ["fmt=%s" % o for o in opts]
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_get_url(self, opts):
+        if opts != 'invalid':
+            url = self.nvs.get_url(3, fmt=opts)
+            assert isinstance(url, str)
+            if "json" in opts:
+                data = self.nvs.fs.open_json(url)
+                assert isinstance(data, dict)
+            elif "xml" in opts:
+                data = self.nvs.fs.fs.cat_file(url)
+                assert data[0:5] == b'<?xml'
-        assert float_wmo(2901746) >= float_wmo(2901746)
-        assert float_wmo(2901746) > float_wmo(2901745)
-        assert float_wmo(2901746) <= 
float_wmo(2901746) - assert float_wmo(2901746) < float_wmo(2901747) - - def test_hashable(self): - assert isinstance(hash(float_wmo(2901746)), int) - - -class Test_Registry(): - - opts = [(None, 'str'), (['hello', 'world'], str), (None, float_wmo), ([2901746, 4902252], float_wmo)] - opts_ids = ["%s, %s" % ((lambda x: 'iterlist' if x is not None else x)(opt[0]), repr(opt[1])) for opt in opts] - - @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_init(self, opts): - assert isinstance(Registry(opts[0], dtype=opts[1]), Registry) - - opts = [(['hello', 'world'], str), ([2901746, 4902252], float_wmo)] - opts_ids = ["%s, %s" % ((lambda x: 'iterlist' if x is not None else x)(opt[0]), repr(opt[1])) for opt in opts] - - @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_commit(self, opts): - R = Registry(dtype=opts[1]) - R.commit(opts[0]) - - @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_append(self, opts): - R = Registry(dtype=opts[1]) - R.append(opts[0][0]) - - @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_extend(self, opts): - R = Registry(dtype=opts[1]) - R.append(opts[0]) - - @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_insert(self, opts): - R = Registry(opts[0][0], dtype=opts[1]) - R.insert(0, opts[0][-1]) - assert R[0] == opts[0][-1] - - @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_remove(self, opts): - R = Registry(opts[0], dtype=opts[1]) - R.remove(opts[0][0]) - assert opts[0][0] not in R - - @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids) - def test_copy(self, opts): - R = Registry(opts[0], dtype=opts[1]) - assert R == R.copy() - - bad_opts = [(['hello', 12], str), ([2901746, 1], float_wmo)] - bad_opts_ids = ["%s, %s" % ((lambda x: 'iterlist' if x is not None else x)(opt[0]), repr(opt[1])) for opt in opts] - - @pytest.mark.parametrize("opts", bad_opts, indirect=False, ids=bad_opts_ids) - def test_invalid_dtype(self, opts): - with pytest.raises(ValueError): - Registry(opts[0][0], dtype=opts[1], invalid='raise').commit(opts[0][-1]) - with pytest.warns(UserWarning): - Registry(opts[0][0], dtype=opts[1], invalid='warn').commit(opts[0][-1]) - # Raise nothing: - Registry(opts[0][0], dtype=opts[1], invalid='ignore').commit(opts[0][-1]) - - -@pytest.mark.parametrize("params", [[6901929, None], [6901929, 12]], indirect=False, ids=['float', 'profile']) -def test_get_coriolis_profile_id(params, mocked_httpserver): - with argopy.set_options(cachedir=tempfile.mkdtemp()): - assert isinstance(get_coriolis_profile_id(params[0], params[1], api_server=mocked_server_address), pd.core.frame.DataFrame) - -@pytest.mark.parametrize("params", [[6901929, None], [6901929, 12]], indirect=False, ids=['float', 'profile']) -def test_get_ea_profile_page(params, mocked_httpserver): - with argopy.set_options(cachedir=tempfile.mkdtemp()): - assert is_list_of_strings(get_ea_profile_page(params[0], params[1], api_server=mocked_server_address)) - - -class Test_ArgoNVSReferenceTables: - - def setup_class(self): - """setup any state specific to the execution of the given class""" - # Create the cache folder here, so that it's not the same for the pandas and pyarrow tests - self.cachedir = tempfile.mkdtemp() - self.nvs = ArgoNVSReferenceTables(cache=True, cachedir=self.cachedir, nvs=mocked_server_address) - - def teardown_class(self): - """Cleanup once we are finished.""" - def remove_test_dir(): - 
shutil.rmtree(self.cachedir)
-        remove_test_dir()
-
-    def test_load_mocked_server(self, mocked_httpserver):
-        """This will easily ensure that the module scope fixture is available to all methods !"""
-        assert True
-
-    def test_valid_ref(self):
-        assert is_list_of_strings(self.nvs.valid_ref)
-
-    opts = [3, 'R09']
-    opts_ids = ["rtid is a %s" % type(o) for o in opts]
-    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
-    def test_tbl(self, opts):
-        assert isinstance(self.nvs.tbl(opts), pd.DataFrame)
-
-    opts = [3, 'R09']
-    opts_ids = ["rtid is a %s" % type(o) for o in opts]
-    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
-    def test_tbl_name(self, opts):
-        names = self.nvs.tbl_name(opts)
-        assert isinstance(names, tuple)
-        assert isinstance(names[0], str)
-        assert isinstance(names[1], str)
-        assert isinstance(names[2], str)
-
-    def test_all_tbl(self):
-        all = self.nvs.all_tbl
-        assert isinstance(all, OrderedDict)
-        assert isinstance(all[list(all.keys())[0]], pd.DataFrame)
-
-    def test_all_tbl_name(self):
-        all = self.nvs.all_tbl_name
-        assert isinstance(all, OrderedDict)
-        assert isinstance(all[list(all.keys())[0]], tuple)
-
-    opts = ["ld+json", "rdf+xml", "text/turtle", "invalid"]
-    opts_ids = ["fmt=%s" % o for o in opts]
-    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
-    def test_get_url(self, opts):
-        if opts != 'invalid':
-            url = self.nvs.get_url(3, fmt=opts)
-            assert isinstance(url, str)
-            if "json" in opts:
-                data = self.nvs.fs.open_json(url)
-                assert isinstance(data, dict)
-            elif "xml" in opts:
-                data = self.nvs.fs.fs.cat_file(url)
-                assert data[0:5] == b'<?xml'
+        assert float_wmo(2901746) >= float_wmo(2901746)
+        assert float_wmo(2901746) > float_wmo(2901745)
+        assert float_wmo(2901746) <= float_wmo(2901746)
+        assert float_wmo(2901746) < float_wmo(2901747)
+
+    def test_hashable(self):
+        assert isinstance(hash(float_wmo(2901746)), int)
+
+
+class Test_Registry():
+
+    opts = [(None, 'str'), (['hello', 'world'], str), (None, float_wmo), ([2901746, 4902252], float_wmo)]
+    opts_ids = ["%s, %s" % ((lambda x: 'iterlist' if x is not None else x)(opt[0]), repr(opt[1])) for opt in opts]
+
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_init(self, opts):
+        assert isinstance(Registry(opts[0], dtype=opts[1]), Registry)
+
+    opts = [(['hello', 'world'], str), ([2901746, 4902252], float_wmo)]
+    opts_ids = ["%s, %s" % ((lambda x: 'iterlist' if x is not None else x)(opt[0]), repr(opt[1])) for opt in opts]
+
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_commit(self, opts):
+        R = Registry(dtype=opts[1])
+        R.commit(opts[0])
+
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_append(self, opts):
+        R = Registry(dtype=opts[1])
+        R.append(opts[0][0])
+
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_extend(self, opts):
+        R = Registry(dtype=opts[1])
+        R.append(opts[0])
+
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_insert(self, opts):
+        R = Registry(opts[0][0], dtype=opts[1])
+        R.insert(0, opts[0][-1])
+        assert R[0] == opts[0][-1]
+
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_remove(self, opts):
+        R = Registry(opts[0], dtype=opts[1])
+        R.remove(opts[0][0])
+        assert opts[0][0] not in R
+
+    @pytest.mark.parametrize("opts", opts, indirect=False, ids=opts_ids)
+    def test_copy(self, opts):
+        R = Registry(opts[0], dtype=opts[1])
+        assert R == R.copy()
+
+    bad_opts = [(['hello', 12], 
str), ([2901746, 1], float_wmo)] + bad_opts_ids = ["%s, %s" % ((lambda x: 'iterlist' if x is not None else x)(opt[0]), repr(opt[1])) for opt in opts] + + @pytest.mark.parametrize("opts", bad_opts, indirect=False, ids=bad_opts_ids) + def test_invalid_dtype(self, opts): + with pytest.raises(ValueError): + Registry(opts[0][0], dtype=opts[1], invalid='raise').commit(opts[0][-1]) + with pytest.warns(UserWarning): + Registry(opts[0][0], dtype=opts[1], invalid='warn').commit(opts[0][-1]) + # Raise nothing: + Registry(opts[0][0], dtype=opts[1], invalid='ignore').commit(opts[0][-1]) + diff --git a/argopy/tests/test_utils_caching.py b/argopy/tests/test_utils_caching.py new file mode 100644 index 00000000..e0841fdb --- /dev/null +++ b/argopy/tests/test_utils_caching.py @@ -0,0 +1,36 @@ +import os +import pandas as pd +import argopy +import tempfile +from argopy import DataFetcher as ArgoDataFetcher +from utils import ( + requires_gdac, +) +from argopy.utils.caching import lscache, clear_cache + + +@requires_gdac +def test_clear_cache(): + ftproot, flist = argopy.tutorial.open_dataset("gdac") + with tempfile.TemporaryDirectory() as cachedir: + with argopy.set_options(cachedir=cachedir): + loader = ArgoDataFetcher(src="gdac", ftp=ftproot, cache=True).profile(2902696, 12) + loader.to_xarray() + clear_cache() + assert os.path.exists(cachedir) is True + assert len(os.listdir(cachedir)) == 0 + + +@requires_gdac +def test_lscache(): + ftproot, flist = argopy.tutorial.open_dataset("gdac") + with tempfile.TemporaryDirectory() as cachedir: + with argopy.set_options(cachedir=cachedir): + loader = ArgoDataFetcher(src="gdac", ftp=ftproot, cache=True).profile(2902696, 12) + loader.to_xarray() + result = lscache(cache_path=cachedir, prt=True) + assert isinstance(result, str) + + result = lscache(cache_path=cachedir, prt=False) + assert isinstance(result, pd.DataFrame) + diff --git a/argopy/tests/test_utils_checkers.py b/argopy/tests/test_utils_checkers.py new file mode 100644 index 00000000..b8c2d53d --- /dev/null +++ b/argopy/tests/test_utils_checkers.py @@ -0,0 +1,226 @@ +import pytest +import numpy as np +from mocked_http import mocked_httpserver, mocked_server_address +from utils import ( + requires_erddap, +) +import argopy +from argopy.errors import FtpPathError +from argopy.utils.checkers import ( + is_box, is_indexbox, + check_wmo, is_wmo, + check_cyc, is_cyc, + check_gdac_path, + isconnected, urlhaskeyword, isAPIconnected, erddap_ds_exists, isalive +) + + +class Test_is_box: + @pytest.fixture(autouse=True) + def create_data(self): + self.BOX3d = [0, 20, 40, 60, 0, 1000] + self.BOX4d = [0, 20, 40, 60, 0, 1000, "2001-01", "2001-6"] + + def test_box_ok(self): + assert is_box(self.BOX3d) + assert is_box(self.BOX4d) + + def test_box_notok(self): + for box in [[], list(range(0, 12))]: + with pytest.raises(ValueError): + is_box(box) + with pytest.raises(ValueError): + is_box(box, errors="raise") + assert not is_box(box, errors="ignore") + + def test_box_invalid_num(self): + for i in [0, 1, 2, 3, 4, 5]: + box = self.BOX3d + box[i] = "str" + with pytest.raises(ValueError): + is_box(box) + with pytest.raises(ValueError): + is_box(box, errors="raise") + assert not is_box(box, errors="ignore") + + def test_box_invalid_range(self): + for i in [0, 1, 2, 3, 4, 5]: + box = self.BOX3d + box[i] = -1000 + with pytest.raises(ValueError): + is_box(box) + with pytest.raises(ValueError): + is_box(box, errors="raise") + assert not is_box(box, errors="ignore") + + def test_box_invalid_str(self): + for i in [6, 7]: + box = 
self.BOX4d + box[i] = "str" + with pytest.raises(ValueError): + is_box(box) + with pytest.raises(ValueError): + is_box(box, errors="raise") + assert not is_box(box, errors="ignore") + + +class Test_is_indexbox: + @pytest.fixture(autouse=True) + def create_data(self): + self.BOX2d = [0, 20, 40, 60] + self.BOX3d = [0, 20, 40, 60, "2001-01", "2001-6"] + + def test_box_ok(self): + assert is_indexbox(self.BOX2d) + assert is_indexbox(self.BOX3d) + + def test_box_notok(self): + for box in [[], list(range(0, 12))]: + with pytest.raises(ValueError): + is_indexbox(box) + with pytest.raises(ValueError): + is_indexbox(box, errors="raise") + assert not is_indexbox(box, errors="ignore") + + def test_box_invalid_num(self): + for i in [0, 1, 2, 3]: + box = self.BOX2d + box[i] = "str" + with pytest.raises(ValueError): + is_indexbox(box) + with pytest.raises(ValueError): + is_indexbox(box, errors="raise") + assert not is_indexbox(box, errors="ignore") + + def test_box_invalid_range(self): + for i in [0, 1, 2, 3]: + box = self.BOX2d + box[i] = -1000 + with pytest.raises(ValueError): + is_indexbox(box) + with pytest.raises(ValueError): + is_indexbox(box, errors="raise") + assert not is_indexbox(box, errors="ignore") + + def test_box_invalid_str(self): + for i in [4, 5]: + box = self.BOX3d + box[i] = "str" + with pytest.raises(ValueError): + is_indexbox(box) + with pytest.raises(ValueError): + is_indexbox(box, errors="raise") + assert not is_indexbox(box, errors="ignore") + + +def test_is_wmo(): + assert is_wmo(12345) + assert is_wmo([12345]) + assert is_wmo([12345, 1234567]) + + with pytest.raises(ValueError): + is_wmo(1234, errors="raise") + with pytest.raises(ValueError): + is_wmo(-1234, errors="raise") + with pytest.raises(ValueError): + is_wmo(1234.12, errors="raise") + with pytest.raises(ValueError): + is_wmo(12345.7, errors="raise") + + with pytest.warns(UserWarning): + is_wmo(1234, errors="warn") + with pytest.warns(UserWarning): + is_wmo(-1234, errors="warn") + with pytest.warns(UserWarning): + is_wmo(1234.12, errors="warn") + with pytest.warns(UserWarning): + is_wmo(12345.7, errors="warn") + + assert not is_wmo(12, errors="ignore") + assert not is_wmo(-12, errors="ignore") + assert not is_wmo(1234.12, errors="ignore") + assert not is_wmo(12345.7, errors="ignore") + + +def test_check_wmo(): + assert check_wmo(12345) == [12345] + assert check_wmo([1234567]) == [1234567] + assert check_wmo([12345, 1234567]) == [12345, 1234567] + assert check_wmo(np.array((12345, 1234567), dtype='int')) == [12345, 1234567] + + +def test_is_cyc(): + assert is_cyc(123) + assert is_cyc([123]) + assert is_cyc([12, 123, 1234]) + + with pytest.raises(ValueError): + is_cyc(12345, errors="raise") + with pytest.raises(ValueError): + is_cyc(-1234, errors="raise") + with pytest.raises(ValueError): + is_cyc(1234.12, errors="raise") + with pytest.raises(ValueError): + is_cyc(12345.7, errors="raise") + + with pytest.warns(UserWarning): + is_cyc(12345, errors="warn") + with pytest.warns(UserWarning): + is_cyc(-1234, errors="warn") + with pytest.warns(UserWarning): + is_cyc(1234.12, errors="warn") + with pytest.warns(UserWarning): + is_cyc(12345.7, errors="warn") + + assert not is_cyc(12345, errors="ignore") + assert not is_cyc(-12, errors="ignore") + assert not is_cyc(1234.12, errors="ignore") + assert not is_cyc(12345.7, errors="ignore") + + +def test_check_cyc(): + assert check_cyc(123) == [123] + assert check_cyc([12]) == [12] + assert check_cyc([12, 123]) == [12, 123] + assert check_cyc(np.array((123, 1234), dtype='int')) == [123, 
1234] + + +def test_check_gdac_path(): + assert check_gdac_path("dummy_path", errors='ignore') is False + with pytest.raises(FtpPathError): + check_gdac_path("dummy_path", errors='raise') + with pytest.warns(UserWarning): + assert check_gdac_path("dummy_path", errors='warn') is False + + +def test_isconnected(mocked_httpserver): + assert isinstance(isconnected(host=mocked_server_address), bool) + assert isconnected(host="http://dummyhost") is False + + +def test_urlhaskeyword(mocked_httpserver): + url = "https://api.ifremer.fr/argopy/data/ARGO-FULL.json" + url.replace("https://api.ifremer.fr", mocked_server_address) + assert isinstance(urlhaskeyword(url, "label"), bool) + + +params = [mocked_server_address, + {"url": mocked_server_address + "/argopy/data/ARGO-FULL.json", "keyword": "label"} + ] +params_ids = ["url is a %s" % str(type(p)) for p in params] +@pytest.mark.parametrize("params", params, indirect=False, ids=params_ids) +def test_isalive(params, mocked_httpserver): + assert isinstance(isalive(params), bool) + + +@requires_erddap +@pytest.mark.parametrize("data", [True, False], indirect=False, ids=["data=%s" % t for t in [True, False]]) +def test_isAPIconnected(data, mocked_httpserver): + with argopy.set_options(erddap=mocked_server_address): + assert isinstance(isAPIconnected(src="erddap", data=data), bool) + + +def test_erddap_ds_exists(mocked_httpserver): + with argopy.set_options(erddap=mocked_server_address): + assert isinstance(erddap_ds_exists(ds="ArgoFloats"), bool) + assert erddap_ds_exists(ds="DummyDS") is False diff --git a/argopy/tests/test_utils_chunking.py b/argopy/tests/test_utils_chunking.py new file mode 100644 index 00000000..3aee8c86 --- /dev/null +++ b/argopy/tests/test_utils_chunking.py @@ -0,0 +1,196 @@ +import pytest +import types +import numpy as np +import pandas as pd + +from argopy.errors import InvalidFetcherAccessPoint +from argopy.utils.chunking import Chunker +from argopy.utils.checkers import is_box + + +class Test_Chunker: + @pytest.fixture(autouse=True) + def create_data(self): + self.WMO = [ + 6902766, + 6902772, + 6902914, + 6902746, + 6902916, + 6902915, + 6902757, + 6902771, + ] + self.BOX3d = [0, 20, 40, 60, 0, 1000] + self.BOX4d = [0, 20, 40, 60, 0, 1000, "2001-01", "2001-6"] + + def test_InvalidFetcherAccessPoint(self): + with pytest.raises(InvalidFetcherAccessPoint): + Chunker({"invalid": self.WMO}) + + def test_invalid_chunks(self): + with pytest.raises(ValueError): + Chunker({"box": self.BOX3d}, chunks='toto') + + def test_invalid_chunksize(self): + with pytest.raises(ValueError): + Chunker({"box": self.BOX3d}, chunksize='toto') + + def test_chunk_wmo(self): + C = Chunker({"wmo": self.WMO}) + assert all( + [all(isinstance(x, int) for x in chunk) for chunk in C.fit_transform()] + ) + + C = Chunker({"wmo": self.WMO}, chunks="auto") + assert all( + [all(isinstance(x, int) for x in chunk) for chunk in C.fit_transform()] + ) + + C = Chunker({"wmo": self.WMO}, chunks={"wmo": 1}) + assert all( + [all(isinstance(x, int) for x in chunk) for chunk in C.fit_transform()] + ) + assert len(C.fit_transform()) == 1 + + with pytest.raises(ValueError): + Chunker({"wmo": self.WMO}, chunks=["wmo", 1]) + + C = Chunker({"wmo": self.WMO}) + assert isinstance(C.this_chunker, types.FunctionType) or isinstance( + C.this_chunker, types.MethodType + ) + + def test_chunk_box3d(self): + C = Chunker({"box": self.BOX3d}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + + C = Chunker({"box": self.BOX3d}, chunks="auto") + assert all([is_box(chunk) for chunk 
in C.fit_transform()]) + + C = Chunker({"box": self.BOX3d}, chunks={"lon": 12, "lat": 1, "dpt": 1}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 12 + + C = Chunker( + {"box": self.BOX3d}, chunks={"lat": 1, "dpt": 1}, chunksize={"lon": 10} + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert chunks[0][1] - chunks[0][0] == 10 + + C = Chunker({"box": self.BOX3d}, chunks={"lon": 1, "lat": 12, "dpt": 1}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 12 + + C = Chunker( + {"box": self.BOX3d}, chunks={"lon": 1, "dpt": 1}, chunksize={"lat": 10} + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert chunks[0][3] - chunks[0][2] == 10 + + C = Chunker({"box": self.BOX3d}, chunks={"lon": 1, "lat": 1, "dpt": 12}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 12 + + C = Chunker( + {"box": self.BOX3d}, chunks={"lon": 1, "lat": 1}, chunksize={"dpt": 10} + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert chunks[0][5] - chunks[0][4] == 10 + + C = Chunker({"box": self.BOX3d}, chunks={"lon": 4, "lat": 2, "dpt": 1}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 2 * 4 + + C = Chunker({"box": self.BOX3d}, chunks={"lon": 2, "lat": 3, "dpt": 4}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 2 * 3 * 4 + + with pytest.raises(ValueError): + Chunker({"box": self.BOX3d}, chunks=["lon", 1]) + + C = Chunker({"box": self.BOX3d}) + assert isinstance(C.this_chunker, types.FunctionType) or isinstance( + C.this_chunker, types.MethodType + ) + + def test_chunk_box4d(self): + C = Chunker({"box": self.BOX4d}) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + + C = Chunker({"box": self.BOX4d}, chunks="auto") + assert all([is_box(chunk) for chunk in C.fit_transform()]) + + C = Chunker( + {"box": self.BOX4d}, chunks={"lon": 2, "lat": 1, "dpt": 1, "time": 1} + ) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 2 + + C = Chunker( + {"box": self.BOX4d}, + chunks={"lat": 1, "dpt": 1, "time": 1}, + chunksize={"lon": 10}, + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert chunks[0][1] - chunks[0][0] == 10 + + C = Chunker( + {"box": self.BOX4d}, chunks={"lon": 1, "lat": 2, "dpt": 1, "time": 1} + ) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 2 + + C = Chunker( + {"box": self.BOX4d}, + chunks={"lon": 1, "dpt": 1, "time": 1}, + chunksize={"lat": 10}, + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert chunks[0][3] - chunks[0][2] == 10 + + C = Chunker( + {"box": self.BOX4d}, chunks={"lon": 1, "lat": 1, "dpt": 2, "time": 1} + ) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 2 + + C = Chunker( + {"box": self.BOX4d}, + chunks={"lon": 1, "lat": 1, "time": 1}, + chunksize={"dpt": 10}, + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert chunks[0][5] - chunks[0][4] == 10 + + C = Chunker( + {"box": self.BOX4d}, chunks={"lon": 1, "lat": 1, "dpt": 1, "time": 2} + ) + assert all([is_box(chunk) for chunk in C.fit_transform()]) + assert len(C.fit_transform()) == 2 + + C = Chunker( + {"box": self.BOX4d}, + 
chunks={"lon": 1, "lat": 1, "dpt": 1}, + chunksize={"time": 5}, + ) + chunks = C.fit_transform() + assert all([is_box(chunk) for chunk in chunks]) + assert np.timedelta64( + pd.to_datetime(chunks[0][7]) - pd.to_datetime(chunks[0][6]), "D" + ) <= np.timedelta64(5, "D") + + with pytest.raises(ValueError): + Chunker({"box": self.BOX4d}, chunks=["lon", 1]) + + C = Chunker({"box": self.BOX4d}) + assert isinstance(C.this_chunker, types.FunctionType) or isinstance( + C.this_chunker, types.MethodType + ) + diff --git a/argopy/tests/test_utils_compute.py b/argopy/tests/test_utils_compute.py new file mode 100644 index 00000000..2806fd14 --- /dev/null +++ b/argopy/tests/test_utils_compute.py @@ -0,0 +1,75 @@ +import pytest +import numpy as np +import xarray as xr + +from argopy.utils.compute import linear_interpolation_remap + + +class Test_linear_interpolation_remap: + @pytest.fixture(autouse=True) + def create_data(self): + # create fake data to test interpolation: + temp = np.random.rand(200, 100) + pres = np.sort( + np.floor( + np.zeros([200, 100]) + + np.linspace(50, 950, 100) + + np.random.randint(-5, 5, [200, 100]) + ) + ) + self.dsfake = xr.Dataset( + { + "TEMP": (["N_PROF", "N_LEVELS"], temp), + "PRES": (["N_PROF", "N_LEVELS"], pres), + }, + coords={ + "N_PROF": ("N_PROF", range(200)), + "N_LEVELS": ("N_LEVELS", range(100)), + "Z_LEVELS": ("Z_LEVELS", np.arange(100, 900, 20)), + }, + ) + + def test_interpolation(self): + # Run it with success: + dsi = linear_interpolation_remap( + self.dsfake["PRES"], + self.dsfake["TEMP"], + self.dsfake["Z_LEVELS"], + z_dim="N_LEVELS", + z_regridded_dim="Z_LEVELS", + ) + assert "remapped" in dsi.dims + + def test_interpolation_1d(self): + # Run it with success: + dsi = linear_interpolation_remap( + self.dsfake["PRES"].isel(N_PROF=0), + self.dsfake["TEMP"].isel(N_PROF=0), + self.dsfake["Z_LEVELS"], + z_regridded_dim="Z_LEVELS", + ) + assert "remapped" in dsi.dims + + def test_error_zdim(self): + # Test error: + # catches error from _regular_interp linked to z_dim + with pytest.raises(RuntimeError): + linear_interpolation_remap( + self.dsfake["PRES"], + self.dsfake["TEMP"], + self.dsfake["Z_LEVELS"], + z_regridded_dim="Z_LEVELS", + ) + + def test_error_ds(self): + # Test error: + # catches error from linear_interpolation_remap linked to datatype + with pytest.raises(ValueError): + linear_interpolation_remap( + self.dsfake["PRES"], + self.dsfake, + self.dsfake["Z_LEVELS"], + z_dim="N_LEVELS", + z_regridded_dim="Z_LEVELS", + ) + diff --git a/argopy/tests/test_utils_format.py b/argopy/tests/test_utils_format.py new file mode 100644 index 00000000..6d3c161c --- /dev/null +++ b/argopy/tests/test_utils_format.py @@ -0,0 +1,60 @@ +import os +import pytest +import argopy +from argopy.utils.format import format_oneline, argo_split_path + + +def test_format_oneline(): + s = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore" + assert isinstance(format_oneline(s), str) + assert isinstance(format_oneline(s[0:5]), str) + s = format_oneline(s, max_width=12) + assert isinstance(s, str) and len(s) == 12 + + +class Test_argo_split_path: + ############# + # UTILITIES # + ############# + # src = "https://data-argo.ifremer.fr/dac" + src = argopy.tutorial.open_dataset("gdac")[0] + "/dac" + list_of_files = [ + src + "/bodc/6901929/6901929_prof.nc", # core / multi-profile + src + "/coriolis/3902131/3902131_Sprof.nc", # bgc / synthetic multi-profile + + src + "/meds/4901079/profiles/D4901079_110.nc", # core / mono-profile / 
Delayed + src + "/aoml/13857/profiles/R13857_001.nc", # core / mono-profile / Real + + src + "/coriolis/3902131/profiles/SD3902131_001.nc", # bgc / synthetic mono-profile / Delayed + src + "/coriolis/3902131/profiles/SD3902131_001D.nc", # bgc / synthetic mono-profile / Delayed / Descent + src + "/coriolis/6903247/profiles/SR6903247_134.nc", # bgc / synthetic mono-profile / Real + src + "/coriolis/6903247/profiles/SR6903247_134D.nc", # bgc / synthetic mono-profile / Real / Descent + + src + "/coriolis/3902131/profiles/BR3902131_001.nc", # bgc / mono-profile / Real + src + "/coriolis/3902131/profiles/BR3902131_001D.nc", # bgc / mono-profile / Real / Descent + + src + "/aoml/5900446/5900446_Dtraj.nc", # traj / Delayed + src + "/csio/2902696/2902696_Rtraj.nc", # traj / Real + + src + "/coriolis/3902131/3902131_BRtraj.nc", # bgc / traj / Real + # src + "/coriolis/6903247/6903247_BRtraj.nc", # bgc / traj / Real + + src + "/incois/2902269/2902269_tech.nc", # technical + # src + "/nmdis/2901623/2901623_tech.nc", # technical + + src + "/jma/4902252/4902252_meta.nc", # meta-data + # src + "/coriolis/1900857/1900857_meta.nc", # meta-data + ] + list_of_files = [f.replace("/", os.path.sep) for f in list_of_files] + + ######### + # TESTS # + ######### + + @pytest.mark.parametrize("file", list_of_files, + indirect=False) + def test_argo_split_path(self, file): + desc = argo_split_path(file) + assert isinstance(desc, dict) + for key in ['origin', 'path', 'name', 'type', 'extension', 'wmo', 'dac']: + assert key in desc diff --git a/argopy/tests/test_utils_geo.py b/argopy/tests/test_utils_geo.py new file mode 100644 index 00000000..609242c9 --- /dev/null +++ b/argopy/tests/test_utils_geo.py @@ -0,0 +1,42 @@ +import pytest +import numpy as np +import pandas as pd +from argopy.utils.geo import wmo2box, wrap_longitude, toYearFraction, YearFraction_to_datetime +from argopy.utils.checkers import is_box + + +def test_wmo2box(): + with pytest.raises(ValueError): + wmo2box(12) + with pytest.raises(ValueError): + wmo2box(8000) + with pytest.raises(ValueError): + wmo2box(2000) + + def complete_box(b): + b2 = b.copy() + b2.insert(4, 0.) + b2.insert(5, 10000.) 
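+        # b2 is the 2D box from wmo2box() (lon_min, lon_max, lat_min, lat_max),
+        # padded here with a full-depth range (0., 10000.) so that is_box() accepts it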
+ return b2 + + assert is_box(complete_box(wmo2box(1212))) + assert is_box(complete_box(wmo2box(3324))) + assert is_box(complete_box(wmo2box(5402))) + assert is_box(complete_box(wmo2box(7501))) + + +def test_wrap_longitude(): + assert wrap_longitude(np.array([-20])) == 340 + assert wrap_longitude(np.array([40])) == 40 + assert np.all(np.equal(wrap_longitude(np.array([340, 20])), np.array([340, 380]))) + + +def test_toYearFraction(): + assert toYearFraction(pd.to_datetime('202001010000')) == 2020 + assert toYearFraction(pd.to_datetime('202001010000', utc=True)) == 2020 + assert toYearFraction(pd.to_datetime('202001010000')+pd.offsets.DateOffset(years=1)) == 2021 + + +def test_YearFraction_to_datetime(): + assert YearFraction_to_datetime(2020) == pd.to_datetime('202001010000') + assert YearFraction_to_datetime(2020+1) == pd.to_datetime('202101010000') diff --git a/argopy/tests/test_utils_lists.py b/argopy/tests/test_utils_lists.py new file mode 100644 index 00000000..06aaa893 --- /dev/null +++ b/argopy/tests/test_utils_lists.py @@ -0,0 +1,7 @@ +# import pytest +from argopy.utils.checkers import is_list_of_strings +from argopy.utils.lists import list_multiprofile_file_variables + + +def test_list_multiprofile_file_variables(): + assert is_list_of_strings(list_multiprofile_file_variables()) diff --git a/argopy/tests/test_utils_locals.py b/argopy/tests/test_utils_locals.py new file mode 100644 index 00000000..4ad32bfd --- /dev/null +++ b/argopy/tests/test_utils_locals.py @@ -0,0 +1,22 @@ +import os +import pytest +import io +import argopy +from argopy.utils.locals import modified_environ + + +@pytest.mark.parametrize("conda", [False, True], + indirect=False, + ids=["conda=%s" % str(p) for p in [False, True]]) +def test_show_versions(conda): + f = io.StringIO() + argopy.show_versions(file=f, conda=conda) + assert "SYSTEM" in f.getvalue() + + +def test_modified_environ(): + os.environ["DUMMY_ENV_ARGOPY"] = 'initial' + with modified_environ(DUMMY_ENV_ARGOPY='toto'): + assert os.environ['DUMMY_ENV_ARGOPY'] == 'toto' + assert os.environ['DUMMY_ENV_ARGOPY'] == 'initial' + os.environ.pop('DUMMY_ENV_ARGOPY') diff --git a/argopy/tests/test_xarray_engine.py b/argopy/tests/test_xarray_engine.py index 0bf655d8..b67701f0 100644 --- a/argopy/tests/test_xarray_engine.py +++ b/argopy/tests/test_xarray_engine.py @@ -4,7 +4,7 @@ import logging import warnings import argopy -from argopy.utilities import argo_split_path +from argopy.utils.format import argo_split_path log = logging.getLogger("argopy.tests.xarray.engine") diff --git a/argopy/tutorial.py b/argopy/tutorial.py index b619f3bb..37762764 100644 --- a/argopy/tutorial.py +++ b/argopy/tutorial.py @@ -20,6 +20,7 @@ from urllib.request import urlretrieve import shutil + _DEFAULT_CACHE_DIR = os.path.expanduser(os.path.sep.join(["~", ".argopy_tutorial_data"])) diff --git a/argopy/utilities.py b/argopy/utilities.py index f11dfac7..a2f232ff 100644 --- a/argopy/utilities.py +++ b/argopy/utilities.py @@ -1,4364 +1,304 @@ -#!/bin/env python -# -*coding: UTF-8 -*- -# -# Disclaimer: -# Functions get_sys_info, netcdf_and_hdf5_versions and show_versions are from: -# xarray/util/print_versions.py -# - -import os -import sys import warnings -import urllib -import json -import collections -from collections import UserList -import copy -from functools import reduce, wraps -from packaging import version -import logging -from abc import ABC, abstractmethod -from urllib.parse import urlparse -from typing import Union -import inspect -import pathlib import importlib -import 
locale -import platform -import struct -import subprocess # nosec B404 only used without user inputs -import contextlib -from fsspec.core import split_protocol -import fsspec -from functools import lru_cache - -import xarray as xr -import pandas as pd -import numpy as np -from scipy import interpolate - -import pickle # nosec B403 only used with internal files/assets -import shutil - -import threading -from socket import gaierror - -import time -import setuptools # noqa: F401 - -from .options import OPTIONS -from .errors import ( - FtpPathError, - InvalidFetcher, - InvalidFetcherAccessPoint, - InvalidOption, - InvalidDatasetStructure, - FileSystemHasNoCache, - DataNotFound, -) - -try: - collectionsAbc = collections.abc -except AttributeError: - collectionsAbc = collections - -try: - importlib.import_module('matplotlib') # noqa: E402 - from matplotlib.colors import to_hex -except ImportError: - pass - -path2assets = importlib.util.find_spec('argopy.static.assets').submodule_search_locations[0] - -log = logging.getLogger("argopy.utilities") - -with open(os.path.join(path2assets, "data_types.json"), "r") as f: - DATA_TYPES = json.load(f) - - - -def clear_cache(fs=None): - """ Delete argopy cache folder content """ - if os.path.exists(OPTIONS["cachedir"]): - # shutil.rmtree(OPTIONS["cachedir"]) - for filename in os.listdir(OPTIONS["cachedir"]): - file_path = os.path.join(OPTIONS["cachedir"], filename) - try: - if os.path.isfile(file_path) or os.path.islink(file_path): - os.unlink(file_path) - elif os.path.isdir(file_path): - shutil.rmtree(file_path) - except Exception as e: - print("Failed to delete %s. Reason: %s" % (file_path, e)) - if fs: - fs.clear_cache() - - -def lscache(cache_path: str = "", prt=True): - """ Decode and list cache folder content - - Parameters - ---------- - cache_path: str - prt: bool, default=True - Return a printable string or a :class:`pandas.DataFrame` - - Returns - ------- - str or :class:`pandas.DataFrame` - """ - from datetime import datetime - import math - summary = [] - - cache_path = OPTIONS['cachedir'] if cache_path == '' else cache_path - apath = os.path.abspath(cache_path) - log.debug("Listing cache content at: %s" % cache_path) - - def convert_size(size_bytes): - if size_bytes == 0: - return "0B" - size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") - i = int(math.floor(math.log(size_bytes, 1024))) - p = math.pow(1024, i) - s = round(size_bytes / p, 2) - return "%s %s" % (s, size_name[i]) - - cached_files = [] - fn = os.path.join(apath, "cache") - if os.path.exists(fn): - with open(fn, "rb") as f: - loaded_cached_files = pickle.load(f) # nosec B301 because files controlled internally - for c in loaded_cached_files.values(): - if isinstance(c["blocks"], list): - c["blocks"] = set(c["blocks"]) - cached_files.append(loaded_cached_files) - else: - raise FileSystemHasNoCache("No fsspec cache system at: %s" % apath) - - cached_files = cached_files or [{}] - cached_files = cached_files[-1] - - N_FILES = len(cached_files) - TOTAL_SIZE = 0 - for cfile in cached_files: - path = os.path.join(apath, cached_files[cfile]['fn']) - TOTAL_SIZE += os.path.getsize(path) - - summary.append("%s %s" % ("=" * 20, "%i files in fsspec cache folder (%s)" % (N_FILES, convert_size(TOTAL_SIZE)))) - summary.append("lscache %s" % os.path.sep.join([apath, ""])) - summary.append("=" * 20) - - listing = {'fn': [], 'size': [], 'time': [], 'original': [], 'uid': [], 'blocks': []} - for cfile in cached_files: - summary.append("- %s" % cached_files[cfile]['fn']) - 
listing['fn'].append(cached_files[cfile]['fn']) - - path = os.path.join(cache_path, cached_files[cfile]['fn']) - summary.append("\t%8s: %s" % ('SIZE', convert_size(os.path.getsize(path)))) - listing['size'].append(os.path.getsize(path)) - - key = 'time' - ts = cached_files[cfile][key] - tsf = pd.to_datetime(datetime.fromtimestamp(ts)).strftime("%c") - summary.append("\t%8s: %s (%s)" % (key, tsf, ts)) - listing['time'].append(pd.to_datetime(datetime.fromtimestamp(ts))) - - if version.parse(fsspec.__version__) > version.parse("0.8.7"): - key = 'original' - summary.append("\t%8s: %s" % (key, cached_files[cfile][key])) - listing[key].append(cached_files[cfile][key]) - - key = 'uid' - summary.append("\t%8s: %s" % (key, cached_files[cfile][key])) - listing[key].append(cached_files[cfile][key]) - - key = 'blocks' - summary.append("\t%8s: %s" % (key, cached_files[cfile][key])) - listing[key].append(cached_files[cfile][key]) - - summary.append("=" * 20) - summary = "\n".join(summary) - if prt: - # Return string to be printed: - return summary - else: - # Return dataframe listing: - # log.debug(summary) - return pd.DataFrame(listing) - - -def load_dict(ptype): - if ptype == "profilers": - try: - nvs = ArgoNVSReferenceTables(cache=True) - profilers = {} - for row in nvs.tbl(8).iterrows(): - profilers.update({int(row[1]['altLabel']): row[1]['prefLabel']}) - return profilers - except Exception: - with open(os.path.join(path2assets, "profilers.json"), "rb") as f: - loaded_dict = json.load(f)['data']['profilers'] - return loaded_dict - elif ptype == "institutions": - try: - nvs = ArgoNVSReferenceTables(cache=True) - institutions = {} - for row in nvs.tbl(4).iterrows(): - institutions.update({row[1]['altLabel']: row[1]['prefLabel']}) - return institutions - except Exception: - with open(os.path.join(path2assets, "institutions.json"), "rb") as f: - loaded_dict = json.load(f)['data']['institutions'] - return loaded_dict - else: - raise ValueError("Invalid dictionary name") - -def mapp_dict(Adictionnary, Avalue): - if Avalue not in Adictionnary: - return "Unknown" - else: - return Adictionnary[Avalue] - - -def list_available_data_src(): - """ List all available data sources """ - sources = {} - try: - from .data_fetchers import erddap_data as Erddap_Fetchers - # Ensure we're loading the erddap data fetcher with the current options: - Erddap_Fetchers.api_server_check = Erddap_Fetchers.api_server_check.replace(Erddap_Fetchers.api_server, OPTIONS['erddap']) - Erddap_Fetchers.api_server = OPTIONS['erddap'] - - sources["erddap"] = Erddap_Fetchers - except Exception: - warnings.warn( - "An error occurred while loading the ERDDAP data fetcher, " - "it will not be available !\n%s\n%s" - % (sys.exc_info()[0], sys.exc_info()[1]) - ) - pass - - try: - from .data_fetchers import argovis_data as ArgoVis_Fetchers - - sources["argovis"] = ArgoVis_Fetchers - except Exception: - warnings.warn( - "An error occurred while loading the ArgoVis data fetcher, " - "it will not be available !\n%s\n%s" - % (sys.exc_info()[0], sys.exc_info()[1]) - ) - pass +import inspect +from functools import wraps - try: - from .data_fetchers import gdacftp_data as GDAC_Fetchers - # Ensure we're loading the gdac data fetcher with the current options: - GDAC_Fetchers.api_server_check = OPTIONS['ftp'] - GDAC_Fetchers.api_server = OPTIONS['ftp'] +warnings.filterwarnings("default", category=DeprecationWarning, module=__name__) - sources["gdac"] = GDAC_Fetchers - except Exception: - warnings.warn( - "An error occurred while loading the GDAC data 
fetcher, " - "it will not be available !\n%s\n%s" - % (sys.exc_info()[0], sys.exc_info()[1]) - ) - pass - # return dict(sorted(sources.items())) - return sources +def refactored(func1): + rel = importlib.import_module('argopy.related') + utils = importlib.import_module('argopy.utils') + in_related = hasattr(rel, func1.__name__) + func2 = getattr(rel, func1.__name__) if in_related else getattr(utils, func1.__name__) -def list_available_index_src(): - """ List all available index sources """ - sources = {} - try: - from .data_fetchers import erddap_index as Erddap_Fetchers - # Ensure we're loading the erddap data fetcher with the current options: - Erddap_Fetchers.api_server_check = Erddap_Fetchers.api_server_check.replace(Erddap_Fetchers.api_server, OPTIONS['erddap']) - Erddap_Fetchers.api_server = OPTIONS['erddap'] + func1_type = 'function' + if inspect.isclass(func1): + func1_type = 'class' - sources["erddap"] = Erddap_Fetchers - except Exception: - warnings.warn( - "An error occurred while loading the ERDDAP index fetcher, " - "it will not be available !\n%s\n%s" - % (sys.exc_info()[0], sys.exc_info()[1]) - ) - pass + func2_loc = 'utils' + if in_related: + func2_loc = 'related' - try: - from .data_fetchers import gdacftp_index as GDAC_Fetchers - # Ensure we're loading the gdac data fetcher with the current options: - GDAC_Fetchers.api_server_check = OPTIONS['ftp'] - GDAC_Fetchers.api_server = OPTIONS['ftp'] + msg = "The 'argopy.utilities.{name}' {ftype} has moved to 'argopy.{where}.{name}'. \ +You're seeing this message because you called '{name}' imported from 'argopy.utilities'. \ +Please update your script to import '{name}' from 'argopy.{where}'. \ +After 0.1.15, importing 'utilities' will raise an error." - sources["gdac"] = GDAC_Fetchers - except Exception: + @wraps(func1) + def decorator(*args, **kwargs): + # warnings.simplefilter('always', DeprecationWarning) warnings.warn( - "An error occurred while loading the GDAC index fetcher, " - "it will not be available !\n%s\n%s" - % (sys.exc_info()[0], sys.exc_info()[1]) - ) - pass - - return sources - - -def list_standard_variables(): - """ List of variables for standard users """ - return [ - "DATA_MODE", - "LATITUDE", - "LONGITUDE", - "POSITION_QC", - "DIRECTION", - "PLATFORM_NUMBER", - "CYCLE_NUMBER", - "PRES", - "TEMP", - "PSAL", - "PRES_QC", - "TEMP_QC", - "PSAL_QC", - "PRES_ADJUSTED", - "TEMP_ADJUSTED", - "PSAL_ADJUSTED", - "PRES_ADJUSTED_QC", - "TEMP_ADJUSTED_QC", - "PSAL_ADJUSTED_QC", - "PRES_ADJUSTED_ERROR", - "TEMP_ADJUSTED_ERROR", - "PSAL_ADJUSTED_ERROR", - "PRES_ERROR", # can be created from PRES_ADJUSTED_ERROR after a filter_data_mode - "TEMP_ERROR", - "PSAL_ERROR", - "JULD", - "JULD_QC", - "TIME", - "TIME_QC", - # "CONFIG_MISSION_NUMBER", - ] - - -def list_multiprofile_file_variables(): - """ List of variables in a netcdf multiprofile file. 
- - This is for files created by GDAC under //_prof.nc - """ - return [ - "CONFIG_MISSION_NUMBER", - "CYCLE_NUMBER", - "DATA_CENTRE", - "DATA_MODE", - "DATA_STATE_INDICATOR", - "DATA_TYPE", - "DATE_CREATION", - "DATE_UPDATE", - "DC_REFERENCE", - "DIRECTION", - "FIRMWARE_VERSION", - "FLOAT_SERIAL_NO", - "FORMAT_VERSION", - "HANDBOOK_VERSION", - "HISTORY_ACTION", - "HISTORY_DATE", - "HISTORY_INSTITUTION", - "HISTORY_PARAMETER", - "HISTORY_PREVIOUS_VALUE", - "HISTORY_QCTEST", - "HISTORY_REFERENCE", - "HISTORY_SOFTWARE", - "HISTORY_SOFTWARE_RELEASE", - "HISTORY_START_PRES", - "HISTORY_STEP", - "HISTORY_STOP_PRES", - "JULD", - "JULD_LOCATION", - "JULD_QC", - "LATITUDE", - "LONGITUDE", - "PARAMETER", - "PI_NAME", - "PLATFORM_NUMBER", - "PLATFORM_TYPE", - "POSITIONING_SYSTEM", - "POSITION_QC", - "PRES", - "PRES_ADJUSTED", - "PRES_ADJUSTED_ERROR", - "PRES_ADJUSTED_QC", - "PRES_QC", - "PROFILE_PRES_QC", - "PROFILE_PSAL_QC", - "PROFILE_TEMP_QC", - "PROJECT_NAME", - "PSAL", - "PSAL_ADJUSTED", - "PSAL_ADJUSTED_ERROR", - "PSAL_ADJUSTED_QC", - "PSAL_QC", - "REFERENCE_DATE_TIME", - "SCIENTIFIC_CALIB_COEFFICIENT", - "SCIENTIFIC_CALIB_COMMENT", - "SCIENTIFIC_CALIB_DATE", - "SCIENTIFIC_CALIB_EQUATION", - "STATION_PARAMETERS", - "TEMP", - "TEMP_ADJUSTED", - "TEMP_ADJUSTED_ERROR", - "TEMP_ADJUSTED_QC", - "TEMP_QC", - "VERTICAL_SAMPLING_SCHEME", - "WMO_INST_TYPE", - ] - - -def get_sys_info(): - "Returns system information as a dict" - - blob = [] - - # get full commit hash - commit = None - if os.path.isdir(".git") and os.path.isdir("argopy"): - try: - pipe = subprocess.Popen( # nosec No user provided input to control here - 'git log --format="%H" -n 1'.split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - so, serr = pipe.communicate() - except Exception: - pass - else: - if pipe.returncode == 0: - commit = so - try: - commit = so.decode("utf-8") - except ValueError: - pass - commit = commit.strip().strip('"') - - blob.append(("commit", commit)) - - try: - (sysname, nodename, release, version_, machine, processor) = platform.uname() - blob.extend( - [ - ("python", sys.version), - ("python-bits", struct.calcsize("P") * 8), - ("OS", "%s" % (sysname)), - ("OS-release", "%s" % (release)), - ("machine", "%s" % (machine)), - ("processor", "%s" % (processor)), - ("byteorder", "%s" % sys.byteorder), - ("LC_ALL", "%s" % os.environ.get("LC_ALL", "None")), - ("LANG", "%s" % os.environ.get("LANG", "None")), - ("LOCALE", "%s.%s" % locale.getlocale()), - ] + msg.format(name=func1.__name__, ftype=func1_type, where=func2_loc), + category=DeprecationWarning, + stacklevel=2 ) - except Exception: - pass - - return blob + # warnings.simplefilter('default', DeprecationWarning) + return func2(*args, **kwargs) + return decorator -def netcdf_and_hdf5_versions(): - libhdf5_version = None - libnetcdf_version = None - try: - import netCDF4 +# Argo related dataset and Meta-data fetchers - libhdf5_version = netCDF4.__hdf5libversion__ - libnetcdf_version = netCDF4.__netcdf4libversion__ - except ImportError: - try: - import h5py - - libhdf5_version = h5py.version.hdf5_version - except ImportError: - pass - return [("libhdf5", libhdf5_version), ("libnetcdf", libnetcdf_version)] - - -def show_versions(file=sys.stdout, conda=False): # noqa: C901 - """ Print the versions of argopy and its dependencies - - Parameters - ---------- - file : file-like, optional - print to the given file-like object. Defaults to sys.stdout. 
- conda: bool, optional - format versions to be copy/pasted on a conda environment file (default, False) - """ - sys_info = get_sys_info() - - try: - sys_info.extend(netcdf_and_hdf5_versions()) - except Exception as e: - print(f"Error collecting netcdf / hdf5 version: {e}") - - DEPS = { - 'core': sorted([ - ("argopy", lambda mod: mod.__version__), - - ("xarray", lambda mod: mod.__version__), - ("scipy", lambda mod: mod.__version__), - ("netCDF4", lambda mod: mod.__version__), - ("erddapy", lambda mod: mod.__version__), # This could go away from requirements ? - ("fsspec", lambda mod: mod.__version__), - ("aiohttp", lambda mod: mod.__version__), - ("packaging", lambda mod: mod.__version__), # will come with xarray, Using 'version' to make API compatible with several fsspec releases - ("requests", lambda mod: mod.__version__), - ("toolz", lambda mod: mod.__version__), - ]), - 'ext.util': sorted([ - ("gsw", lambda mod: mod.__version__), # Used by xarray accessor to compute new variables - ("tqdm", lambda mod: mod.__version__), - ("zarr", lambda mod: mod.__version__), - ]), - 'ext.perf': sorted([ - ("dask", lambda mod: mod.__version__), - ("distributed", lambda mod: mod.__version__), - ("pyarrow", lambda mod: mod.__version__), - ]), - 'ext.plot': sorted([ - ("matplotlib", lambda mod: mod.__version__), - ("cartopy", lambda mod: mod.__version__), - ("seaborn", lambda mod: mod.__version__), - ("IPython", lambda mod: mod.__version__), - ("ipywidgets", lambda mod: mod.__version__), - ("ipykernel", lambda mod: mod.__version__), - ]), - 'dev': sorted([ - - ("bottleneck", lambda mod: mod.__version__), - ("cftime", lambda mod: mod.__version__), - ("cfgrib", lambda mod: mod.__version__), - ("conda", lambda mod: mod.__version__), - ("nc_time_axis", lambda mod: mod.__version__), - - ("numpy", lambda mod: mod.__version__), # will come with xarray and pandas - ("pandas", lambda mod: mod.__version__), # will come with xarray - - ("pip", lambda mod: mod.__version__), - ("black", lambda mod: mod.__version__), - ("flake8", lambda mod: mod.__version__), - ("pytest", lambda mod: mod.__version__), # will come with pandas - ("pytest_env", lambda mod: mod.__version__), # will come with pandas - ("pytest_cov", lambda mod: mod.__version__), # will come with pandas - ("pytest_localftpserver", lambda mod: mod.__version__), # will come with pandas - ("pytest_reportlog", lambda mod: mod.__version__), # will come with pandas - ("setuptools", lambda mod: mod.__version__), - ("aiofiles", lambda mod: mod.__version__), - ("sphinx", lambda mod: mod.__version__), - ]), - } - - DEPS_blob = {} - for level in DEPS.keys(): - deps = DEPS[level] - deps_blob = list() - for (modname, ver_f) in deps: - try: - if modname in sys.modules: - mod = sys.modules[modname] - else: - mod = importlib.import_module(modname) - except Exception: - deps_blob.append((modname, '-')) - else: - try: - ver = ver_f(mod) - deps_blob.append((modname, ver)) - except Exception: - deps_blob.append((modname, "installed")) - DEPS_blob[level] = deps_blob +@refactored +class TopoFetcher: + pass - print("\nSYSTEM", file=file) - print("------", file=file) - for k, stat in sys_info: - print(f"{k}: {stat}", file=file) +@refactored +class ArgoDocs: + pass - for level in DEPS_blob: - if conda: - print("\n# %s:" % level.upper(), file=file) - else: - title = "INSTALLED VERSIONS: %s" % level.upper() - print("\n%s" % title, file=file) - print("-" * len(title), file=file) - deps_blob = DEPS_blob[level] - for k, stat in deps_blob: - if conda: - if k != 'argopy': - kf = 
k.replace("_", "-") - comment = ' ' if stat != '-' else '# ' - print(f"{comment} - {kf} = {stat}", file=file) # Format like a conda env line, useful to update ci/requirements - else: - print("{:<12}: {:<12}".format(k, stat), file=file) +@refactored +class ArgoNVSReferenceTables: + pass +@refactored +class OceanOPSDeployments: + pass -def show_options(file=sys.stdout): # noqa: C901 - """ Print options of argopy +@refactored +def get_coriolis_profile_id(*args, **kwargs): + pass - Parameters - ---------- - file : file-like, optional - print to the given file-like object. Defaults to sys.stdout. - """ - print("\nARGOPY OPTIONS", file=file) - print("--------------", file=file) - opts = copy.deepcopy(OPTIONS) - opts = dict(sorted(opts.items())) - for k, v in opts.items(): - print(f"{k}: {v}", file=file) +@refactored +def get_ea_profile_page(*args, **kwargs): + pass +@refactored +def load_dict(*args, **kwargs): + pass -def check_gdac_path(path, errors='ignore'): # noqa: C901 - """ Check if a path has the expected GDAC ftp structure +@refactored +def mapp_dict(*args, **kwargs): + pass - Expected GDAC ftp structure:: +# Checkers +@refactored +def is_box(*args, **kwargs): + pass - . - └── dac - ├── aoml - ├── ... - ├── coriolis - ├── ... - ├── meds - └── nmdis +@refactored +def is_indexbox(*args, **kwargs): + pass - This check will return True if at least one DAC sub-folder is found under path/dac/ +@refactored +def is_list_of_strings(*args, **kwargs): + pass - Examples:: - >>> check_gdac_path("https://data-argo.ifremer.fr") # True - >>> check_gdac_path("ftp://ftp.ifremer.fr/ifremer/argo") # True - >>> check_gdac_path("ftp://usgodae.org/pub/outgoing/argo") # True - >>> check_gdac_path("/home/ref-argo/gdac") # True - >>> check_gdac_path("https://www.ifremer.fr") # False - >>> check_gdac_path("ftp://usgodae.org/pub/outgoing") # False +@refactored +def is_list_of_dicts(*args, **kwargs): + pass - Parameters - ---------- - path: str - Path name to check, including access protocol - errors: str - "ignore" or "raise" (or "warn") +@refactored +def is_list_of_datasets(*args, **kwargs): + pass - Returns - ------- - checked: boolean - True if at least one DAC folder is found under path/dac/ - False otherwise - """ - # Create a file system for this path - if split_protocol(path)[0] is None: - fs = fsspec.filesystem('file') - elif 'https' in split_protocol(path)[0]: - fs = fsspec.filesystem('http') - elif 'ftp' in split_protocol(path)[0]: - try: - host = split_protocol(path)[-1].split('/')[0] - fs = fsspec.filesystem('ftp', host=host) - except gaierror: - if errors == 'raise': - raise FtpPathError("Can't get address info (GAIerror) on '%s'" % host) - elif errors == "warn": - warnings.warn("Can't get address info (GAIerror) on '%s'" % host) - return False - else: - return False - else: - raise FtpPathError("Unknown protocol for an Argo GDAC host: %s" % split_protocol(path)[0]) +@refactored +def is_list_equal(*args, **kwargs): + pass - # dacs = [ - # "aoml", - # "bodc", - # "coriolis", - # "csio", - # "csiro", - # "incois", - # "jma", - # "kma", - # "kordi", - # "meds", - # "nmdis", - # ] +@refactored +def check_wmo(*args, **kwargs): + pass - # Case 1: - check1 = ( - fs.exists(path) - and fs.exists(fs.sep.join([path, "dac"])) - # and np.any([fs.exists(fs.sep.join([path, "dac", dac])) for dac in dacs]) # Take too much time on http/ftp GDAC server - ) - if check1: - return True - elif errors == "raise": - raise FtpPathError("This path is not GDAC compliant (no `dac` folder with legitimate sub-folder):\n%s" % path) 
+@refactored +def is_wmo(*args, **kwargs): + pass - elif errors == "warn": - warnings.warn("This path is not GDAC compliant:\n%s" % path) - return False - else: - return False +@refactored +def check_cyc(*args, **kwargs): + pass +@refactored +def is_cyc(*args, **kwargs): + pass -def isconnected(host: str = "https://www.ifremer.fr", maxtry: int = 10): - """Check if an URL is alive +@refactored +def check_index_cols(*args, **kwargs): + pass - Parameters - ---------- - host: str - URL to use, 'https://www.ifremer.fr' by default - maxtry: int, default: 10 - Maximum number of host connections to try before +@refactored +def check_gdac_path(*args, **kwargs): + pass - Returns - ------- - bool - """ - # log.debug("isconnected: %s" % host) - if split_protocol(host)[0] in ["http", "https", "ftp", "sftp"]: - it = 0 - while it < maxtry: - try: - # log.debug("Checking if %s is connected ..." % host) - urllib.request.urlopen(host, timeout=1) # nosec B310 because host protocol already checked - result, it = True, maxtry - except Exception: - result, it = False, it + 1 - return result - else: - return os.path.exists(host) +@refactored +def isconnected(*args, **kwargs): + pass +@refactored +def isalive(*args, **kwargs): + pass -def urlhaskeyword(url: str = "", keyword: str = '', maxtry: int = 10): - """ Check if a keyword is in the content of a URL +@refactored +def isAPIconnected(*args, **kwargs): + pass - Parameters - ---------- - url: str - keyword: str - maxtry: int, default: 10 - Maximum number of host connections to try before returning False +@refactored +def erddap_ds_exists(*args, **kwargs): + pass - Returns - ------- - bool - """ - it = 0 - while it < maxtry: - try: - with fsspec.open(url) as f: - data = f.read() - result = keyword in str(data) - it = maxtry - except Exception: - result, it = False, it + 1 - return result +@refactored +def urlhaskeyword(*args, **kwargs): + pass -def isalive(api_server_check: Union[str, dict] = "") -> bool: - """Check if an API is alive or not +# Data type casting: - 2 methods are available: +@refactored +def to_list(*args, **kwargs): + pass - - URL Ping - - keyword Check +@refactored +def cast_Argo_variable_type(*args, **kwargs): + pass - Parameters - ---------- - api_server_check - Url string or dictionary with [``url``, ``keyword``] keys. +from .utils.casting import DATA_TYPES - - For a string, uses: :class:`argopy.utilities.isconnected` - - For a dictionary, uses: :class:`argopy.utilities.urlhaskeyword` +# Decorators - Returns - ------- - bool - """ - # log.debug("isalive: %s" % api_server_check) - if isinstance(api_server_check, dict): - return urlhaskeyword(url=api_server_check['url'], keyword=api_server_check['keyword']) - else: - return isconnected(api_server_check) +@refactored +def deprecated(*args, **kwargs): + pass +@refactored +def doc_inherit(*args, **kwargs): + pass -def isAPIconnected(src="erddap", data=True): - """ Check if a source API is alive or not +# Lists: - The API is connected when it has a live URL or valid folder path. 
+@refactored +def list_available_data_src(*args, **kwargs): + pass - Parameters - ---------- - src: str - The data or index source name, 'erddap' default - data: bool - If True check the data fetcher (default), if False, check the index fetcher +@refactored +def list_available_index_src(*args, **kwargs): + pass - Returns - ------- - bool - """ - if data: - list_src = list_available_data_src() - else: - list_src = list_available_index_src() +@refactored +def list_standard_variables(*args, **kwargs): + pass - if src in list_src and getattr(list_src[src], "api_server_check", None): - return isalive(list_src[src].api_server_check) - else: - raise InvalidFetcher +@refactored +def list_multiprofile_file_variables(*args, **kwargs): + pass +# Cache management: +@refactored +def clear_cache(*args, **kwargs): + pass -def erddap_ds_exists( - ds: Union[list, str] = "ArgoFloats", - erddap: str = None, - maxtry: int = 2 -) -> bool: - """ Check if a dataset exists on a remote erddap server +@refactored +def lscache(*args, **kwargs): + pass - Parameter - --------- - ds: str, default='ArgoFloats' - Name of the erddap dataset to check - erddap: str, default=OPTIONS['erddap'] - Url of the erddap server - maxtry: int, default: 2 - Maximum number of host connections to try +# Computation and performances: +@refactored +class Chunker: + pass - Return - ------ - bool - """ - if erddap is None: - erddap = OPTIONS['erddap'] - # log.debug("from erddap_ds_exists: %s" % erddap) - from .stores import httpstore - if isconnected(erddap, maxtry=maxtry): - with httpstore(timeout=OPTIONS['api_timeout']).open("".join([erddap, "/info/index.json"])) as of: - erddap_index = json.load(of) - if is_list_of_strings(ds): - return [this_ds in [row[-1] for row in erddap_index["table"]["rows"]] for this_ds in ds] - else: - return ds in [row[-1] for row in erddap_index["table"]["rows"]] - else: - log.debug("Cannot reach erddap server: %s" % erddap) - warnings.warn("Return False because we cannot reach the erddap server %s" % erddap) - return False +# Accessories classes (specific objects): +@refactored +class float_wmo: + pass +@refactored +class Registry: + pass -def badge(label="label", message="message", color="green", insert=False): - """ Return or insert shield.io badge image +# Locals (environments, versions, systems): +@refactored +def get_sys_info(*args, **kwargs): + pass - Use the shields.io service to create a badge image +@refactored +def netcdf_and_hdf5_versions(*args, **kwargs): + pass - https://img.shields.io/static/v1?label=]*)rel="nofollow">TXT<\/a>', - str(file)) - export_txt_url = x[1].replace("https://archimer.ifremer.fr", self._archimer) - self._risfile = export_txt_url - self._ris = self.RIS(export_txt_url, fs=self._fs).record - return self._ris - else: - raise ValueError("Select a document first !") - - @property - def abstract(self): - """Abstract of a document""" - if self.docid is not None: - return self.ris['AB'] - else: - raise ValueError("Select a document first !") - - @property - def pdf(self): - """Link to the online pdf version of a document""" - if self.docid is not None: - return self.ris['UR'] - else: - raise ValueError("Select a document first !") - - def show(self, height=800): - """Insert document in pdf in a notebook cell - - Parameters - ---------- - height: int - Height in pixels of the cell - """ - if self.docid is not None: - from IPython.core.display import HTML - return HTML( - '' % (self.ris['UR'], height)) - else: - raise ValueError("Select a document first !") - - def open_pdf(self, 
page=None, url_only=False): - """Open document in new browser tab - - Parameters - ---------- - page: int, optional - Open directly a specific page number - """ - url = self.pdf - url += '#view=FitV&pagemode=thumbs' - if page: - url += '&page=%i' % page - if self.docid is not None: - if not url_only: - import webbrowser - webbrowser.open_new(url) - else: - return url - else: - raise ValueError("Select a document first !") - - def search(self, txt, where='title'): - """Search for string in all documents title or abstract - - Parameters - ---------- - txt: str - where: str, default='title' - Where to search, can be 'title' or 'abstract' - - Returns - ------- - list - - """ - results = [] - for doc in self.list.iterrows(): - docid = doc[1]['id'] - if where == 'title': - if txt.lower() in ArgoDocs(docid).js['title'].lower(): - results.append(docid) - elif where == 'abstract': - if txt.lower() in ArgoDocs(docid).abstract.lower(): - results.append(docid) - return results - - -def drop_variables_not_in_all_datasets(ds_collection): - """Drop variables that are not in all datasets (the lowest common denominator) - - Parameters - ---------- - list of :class:`xr.DataSet` - - Returns - ------- - list of :class:`xr.DataSet` - """ - - # List all possible data variables: - vlist = [] - for res in ds_collection: - [vlist.append(v) for v in list(res.data_vars)] - vlist = np.unique(vlist) - - # Check if each variables are in each datasets: - ishere = np.zeros((len(vlist), len(ds_collection))) - for ir, res in enumerate(ds_collection): - for iv, v in enumerate(res.data_vars): - for iu, u in enumerate(vlist): - if v == u: - ishere[iu, ir] = 1 - - # List of dataset with missing variables: - # ir_missing = np.sum(ishere, axis=0) < len(vlist) - # List of variables missing in some dataset: - iv_missing = np.sum(ishere, axis=1) < len(ds_collection) - if len(iv_missing) > 0: - log.debug("Dropping these variables that are missing from some dataset in this list: %s" % vlist[iv_missing]) - - # List of variables to keep - iv_tokeep = np.sum(ishere, axis=1) == len(ds_collection) - for ir, res in enumerate(ds_collection): - # print("\n", res.attrs['Fetched_uri']) - v_to_drop = [] - for iv, v in enumerate(res.data_vars): - if v not in vlist[iv_tokeep]: - v_to_drop.append(v) - ds_collection[ir] = ds_collection[ir].drop_vars(v_to_drop) - return ds_collection - - -def fill_variables_not_in_all_datasets(ds_collection, concat_dim='rows'): - """Add empty variables to dataset so that all the collection have the same data_vars and coords - - This is to make sure that the collection of dataset can be concatenated - - Parameters - ---------- - list of :class:`xr.DataSet` - - Returns - ------- - list of :class:`xr.DataSet` - """ - def first_variable_with_concat_dim(this_ds, concat_dim='rows'): - """Return the 1st variable in the collection that have the concat_dim in dims""" - first = None - for v in this_ds.data_vars: - if concat_dim in this_ds[v].dims: - first = v - pass - return first - - def fillvalue(da): - """ Return fillvalue for a dataarray """ - # https://docs.scipy.org/doc/numpy/reference/generated/numpy.dtype.kind.html#numpy.dtype.kind - if da.dtype.kind in ["U"]: - fillvalue = " " - elif da.dtype.kind == "i": - fillvalue = 99999 - elif da.dtype.kind == "M": - fillvalue = np.datetime64("NaT") - else: - fillvalue = np.nan - return fillvalue - - # List all possible data variables: - vlist = [] - for res in ds_collection: - [vlist.append(v) for v in list(res.variables) if concat_dim in res[v].dims] - vlist = np.unique(vlist) 
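# Editor's note (annotation, not part of the diff): a minimal usage sketch for the two
# dataset-alignment helpers in this hunk (drop_variables_not_in_all_datasets above, and
# fill_variables_not_in_all_datasets, whose body continues just below). It assumes the
# implementations shown here move unchanged to argopy/utils/transform.py, from which the
# new argopy/utils/__init__.py re-exports them; the datasets and values are hand-made.
import numpy as np
import xarray as xr
from argopy.utils import (
    drop_variables_not_in_all_datasets,
    fill_variables_not_in_all_datasets,
)

ds1 = xr.Dataset({"TEMP": ("rows", np.arange(3.0)), "PSAL": ("rows", np.arange(3.0))})
ds2 = xr.Dataset({"TEMP": ("rows", np.arange(2.0))})

common = drop_variables_not_in_all_datasets([ds1, ds2])   # PSAL dropped from the first dataset
filled = fill_variables_not_in_all_datasets([ds1, ds2])   # PSAL added to the second one, filled with NaN
merged = xr.concat(filled, dim="rows")                     # the collection can now be concatenated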
- # log.debug('variables', vlist) - - # List all possible coordinates: - clist = [] - for res in ds_collection: - [clist.append(c) for c in list(res.coords) if concat_dim in res[c].dims] - clist = np.unique(clist) - # log.debu('coordinates', clist) - - # Get the first occurrence of each variable, to be used as a template for attributes and dtype - meta = {} - for ir, ds in enumerate(ds_collection): - for v in vlist: - if v in ds.variables: - meta[v] = {'attrs': ds[v].attrs, 'dtype': ds[v].dtype, 'fill_value': fillvalue(ds[v])} - # [log.debug(meta[m]) for m in meta.keys()] - - # Add missing variables to dataset - datasets = [ds.copy() for ds in ds_collection] - for ir, ds in enumerate(datasets): - for v in vlist: - if v not in ds.variables: - like = ds[first_variable_with_concat_dim(ds, concat_dim=concat_dim)] - datasets[ir][v] = xr.full_like(like, fill_value=meta[v]['fill_value'], dtype=meta[v]['dtype']) - datasets[ir][v].attrs = meta[v]['attrs'] - - # Make sure that all datasets have the same set of coordinates - results = [] - for ir, ds in enumerate(datasets): - results.append(datasets[ir].set_coords(clist)) - - # - return results diff --git a/argopy/utils/__init__.py b/argopy/utils/__init__.py index e69de29b..0d1b9b6e 100644 --- a/argopy/utils/__init__.py +++ b/argopy/utils/__init__.py @@ -0,0 +1,111 @@ +from .checkers import ( + is_box, + is_indexbox, + is_list_of_strings, + is_list_of_dicts, + is_list_of_datasets, + is_list_equal, + is_wmo, + check_wmo, + is_cyc, + check_cyc, + check_index_cols, + check_gdac_path, + isconnected, + urlhaskeyword, + isalive, + isAPIconnected, + erddap_ds_exists, +) +from .casting import DATA_TYPES, cast_Argo_variable_type, to_list +from .decorators import deprecated, doc_inherit +from .lists import ( + list_available_data_src, + list_available_index_src, + list_standard_variables, + list_multiprofile_file_variables, +) +from .caching import clear_cache, lscache +from .monitored_threadpool import MyThreadPoolExecutor as MonitoredThreadPoolExecutor +from .chunking import Chunker +from .accessories import Registry, float_wmo +from .locals import ( + show_versions, + show_options, + modified_environ, + get_sys_info, + netcdf_and_hdf5_versions, +) +from .monitors import monitor_status, badge, fetch_status +from .geo import wmo2box, wrap_longitude, toYearFraction, YearFraction_to_datetime +from .compute import linear_interpolation_remap, groupby_remap +from .transform import ( + fill_variables_not_in_all_datasets, + drop_variables_not_in_all_datasets, +) +from .format import argo_split_path, format_oneline +from .loggers import warnUnless, log_argopy_callerstack + +__all__ = ( + # Checkers: + "is_box", + "is_indexbox", + "is_list_of_strings", + "is_list_of_dicts", + "is_list_of_datasets", + "is_list_equal", + "is_wmo", + "check_wmo", + "is_cyc", + "check_cyc", + "check_index_cols", + "check_gdac_path", + "isconnected", + "isalive", + "isAPIconnected", + "erddap_ds_exists", + # Data type casting: + "DATA_TYPES", + "cast_Argo_variable_type", + "to_list", + # Decorators: + "deprecated", + "doc_inherit", + # Lists: + "list_available_data_src", + "list_available_index_src", + "list_standard_variables", + "list_multiprofile_file_variables", + # Cache management: + "clear_cache", + "lscache", + # Computation and performances: + "MonitoredThreadPoolExecutor", + "Chunker", + # Accessories classes (specific objects): + "Registry", + "float_wmo", + # Locals (environments, versions, systems): + "show_versions", + "show_options", + "modified_environ", + # Monitors + 
"monitor_status", + # Geo (space/time data utilities) + "wmo2box", + "wrap_longitude", + "toYearFraction", + "YearFraction_to_datetime", + # Computation with datasets: + "linear_interpolation_remap", + "groupby_remap", + # Manipulate datasets: + "fill_variables_not_in_all_datasets", + "drop_variables_not_in_all_datasets", + # Formatters: + "format_oneline", + "argo_split_path", + # Loggers: + "warnUnless", + "log_argopy_callerstack", +) diff --git a/argopy/utils/accessories.py b/argopy/utils/accessories.py new file mode 100644 index 00000000..d3109c59 --- /dev/null +++ b/argopy/utils/accessories.py @@ -0,0 +1,267 @@ +from abc import ABC, abstractmethod +from collections import UserList +import warnings +import logging +import copy + +from .checkers import check_wmo, is_wmo + + +log = logging.getLogger("argopy.utils.accessories") + + +class RegistryItem(ABC): + """Prototype for possible custom items in a Registry""" + + @property + @abstractmethod + def value(self): + raise NotImplementedError("Not implemented") + + @property + @abstractmethod + def isvalid(self, item): + raise NotImplementedError("Not implemented") + + @abstractmethod + def __str__(self): + raise NotImplementedError("Not implemented") + + @abstractmethod + def __repr__(self): + raise NotImplementedError("Not implemented") + + +class float_wmo(RegistryItem): + """Argo float WMO number object""" + + def __init__(self, WMO_number, errors="raise"): + """Create an Argo float WMO number object + + Parameters + ---------- + WMO_number: object + Anything that could be casted as an integer + errors: {'raise', 'warn', 'ignore'} + Possibly raises a ValueError exception or UserWarning, otherwise fails silently if WMO_number is not valid + + Returns + ------- + :class:`argopy.utilities.float_wmo` + """ + self.errors = errors + if isinstance(WMO_number, float_wmo): + item = WMO_number.value + else: + item = check_wmo(WMO_number, errors=self.errors)[ + 0 + ] # This will automatically validate item + self.item = item + + @property + def isvalid(self): + """Check if WMO number is valid""" + return is_wmo(self.item, errors=self.errors) + # return True # Because it was checked at instantiation + + @property + def value(self): + """Return WMO number as in integer""" + return int(self.item) + + def __str__(self): + # return "%s" % check_wmo(self.item)[0] + return "%s" % self.item + + def __repr__(self): + return f"WMO({self.item})" + + def __check_other__(self, other): + return check_wmo(other)[0] if type(other) is not float_wmo else other.item + + def __eq__(self, other): + return self.item.__eq__(self.__check_other__(other)) + + def __ne__(self, other): + return self.item.__ne__(self.__check_other__(other)) + + def __gt__(self, other): + return self.item.__gt__(self.__check_other__(other)) + + def __lt__(self, other): + return self.item.__lt__(self.__check_other__(other)) + + def __ge__(self, other): + return self.item.__ge__(self.__check_other__(other)) + + def __le__(self, other): + return self.item.__le__(self.__check_other__(other)) + + def __hash__(self): + return hash(self.item) + + +class Registry(UserList): + """A list manager can that validate item type + + Examples + -------- + You can commit new entry to the registry, one by one: + + >>> R = Registry(name='file') + >>> R.commit('meds/4901105/profiles/D4901105_017.nc') + >>> R.commit('aoml/1900046/profiles/D1900046_179.nc') + + Or with a list: + + >>> R = Registry(name='My floats', dtype='wmo') + >>> R.commit([2901746, 4902252]) + + And also at instantiation time (name and dtype 
are optional): + + >>> R = Registry([2901746, 4902252], name='My floats', dtype=float_wmo) + + Registry can be used like a list. + + It is iterable: + + >>> for wmo in R: + >>> print(wmo) + + It has a ``len`` property: + + >>> len(R) + + It can be checked for values: + + >>> 4902252 in R + + You can also remove items from the registry, again one by one or with a list: + + >>> R.remove('2901746') + + """ + + def _complain(self, msg): + if self._invalid == "raise": + raise ValueError(msg) + elif self._invalid == "warn": + warnings.warn(msg) + else: + log.debug(msg) + + def _str(self, item): + is_valid = isinstance(item, str) + if not is_valid: + self._complain("%s is not a valid %s" % (str(item), self.dtype)) + return is_valid + + def _dict(self, item): + is_valid = isinstance(item, dict) + if not is_valid: + self._complain("%s is not a valid %s" % (str(item), self.dtype)) + return is_valid + + def _wmo(self, item): + return item.isvalid + + def __init__( + self, initlist=None, name: str = "unnamed", dtype="str", invalid="raise" + ): + """Create a registry, i.e. a controlled list + + Parameters + ---------- + initlist: list, optional + List of values to register + name: str, default: 'unnamed' + Name of the Registry + dtype: :class:`str` or dtype, default: :class:`str` + Data type of registry content. Supported values are: 'str', 'wmo', float_wmo + invalid: str, default: 'raise' + Define what do to when a new item is not valid. Can be 'raise' or 'ignore' + """ + self.name = name + self._invalid = invalid + if repr(dtype) == "" or dtype == "str": + self._validator = self._str + self.dtype = str + elif dtype == float_wmo or str(dtype).lower() == "wmo": + self._validator = self._wmo + self.dtype = float_wmo + elif repr(dtype) == "" or dtype == "dict": + self._validator = self._dict + self.dtype = dict + elif hasattr(dtype, "isvalid"): + self._validator = dtype.isvalid + self.dtype = dtype + else: + raise ValueError("Unrecognised Registry data type '%s'" % dtype) + + if initlist is not None: + initlist = self._process_items(initlist) + super().__init__(initlist) + + def __repr__(self): + summary = ["%s" % str(self.dtype)] + summary.append("Name: %s" % self.name) + N = len(self.data) + msg = "Nitems: %s" % N if N > 1 else "Nitem: %s" % N + summary.append(msg) + if N > 0: + items = [str(item) for item in self.data] + # msg = format_oneline("[%s]" % "; ".join(items), max_width=120) + msg = "[%s]" % "; ".join(items) + summary.append("Content: %s" % msg) + return "\n".join(summary) + + def _process_items(self, items): + if not isinstance(items, list): + items = [items] + if self.dtype == float_wmo: + items = [float_wmo(item, errors=self._invalid) for item in items] + return items + + def commit(self, values): + """R.commit(values) -- append values to the end of the registry if not already in""" + items = self._process_items(values) + for item in items: + if item not in self.data and self._validator(item): + super().append(item) + return self + + def append(self, value): + """R.append(value) -- append value to the end of the registry""" + items = self._process_items(value) + for item in items: + if self._validator(item): + super().append(item) + return self + + def extend(self, other): + """R.extend(iterable) -- extend registry by appending elements from the iterable""" + self.append(other) + return self + + def remove(self, values): + """R.remove(valueS) -- remove first occurrence of values.""" + items = self._process_items(values) + for item in items: + if item in self.data: + super().remove(item) + 
return self + + def insert(self, index, value): + """R.insert(index, value) -- insert value before index.""" + item = self._process_items(value)[0] + if self._validator(item): + super().insert(index, item) + return self + + def __copy__(self): + # Called with copy.copy(R) + return Registry(copy.copy(self.data), dtype=self.dtype) + + def copy(self): + """Return a shallow copy of the registry""" + return self.__copy__() diff --git a/argopy/utils/caching.py b/argopy/utils/caching.py new file mode 100644 index 00000000..5389aabd --- /dev/null +++ b/argopy/utils/caching.py @@ -0,0 +1,139 @@ +import os +import shutil +import logging +import pickle +import fsspec +import pandas as pd +from packaging import version +from ..options import OPTIONS +from ..errors import FileSystemHasNoCache + +log = logging.getLogger("argopy.utils.caching") + + +def clear_cache(fs=None): + """Delete argopy cache folder content""" + if os.path.exists(OPTIONS["cachedir"]): + # shutil.rmtree(OPTIONS["cachedir"]) + for filename in os.listdir(OPTIONS["cachedir"]): + file_path = os.path.join(OPTIONS["cachedir"], filename) + try: + if os.path.isfile(file_path) or os.path.islink(file_path): + os.unlink(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except Exception as e: + print("Failed to delete %s. Reason: %s" % (file_path, e)) + if fs: + fs.clear_cache() + + +def lscache(cache_path: str = "", prt=True): + """Decode and list cache folder content + + Parameters + ---------- + cache_path: str + prt: bool, default=True + Return a printable string or a :class:`pandas.DataFrame` + + Returns + ------- + str or :class:`pandas.DataFrame` + """ + from datetime import datetime + import math + + summary = [] + + cache_path = OPTIONS["cachedir"] if cache_path == "" else cache_path + apath = os.path.abspath(cache_path) + log.debug("Listing cache content at: %s" % cache_path) + + def convert_size(size_bytes): + if size_bytes == 0: + return "0B" + size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") + i = int(math.floor(math.log(size_bytes, 1024))) + p = math.pow(1024, i) + s = round(size_bytes / p, 2) + return "%s %s" % (s, size_name[i]) + + cached_files = [] + fn = os.path.join(apath, "cache") + if os.path.exists(fn): + with open(fn, "rb") as f: + loaded_cached_files = pickle.load( + f + ) # nosec B301 because files controlled internally + for c in loaded_cached_files.values(): + if isinstance(c["blocks"], list): + c["blocks"] = set(c["blocks"]) + cached_files.append(loaded_cached_files) + else: + raise FileSystemHasNoCache("No fsspec cache system at: %s" % apath) + + cached_files = cached_files or [{}] + cached_files = cached_files[-1] + + N_FILES = len(cached_files) + TOTAL_SIZE = 0 + for cfile in cached_files: + path = os.path.join(apath, cached_files[cfile]["fn"]) + TOTAL_SIZE += os.path.getsize(path) + + summary.append( + "%s %s" + % ( + "=" * 20, + "%i files in fsspec cache folder (%s)" + % (N_FILES, convert_size(TOTAL_SIZE)), + ) + ) + summary.append("lscache %s" % os.path.sep.join([apath, ""])) + summary.append("=" * 20) + + listing = { + "fn": [], + "size": [], + "time": [], + "original": [], + "uid": [], + "blocks": [], + } + for cfile in cached_files: + summary.append("- %s" % cached_files[cfile]["fn"]) + listing["fn"].append(cached_files[cfile]["fn"]) + + path = os.path.join(cache_path, cached_files[cfile]["fn"]) + summary.append("\t%8s: %s" % ("SIZE", convert_size(os.path.getsize(path)))) + listing["size"].append(os.path.getsize(path)) + + key = "time" + ts = cached_files[cfile][key] 
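# Editor's note (annotation, not part of the diff): lscache() decodes the pickled "cache"
# file maintained by fsspec's caching filesystem in the argopy cache folder. A minimal
# sketch of one decoded entry is given below; the keys ("fn", "time", "uid", "blocks",
# "original") are the ones read by the surrounding code, while the values and the URL
# are purely illustrative.
import time

illustrative_entry = {
    "fn": "0a1b2c3d4e5f",    # local file name inside the cache folder
    "time": time.time(),     # creation timestamp, read as `ts` just above
    "uid": "0a1b2c3d4e5f",   # unique identifier of the cached resource
    "blocks": True,          # True for whole-file caches, or a set of block numbers
    "original": "https://data-argo.ifremer.fr/ar_index_global_prof.txt",  # remote target
}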
+ tsf = pd.to_datetime(datetime.fromtimestamp(ts)).strftime("%c") + summary.append("\t%8s: %s (%s)" % (key, tsf, ts)) + listing["time"].append(pd.to_datetime(datetime.fromtimestamp(ts))) + + if version.parse(fsspec.__version__) > version.parse("0.8.7"): + key = "original" + summary.append("\t%8s: %s" % (key, cached_files[cfile][key])) + listing[key].append(cached_files[cfile][key]) + + key = "uid" + summary.append("\t%8s: %s" % (key, cached_files[cfile][key])) + listing[key].append(cached_files[cfile][key]) + + key = "blocks" + summary.append("\t%8s: %s" % (key, cached_files[cfile][key])) + listing[key].append(cached_files[cfile][key]) + + summary.append("=" * 20) + summary = "\n".join(summary) + if prt: + # Return string to be printed: + return summary + else: + # Return dataframe listing: + # log.debug(summary) + return pd.DataFrame(listing) diff --git a/argopy/utils/casting.py b/argopy/utils/casting.py new file mode 100644 index 00000000..2de6ef76 --- /dev/null +++ b/argopy/utils/casting.py @@ -0,0 +1,389 @@ +import sys +import os +import numpy as np +import pandas as pd +import xarray as xr +import importlib +import json +import logging + +from .decorators import deprecated + + +log = logging.getLogger("argopy.utils.casting") + +path2assets = importlib.util.find_spec( + "argopy.static.assets" +).submodule_search_locations[0] + +with open(os.path.join(path2assets, "data_types.json"), "r") as f: + DATA_TYPES = json.load(f) + + +@deprecated +def cast_types(ds): # noqa: C901 + """Make sure variables are of the appropriate types according to Argo + + #todo: This is hard coded, but should be retrieved from an API somewhere. + Should be able to handle all possible variables encountered in the Argo dataset. + + Parameter + --------- + :class:`xarray.DataSet` + + Returns + ------- + :class:`xarray.DataSet` + """ + + list_str = [ + "PLATFORM_NUMBER", + "DATA_MODE", + "DIRECTION", + "DATA_CENTRE", + "DATA_TYPE", + "FORMAT_VERSION", + "HANDBOOK_VERSION", + "PROJECT_NAME", + "PI_NAME", + "STATION_PARAMETERS", + "DATA_CENTER", + "DC_REFERENCE", + "DATA_STATE_INDICATOR", + "PLATFORM_TYPE", + "FIRMWARE_VERSION", + "POSITIONING_SYSTEM", + "PROFILE_PRES_QC", + "PROFILE_PSAL_QC", + "PROFILE_TEMP_QC", + "PARAMETER", + "SCIENTIFIC_CALIB_EQUATION", + "SCIENTIFIC_CALIB_COEFFICIENT", + "SCIENTIFIC_CALIB_COMMENT", + "HISTORY_INSTITUTION", + "HISTORY_STEP", + "HISTORY_SOFTWARE", + "HISTORY_SOFTWARE_RELEASE", + "HISTORY_REFERENCE", + "HISTORY_QCTEST", + "HISTORY_ACTION", + "HISTORY_PARAMETER", + "VERTICAL_SAMPLING_SCHEME", + "FLOAT_SERIAL_NO", + "SOURCE", + "EXPOCODE", + "QCLEVEL", + ] + list_int = [ + "PLATFORM_NUMBER", + "WMO_INST_TYPE", + "WMO_INST_TYPE", + "CYCLE_NUMBER", + "CONFIG_MISSION_NUMBER", + ] + list_datetime = [ + "REFERENCE_DATE_TIME", + "DATE_CREATION", + "DATE_UPDATE", + "JULD", + "JULD_LOCATION", + "SCIENTIFIC_CALIB_DATE", + "HISTORY_DATE", + "TIME", + ] + + def fix_weird_bytes(x): + x = x.replace(b"\xb1", b"+/-") + return x + + fix_weird_bytes = np.vectorize(fix_weird_bytes) + + def cast_this(da, type): + """Low-level casting of DataArray values""" + try: + da.values = da.values.astype(type) + da.attrs["casted"] = 1 + except Exception: + msg = ( + "Oops! %s occurred. Fail to cast <%s> into %s for: %s. 
Encountered unique values: %s" + % (sys.exc_info()[0], str(da.dtype), type, da.name, str(np.unique(da))) + ) + log.debug(msg) + return da + + def cast_this_da(da): + """Cast any DataArray""" + v = da.name + da.attrs["casted"] = 0 + if v in list_str and da.dtype == "O": # Object + if v in ["SCIENTIFIC_CALIB_COEFFICIENT"]: + da.values = fix_weird_bytes(da.values) + da = cast_this(da, str) + + if v in list_int: # and da.dtype == 'O': # Object + da = cast_this(da, np.int32) + + if v in list_datetime and da.dtype == "O": # Object + if ( + "conventions" in da.attrs + and da.attrs["conventions"] == "YYYYMMDDHHMISS" + ): + if da.size != 0: + if len(da.dims) <= 1: + val = da.astype(str).values.astype("U14") + # This should not happen, but still ! That's real world data + val[val == " "] = "nan" + da.values = pd.to_datetime(val, format="%Y%m%d%H%M%S") + else: + s = da.stack(dummy_index=da.dims) + val = s.astype(str).values.astype("U14") + # This should not happen, but still ! That's real world data + val[val == ""] = "nan" + val[val == " "] = "nan" + # + s.values = pd.to_datetime(val, format="%Y%m%d%H%M%S") + da.values = s.unstack("dummy_index") + da = cast_this(da, "datetime64[s]") + else: + da = cast_this(da, "datetime64[s]") + + elif v == "SCIENTIFIC_CALIB_DATE": + da = cast_this(da, str) + s = da.stack(dummy_index=da.dims) + s.values = pd.to_datetime(s.values, format="%Y%m%d%H%M%S") + da.values = (s.unstack("dummy_index")).values + da = cast_this(da, "datetime64[s]") + + if "QC" in v and "PROFILE" not in v and "QCTEST" not in v: + if da.dtype == "O": # convert object to string + da = cast_this(da, str) + + # Address weird string values: + # (replace missing or nan values by a '0' that will be cast as an integer later + + if da.dtype == " 4: + tests[ + "datetim_min must be a string convertible to a Pandas datetime" + ] = lambda b: isinstance(b[-2], str) and is_dateconvertible(b[-2]) + tests[ + "datetim_max must be a string convertible to a Pandas datetime" + ] = lambda b: isinstance(b[-1], str) and is_dateconvertible(b[-1]) + + # Ranges: + tests["lon_min must be in [-180;180] or [0;360]"] = ( + lambda b: b[0] >= -180.0 and b[0] <= 360.0 + ) + tests["lon_max must be in [-180;180] or [0;360]"] = ( + lambda b: b[1] >= -180.0 and b[1] <= 360.0 + ) + tests["lat_min must be in [-90;90]"] = lambda b: b[2] >= -90.0 and b[2] <= 90 + tests["lat_max must be in [-90;90]"] = lambda b: b[3] >= -90.0 and b[3] <= 90.0 + + # Orders: + tests["lon_max must be larger than lon_min"] = lambda b: b[0] < b[1] + tests["lat_max must be larger than lat_min"] = lambda b: b[2] < b[3] + if len(box) > 4: + tests["datetim_max must come after datetim_min"] = lambda b: pd.to_datetime( + b[-2] + ) < pd.to_datetime(b[-1]) + + error = None + for msg, test in tests.items(): + if not test(box): + error = msg + break + + if error and errors == "raise": + raise ValueError("%s: %s" % (box, error)) + elif error: + return False + else: + return True + + +def is_box(box: list, errors="raise"): + """Check if this array matches a 3d or 4d data box definition + + Argopy expects one of the following 2 format to define a box: + + - box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max] + - box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max, datim_min, datim_max] + + This function check for this format compliance. 
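# Editor's note (annotation, not part of the diff): illustrative calls to the two box
# validators of this module, following the formats documented above; the coordinates,
# pressures and dates are arbitrary.
from argopy.utils.checkers import is_box, is_indexbox

is_box([-60, 0, 30.0, 50.0, 0, 500.0])                                 # True, a 3d data box
is_box([-60, 0, 30.0, 50.0, 0, 500.0, "2007-08-01", "2007-09-01"])     # True, a 4d data box
is_indexbox([-60, 0, 30.0, 50.0])                                      # True, index boxes carry no pressure axis
is_box([-60, 0, 30.0, 50.0], errors="ignore")                          # False, a data box needs 6 or 8 elements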
+ + Parameters + ---------- + box: list + errors: 'raise' + + Returns + ------- + bool + """ + + def is_dateconvertible(d): + try: + pd.to_datetime(d) + isit = True + except Exception: + isit = False + return isit + + tests = {} + # print(box) + # Formats: + tests["box must be a list"] = lambda b: isinstance(b, list) + tests["box must be a list with 6 or 8 elements"] = lambda b: len(b) in [6, 8] + + # Types: + tests["lon_min must be numeric"] = lambda b: ( + isinstance(b[0], int) or isinstance(b[0], (np.floating, float)) + ) + tests["lon_max must be numeric"] = lambda b: ( + isinstance(b[1], int) or isinstance(b[1], (np.floating, float)) + ) + tests["lat_min must be numeric"] = lambda b: ( + isinstance(b[2], int) or isinstance(b[2], (np.floating, float)) + ) + tests["lat_max must be numeric"] = lambda b: ( + isinstance(b[3], int) or isinstance(b[3], (np.floating, float)) + ) + tests["pres_min must be numeric"] = lambda b: ( + isinstance(b[4], int) or isinstance(b[4], (np.floating, float)) + ) + tests["pres_max must be numeric"] = lambda b: ( + isinstance(b[5], int) or isinstance(b[5], (np.floating, float)) + ) + if len(box) == 8: + tests[ + "datetim_min must be an object convertible to a Pandas datetime" + ] = lambda b: is_dateconvertible(b[-2]) + tests[ + "datetim_max must be an object convertible to a Pandas datetime" + ] = lambda b: is_dateconvertible(b[-1]) + + # Ranges: + tests["lon_min must be in [-180;180] or [0;360]"] = ( + lambda b: b[0] >= -180.0 and b[0] <= 360.0 + ) + tests["lon_max must be in [-180;180] or [0;360]"] = ( + lambda b: b[1] >= -180.0 and b[1] <= 360.0 + ) + tests["lat_min must be in [-90;90]"] = lambda b: b[2] >= -90.0 and b[2] <= 90 + tests["lat_max must be in [-90;90]"] = lambda b: b[3] >= -90.0 and b[3] <= 90.0 + tests["pres_min must be in [0;10000]"] = lambda b: b[4] >= 0 and b[4] <= 10000 + tests["pres_max must be in [0;10000]"] = lambda b: b[5] >= 0 and b[5] <= 10000 + + # Orders: + tests["lon_max must be larger than lon_min"] = lambda b: b[0] <= b[1] + tests["lat_max must be larger than lat_min"] = lambda b: b[2] <= b[3] + tests["pres_max must be larger than pres_min"] = lambda b: b[4] <= b[5] + if len(box) == 8: + tests["datetim_max must come after datetim_min"] = lambda b: pd.to_datetime( + b[-2] + ) <= pd.to_datetime(b[-1]) + + error = None + for msg, test in tests.items(): + if not test(box): + error = msg + break + + if error and errors == "raise": + raise ValueError("%s: %s" % (box, error)) + elif error: + return False + else: + return True + + +def is_list_of_strings(lst): + return isinstance(lst, list) and all(isinstance(elem, str) for elem in lst) + + +def is_list_of_dicts(lst): + return all(isinstance(x, dict) for x in lst) + + +def is_list_of_datasets(lst): + return all(isinstance(x, xr.Dataset) for x in lst) + + +def is_list_equal(lst1, lst2): + """Return true if 2 lists contain same elements""" + return len(lst1) == len(lst2) and len(lst1) == sum( + [1 for i, j in zip(lst1, lst2) if i == j] + ) + + +def check_wmo(lst, errors="raise"): + """Validate a WMO option and returned it as a list of integers + + Parameters + ---------- + wmo: int + WMO must be an integer or an iterable with elements that can be casted as integers + errors: {'raise', 'warn', 'ignore'} + Possibly raises a ValueError exception or UserWarning, otherwise fails silently. 
+ + Returns + ------- + list(int) + """ + is_wmo(lst, errors=errors) + + # Make sure we deal with a list + lst = to_list(lst) + + # Then cast list elements as integers + return [abs(int(x)) for x in lst] + + +def is_wmo(lst, errors="raise"): # noqa: C901 + """Check if a WMO is valid + + Parameters + ---------- + wmo: int, list(int), array(int) + WMO must be a single or a list of 5/7 digit positive numbers + errors: {'raise', 'warn', 'ignore'} + Possibly raises a ValueError exception or UserWarning, otherwise fails silently. + + Returns + ------- + bool + True if wmo is indeed a list of integers + """ + + # Make sure we deal with a list + lst = to_list(lst) + + # Error message: + # msg = "WMO must be an integer or an iterable with elements that can be casted as integers" + msg = "WMO must be a single or a list of 5/7 digit positive numbers. Invalid: '{}'".format + + # Then try to cast list elements as integers, return True if ok + result = True + try: + for x in lst: + if not str(x).isdigit(): + result = False + + if (len(str(x)) != 5) and (len(str(x)) != 7): + result = False + + if int(x) <= 0: + result = False + + except Exception: + result = False + if errors == "raise": + raise ValueError(msg(x)) + elif errors == "warn": + warnings.warn(msg(x)) + + if not result: + if errors == "raise": + raise ValueError(msg(x)) + elif errors == "warn": + warnings.warn(msg(x)) + else: + return result + + +def check_cyc(lst, errors="raise"): + """Validate a CYC option and returned it as a list of integers + + Parameters + ---------- + cyc: int + CYC must be an integer or an iterable with elements that can be casted as positive integers + errors: {'raise', 'warn', 'ignore'} + Possibly raises a ValueError exception or UserWarning, otherwise fails silently. + + Returns + ------- + list(int) + """ + is_cyc(lst, errors=errors) + + # Make sure we deal with a list + lst = to_list(lst) + + # Then cast list elements as integers + return [abs(int(x)) for x in lst] + + +def is_cyc(lst, errors="raise"): # noqa: C901 + """Check if a CYC is valid + Parameters + ---------- + cyc: int, list(int), array(int) + CYC must be a single or a list of at most 4 digit positive numbers + errors: {'raise', 'warn', 'ignore'} + Possibly raises a ValueError exception or UserWarning, otherwise fails silently. + Returns + ------- + bool + True if cyc is indeed a list of integers + """ + # Make sure we deal with a list + lst = to_list(lst) + + # Error message: + msg = "CYC must be a single or a list of at most 4 digit positive numbers. Invalid: '{}'".format + + # Then try to cast list elements as integers, return True if ok + result = True + try: + for x in lst: + if not str(x).isdigit(): + result = False + + if len(str(x)) > 4: + result = False + + if int(x) < 0: + result = False + + except Exception: + result = False + if errors == "raise": + raise ValueError(msg(x)) + elif errors == "warn": + warnings.warn(msg(x)) + + if not result: + if errors == "raise": + raise ValueError(msg(x)) + elif errors == "warn": + warnings.warn(msg(x)) + else: + return result + + +def check_index_cols(column_names: list, convention: str = "ar_index_global_prof"): + """ + ar_index_global_prof.txt: Index of profile files + Profile directory file of the Argo Global Data Assembly Center + file,date,latitude,longitude,ocean,profiler_type,institution,date_update + + argo_bio-profile_index.txt: bgc Argo profiles index file + The directory file describes all individual bio-profile files of the argo GDAC ftp site. 
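# Editor's note (annotation, not part of the diff): a minimal sketch of the WMO and cycle
# number validators defined just above; the float and cycle numbers are arbitrary examples.
from argopy.utils.checkers import is_wmo, check_wmo, is_cyc, check_cyc

is_wmo(2901746)                     # True, a single 7-digit positive number
check_wmo([2901746, "4902252"])     # [2901746, 4902252], cast to a list of integers
is_cyc(12345, errors="ignore")      # False, more than 4 digits
check_cyc([0, 1, 12])               # [0, 1, 12]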
+ file,date,latitude,longitude,ocean,profiler_type,institution,parameters,parameter_data_mode,date_update + """ + # Default for 'ar_index_global_prof' + ref = [ + "file", + "date", + "latitude", + "longitude", + "ocean", + "profiler_type", + "institution", + "date_update", + ] + if ( + convention == "argo_bio-profile_index" + or convention == "argo_synthetic-profile_index" + ): + ref = [ + "file", + "date", + "latitude", + "longitude", + "ocean", + "profiler_type", + "institution", + "parameters", + "parameter_data_mode", + "date_update", + ] + + if not is_list_equal(column_names, ref): + # log.debug("Expected: %s, got: %s" % (";".join(ref), ";".join(column_names))) + raise InvalidDatasetStructure("Unexpected column names in this index !") + else: + return column_names + + +def check_gdac_path(path, errors="ignore"): # noqa: C901 + """Check if a path has the expected GDAC ftp structure + + Expected GDAC ftp structure:: + + . + └── dac + ├── aoml + ├── ... + ├── coriolis + ├── ... + ├── meds + └── nmdis + + This check will return True if at least one DAC sub-folder is found under path/dac/ + + Examples:: + >>> check_gdac_path("https://data-argo.ifremer.fr") # True + >>> check_gdac_path("ftp://ftp.ifremer.fr/ifremer/argo") # True + >>> check_gdac_path("ftp://usgodae.org/pub/outgoing/argo") # True + >>> check_gdac_path("/home/ref-argo/gdac") # True + >>> check_gdac_path("https://www.ifremer.fr") # False + >>> check_gdac_path("ftp://usgodae.org/pub/outgoing") # False + + Parameters + ---------- + path: str + Path name to check, including access protocol + errors: str + "ignore" or "raise" (or "warn") + + Returns + ------- + checked: boolean + True if at least one DAC folder is found under path/dac/ + False otherwise + """ + # Create a file system for this path + if split_protocol(path)[0] is None: + fs = fsspec.filesystem("file") + elif "https" in split_protocol(path)[0]: + fs = fsspec.filesystem("http") + elif "ftp" in split_protocol(path)[0]: + try: + host = split_protocol(path)[-1].split("/")[0] + fs = fsspec.filesystem("ftp", host=host) + except gaierror: + if errors == "raise": + raise FtpPathError("Can't get address info (GAIerror) on '%s'" % host) + elif errors == "warn": + warnings.warn("Can't get address info (GAIerror) on '%s'" % host) + return False + else: + return False + else: + raise FtpPathError( + "Unknown protocol for an Argo GDAC host: %s" % split_protocol(path)[0] + ) + + # dacs = [ + # "aoml", + # "bodc", + # "coriolis", + # "csio", + # "csiro", + # "incois", + # "jma", + # "kma", + # "kordi", + # "meds", + # "nmdis", + # ] + + # Case 1: + check1 = ( + fs.exists(path) + and fs.exists(fs.sep.join([path, "dac"])) + # and np.any([fs.exists(fs.sep.join([path, "dac", dac])) for dac in dacs]) # Take too much time on http/ftp GDAC server + ) + if check1: + return True + elif errors == "raise": + raise FtpPathError( + "This path is not GDAC compliant (no `dac` folder with legitimate sub-folder):\n%s" + % path + ) + + elif errors == "warn": + warnings.warn("This path is not GDAC compliant:\n%s" % path) + return False + else: + return False + + +def isconnected(host: str = "https://www.ifremer.fr", maxtry: int = 10): + """Check if an URL is alive + + Parameters + ---------- + host: str + URL to use, 'https://www.ifremer.fr' by default + maxtry: int, default: 10 + Maximum number of host connections to try before + + Returns + ------- + bool + """ + # log.debug("isconnected: %s" % host) + if split_protocol(host)[0] in ["http", "https", "ftp", "sftp"]: + it = 0 + while it < maxtry: + 
try: + # log.debug("Checking if %s is connected ..." % host) + urllib.request.urlopen( + host, timeout=1 + ) # nosec B310 because host protocol already checked + result, it = True, maxtry + except Exception: + result, it = False, it + 1 + return result + else: + return os.path.exists(host) + + +def urlhaskeyword(url: str = "", keyword: str = "", maxtry: int = 10): + """Check if a keyword is in the content of a URL + + Parameters + ---------- + url: str + keyword: str + maxtry: int, default: 10 + Maximum number of host connections to try before returning False + + Returns + ------- + bool + """ + it = 0 + while it < maxtry: + try: + with fsspec.open(url) as f: + data = f.read() + result = keyword in str(data) + it = maxtry + except Exception: + result, it = False, it + 1 + return result + + +def isalive(api_server_check: Union[str, dict] = "") -> bool: + """Check if an API is alive or not + + 2 methods are available: + + - URL Ping + - keyword Check + + Parameters + ---------- + api_server_check + Url string or dictionary with [``url``, ``keyword``] keys. + + - For a string, uses: :class:`argopy.utilities.isconnected` + - For a dictionary, uses: :class:`argopy.utilities.urlhaskeyword` + + Returns + ------- + bool + """ + # log.debug("isalive: %s" % api_server_check) + if isinstance(api_server_check, dict): + return urlhaskeyword( + url=api_server_check["url"], keyword=api_server_check["keyword"] + ) + else: + return isconnected(api_server_check) + + +def isAPIconnected(src="erddap", data=True): + """Check if a source API is alive or not + + The API is connected when it has a live URL or valid folder path. + + Parameters + ---------- + src: str + The data or index source name, 'erddap' default + data: bool + If True check the data fetcher (default), if False, check the index fetcher + + Returns + ------- + bool + """ + if data: + list_src = list_available_data_src() + else: + list_src = list_available_index_src() + + if src in list_src and getattr(list_src[src], "api_server_check", None): + return isalive(list_src[src].api_server_check) + else: + raise InvalidFetcher + + +def erddap_ds_exists( + ds: Union[list, str] = "ArgoFloats", erddap: str = None, maxtry: int = 2 +) -> bool: + """Check if a dataset exists on a remote erddap server + + Parameter + --------- + ds: str, default='ArgoFloats' + Name of the erddap dataset to check + erddap: str, default=OPTIONS['erddap'] + Url of the erddap server + maxtry: int, default: 2 + Maximum number of host connections to try + + Return + ------ + bool + """ + if erddap is None: + erddap = OPTIONS["erddap"] + # log.debug("from erddap_ds_exists: %s" % erddap) + if isconnected(erddap, maxtry=maxtry): + from ..stores import httpstore # must import here to avoid circular import + + with httpstore(timeout=OPTIONS["api_timeout"]).open( + "".join([erddap, "/info/index.json"]) + ) as of: + erddap_index = json.load(of) + if is_list_of_strings(ds): + return [ + this_ds in [row[-1] for row in erddap_index["table"]["rows"]] + for this_ds in ds + ] + else: + return ds in [row[-1] for row in erddap_index["table"]["rows"]] + else: + log.debug("Cannot reach erddap server: %s" % erddap) + warnings.warn( + "Return False because we cannot reach the erddap server %s" % erddap + ) + return False diff --git a/argopy/utils/chunking.py b/argopy/utils/chunking.py new file mode 100644 index 00000000..9e900d54 --- /dev/null +++ b/argopy/utils/chunking.py @@ -0,0 +1,283 @@ +import numpy as np +import pandas as pd +from functools import reduce +from ..errors import 
InvalidFetcherAccessPoint +from .checkers import is_box + +import collections + +try: + collectionsAbc = collections.abc +except AttributeError: + collectionsAbc = collections + + +class Chunker: + """To chunk fetcher requests""" + + # Default maximum chunks size for all possible request parameters + default_chunksize = { + "box": { + "lon": 20, # degree + "lat": 20, # degree + "dpt": 500, # meters/db + "time": 3 * 30, + }, # Days + "wmo": {"wmo": 5, "cyc": 100}, # Nb of floats + } # Nb of cycles + + def __init__(self, request: dict, chunks: str = "auto", chunksize: dict = {}): + """Create a request Chunker + + Allow to easily split an access point request into chunks + + Parameters + ---------- + request: dict + Access point request to be chunked. One of the following: + + - {'box': [lon_min, lon_max, lat_min, lat_max, dpt_min, dpt_max, time_min, time_max]} + - {'box': [lon_min, lon_max, lat_min, lat_max, dpt_min, dpt_max]} + - {'wmo': [wmo1, wmo2, ...], 'cyc': [0,1, ...]} + chunks: 'auto' or dict + Dictionary with request access point as keys and number of chunks to create as values. + + Eg: {'wmo':10} will create a maximum of 10 chunks along WMOs. + chunksize: dict, optional + Dictionary with request access point as keys and chunk size as values (used as maximum values in + 'auto' chunking). + + Eg: {'wmo': 5} will create chunks with as many as 5 WMOs each. + + """ + self.request = request + + if "box" in self.request: + is_box(self.request["box"]) + if len(self.request["box"]) == 8: + self.this_chunker = self._chunker_box4d + elif len(self.request["box"]) == 6: + self.this_chunker = self._chunker_box3d + elif "wmo" in self.request: + self.this_chunker = self._chunker_wmo + else: + raise InvalidFetcherAccessPoint( + "'%s' not valid access point" % ",".join(self.request.keys()) + ) + + default = self.default_chunksize[[k for k in self.request.keys()][0]] + if len(chunksize) == 0: # chunksize = {} + chunksize = default + if not isinstance(chunksize, collectionsAbc.Mapping): + raise ValueError("chunksize must be mappable") + else: # merge with default: + chunksize = {**default, **chunksize} + self.chunksize = collections.OrderedDict(sorted(chunksize.items())) + + default = {k: "auto" for k in self.chunksize.keys()} + if chunks == "auto": # auto for all + chunks = default + elif len(chunks) == 0: # chunks = {}, i.e. chunk=1 for all + chunks = {k: 1 for k in self.request} + if not isinstance(chunks, collectionsAbc.Mapping): + raise ValueError("chunks must be 'auto' or mappable") + chunks = {**default, **chunks} + self.chunks = collections.OrderedDict(sorted(chunks.items())) + + def _split(self, lst, n=1): + """Yield successive n-sized chunks from lst""" + for i in range(0, len(lst), n): + yield lst[i : i + n] + + def _split_list_bychunknb(self, lst, n=1): + """Split list in n-imposed chunks of similar size + The last chunk may contain less element than the others, depending on the size of the list. + """ + res = [] + s = int(np.floor_divide(len(lst), n)) + for i in self._split(lst, s): + res.append(i) + if len(res) > n: + res[n - 1 : :] = [reduce(lambda i, j: i + j, res[n - 1 : :])] + return res + + def _split_list_bychunksize(self, lst, max_size=1): + """Split list in chunks of imposed size + The last chunk may contain less element than the others, depending on the size of the list. 
+ """ + res = [] + for i in self._split(lst, max_size): + res.append(i) + return res + + def _split_box(self, large_box, n=1, d="x"): # noqa: C901 + """Split a box domain in one direction in n-imposed equal chunks""" + if d == "x": + i_left, i_right = 0, 1 + if d == "y": + i_left, i_right = 2, 3 + if d == "z": + i_left, i_right = 4, 5 + if d == "t": + i_left, i_right = 6, 7 + if n == 1: + return [large_box] + boxes = [] + if d in ["x", "y", "z"]: + n += 1 # Required because we split in linspace + bins = np.linspace(large_box[i_left], large_box[i_right], n) + for ii, left in enumerate(bins): + if ii < len(bins) - 1: + right = bins[ii + 1] + this_box = large_box.copy() + this_box[i_left] = left + this_box[i_right] = right + boxes.append(this_box) + elif "t" in d: + dates = pd.to_datetime(large_box[i_left : i_right + 1]) + date_bounds = [ + d.strftime("%Y%m%d%H%M%S") + for d in pd.date_range(dates[0], dates[1], periods=n + 1) + ] + for i1, i2 in zip(np.arange(0, n), np.arange(1, n + 1)): + left, right = date_bounds[i1], date_bounds[i2] + this_box = large_box.copy() + this_box[i_left] = left + this_box[i_right] = right + boxes.append(this_box) + return boxes + + def _split_this_4Dbox(self, box, nx=1, ny=1, nz=1, nt=1): + box_list = [] + split_x = self._split_box(box, n=nx, d="x") + for bx in split_x: + split_y = self._split_box(bx, n=ny, d="y") + for bxy in split_y: + split_z = self._split_box(bxy, n=nz, d="z") + for bxyz in split_z: + split_t = self._split_box(bxyz, n=nt, d="t") + for bxyzt in split_t: + box_list.append(bxyzt) + return box_list + + def _split_this_3Dbox(self, box, nx=1, ny=1, nz=1): + box_list = [] + split_x = self._split_box(box, n=nx, d="x") + for bx in split_x: + split_y = self._split_box(bx, n=ny, d="y") + for bxy in split_y: + split_z = self._split_box(bxy, n=nz, d="z") + for bxyz in split_z: + box_list.append(bxyz) + return box_list + + def _chunker_box4d(self, request, chunks, chunks_maxsize): # noqa: C901 + BOX = request["box"] + n_chunks = chunks + for axis, n in n_chunks.items(): + if n == "auto": + if axis == "lon": + Lx = BOX[1] - BOX[0] + if Lx > chunks_maxsize["lon"]: # Max box size in longitude + n_chunks["lon"] = int( + np.ceil(np.divide(Lx, chunks_maxsize["lon"])) + ) + else: + n_chunks["lon"] = 1 + if axis == "lat": + Ly = BOX[3] - BOX[2] + if Ly > chunks_maxsize["lat"]: # Max box size in latitude + n_chunks["lat"] = int( + np.ceil(np.divide(Ly, chunks_maxsize["lat"])) + ) + else: + n_chunks["lat"] = 1 + if axis == "dpt": + Lz = BOX[5] - BOX[4] + if Lz > chunks_maxsize["dpt"]: # Max box size in depth + n_chunks["dpt"] = int( + np.ceil(np.divide(Lz, chunks_maxsize["dpt"])) + ) + else: + n_chunks["dpt"] = 1 + if axis == "time": + Lt = np.timedelta64( + pd.to_datetime(BOX[7]) - pd.to_datetime(BOX[6]), "D" + ) + MaxLen = np.timedelta64(chunks_maxsize["time"], "D") + if Lt > MaxLen: # Max box size in time + n_chunks["time"] = int(np.ceil(np.divide(Lt, MaxLen))) + else: + n_chunks["time"] = 1 + + boxes = self._split_this_4Dbox( + BOX, + nx=n_chunks["lon"], + ny=n_chunks["lat"], + nz=n_chunks["dpt"], + nt=n_chunks["time"], + ) + return {"chunks": sorted(n_chunks), "values": boxes} + + def _chunker_box3d(self, request, chunks, chunks_maxsize): + BOX = request["box"] + n_chunks = chunks + for axis, n in n_chunks.items(): + if n == "auto": + if axis == "lon": + Lx = BOX[1] - BOX[0] + if Lx > chunks_maxsize["lon"]: # Max box size in longitude + n_chunks["lon"] = int( + np.floor_divide(Lx, chunks_maxsize["lon"]) + ) + else: + n_chunks["lon"] = 1 + if axis == "lat": + Ly 
= BOX[3] - BOX[2] + if Ly > chunks_maxsize["lat"]: # Max box size in latitude + n_chunks["lat"] = int( + np.floor_divide(Ly, chunks_maxsize["lat"]) + ) + else: + n_chunks["lat"] = 1 + if axis == "dpt": + Lz = BOX[5] - BOX[4] + if Lz > chunks_maxsize["dpt"]: # Max box size in depth + n_chunks["dpt"] = int( + np.floor_divide(Lz, chunks_maxsize["dpt"]) + ) + else: + n_chunks["dpt"] = 1 + # if axis == 'time': + # Lt = np.timedelta64(pd.to_datetime(BOX[5]) - pd.to_datetime(BOX[4]), 'D') + # MaxLen = np.timedelta64(chunks_maxsize['time'], 'D') + # if Lt > MaxLen: # Max box size in time + # n_chunks['time'] = int(np.floor_divide(Lt, MaxLen)) + # else: + # n_chunks['time'] = 1 + boxes = self._split_this_3Dbox( + BOX, nx=n_chunks["lon"], ny=n_chunks["lat"], nz=n_chunks["dpt"] + ) + return {"chunks": sorted(n_chunks), "values": boxes} + + def _chunker_wmo(self, request, chunks, chunks_maxsize): + WMO = request["wmo"] + n_chunks = chunks + if n_chunks["wmo"] == "auto": + wmo_grps = self._split_list_bychunksize(WMO, max_size=chunks_maxsize["wmo"]) + else: + n = np.min([n_chunks["wmo"], len(WMO)]) + wmo_grps = self._split_list_bychunknb(WMO, n=n) + n_chunks["wmo"] = len(wmo_grps) + return {"chunks": sorted(n_chunks), "values": wmo_grps} + + def fit_transform(self): + """Chunk a fetcher request + + Returns + ------- + list + """ + self._results = self.this_chunker(self.request, self.chunks, self.chunksize) + # self.chunks = self._results['chunks'] + return self._results["values"] diff --git a/argopy/utils/compute.py b/argopy/utils/compute.py index 5b4a0385..62e59ef0 100644 --- a/argopy/utils/compute.py +++ b/argopy/utils/compute.py @@ -1,582 +1,192 @@ """ -This sub-module provides utilities for miscellaneous computation tasks - -We construct the MyThreadPoolExecutor class, -we create a series of classes using multiple inheritance to implement monitoring features - +Mathematically or statistically compute something output of xarray objects """ -from functools import lru_cache -import os -import sys -from concurrent.futures import ThreadPoolExecutor -from concurrent.futures import as_completed -from threading import Lock -import logging -from typing import Union -from abc import ABC, abstractmethod -import importlib - -try: - from importlib.resources import files # New in version 3.9 -except ImportError: - from pathlib import Path - - files = lambda x: Path( # noqa: E731 - importlib.util.find_spec(x).submodule_search_locations[0] - ) - -has_ipython = (spec := importlib.util.find_spec("IPython")) is not None -if has_ipython: - from IPython.display import display, clear_output, HTML +import numpy as np +from scipy import interpolate +import xarray as xr +from packaging import version +import logging +from ..errors import InvalidOption log = logging.getLogger("argopy.utils.compute") - -STATIC_FILES = ( - ("argopy.static.css", "w3.css"), - ("argopy.static.css", "compute.css"), -) - - -@lru_cache(None) -def _load_static_files(): - """Lazily load the resource files into memory the first time they are needed""" - return [ - files(package).joinpath(resource).read_text(encoding="utf-8") - for package, resource in STATIC_FILES - ] - - -class proto_MonitoredThreadPoolExecutor(ABC): - """ - Add: - - self.*_fct and self.*_fct_kwargs for all the processing steps - - self.status list of characters to describe each task status - - self.status_final character to describe the final computation status - """ - - def __init__( - self, - max_workers: int = 10, - task_fct=None, - task_fct_kwargs={}, - postprocessing_fct=None, - 
postprocessing_fct_kwargs={}, - callback_fct=None, - callback_fct_kwargs={}, - finalize_fct=None, - finalize_fct_kwargs={}, - **kwargs, - ): - super().__init__(**kwargs) - - self.max_workers = max_workers - - self.task_fct = task_fct - self.task_fct_kwargs = task_fct_kwargs - - self.postprocessing_fct = postprocessing_fct - self.postprocessing_fct_kwargs = postprocessing_fct_kwargs - - self.callback_fct = callback_fct - self.callback_fct_kwargs = callback_fct_kwargs - - if finalize_fct is None: - finalize_fct = self._default_finalize_fct - self.finalize_fct = finalize_fct - self.finalize_fct_kwargs = finalize_fct_kwargs - - def _default_finalize_fct(self, obj_list, **kwargs): - return [v for v in dict(sorted(obj_list.items())).values()], True - - def init_status(self, bucket): - self.status = ["?" for _ in range(len(bucket))] - if self.finalize_fct: - self.status_final = "?" - self.progress = [ - 0, - len(bucket) * 4 + 2, - ] # Each task goes by 4 status ('w', 'p', 'c', 'f'/'s') and final by 2 states ('w', 'f'/'s') +# +# From xarrayutils : https://github.com/jbusecke/xarrayutils/blob/master/xarrayutils/vertical_coordinates.py +#  Direct integration of those 2 functions to minimize dependencies and possibility of tuning them to our needs +# + + +def linear_interpolation_remap( + z, data, z_regridded, z_dim=None, z_regridded_dim="regridded", output_dim="remapped" +): + # interpolation called in xarray ufunc + def _regular_interp(x, y, target_values): + # remove all nans from input x and y + idx = np.logical_or(np.isnan(x), np.isnan(y)) + x = x[~idx] + y = y[~idx] + + # Need at least 5 points in the profile to interpolate, otherwise, return NaNs + if len(y) < 5: + interpolated = np.empty(len(target_values)) + interpolated[:] = np.nan else: - self.status_final = "n" - self.progress = [ - 0, - len(bucket) * 4, - ] # Each task goes by 4 status ('w', 'p', 'c', 'f'/'s') - - def task(self, obj_id, obj): - self.update_display_status(obj_id, "w") # Working - data, state = self.task_fct(obj, **self.task_fct_kwargs) - - self.update_display_status(obj_id, "p") # Post-processing - if self.postprocessing_fct is not None: - data, state = self.postprocessing_fct( - data, **self.postprocessing_fct_kwargs + # replace nans in target_values without of bound Values (just in case) + target_values = np.where( + ~np.isnan(target_values), target_values, np.nanmax(x) + 1 ) - - return obj_id, data, state - - def callback(self, future): - obj_id, data, state = future.result() - # self.update_display_status(obj_id, "s" if state else "f") - self.update_display_status(obj_id, "c") # Callback - if self.callback_fct is not None: - data, state = self.callback_fct(data, **self.callback_fct_kwargs) - return obj_id, data, state - - def finalize(self, results): - self.update_display_status_final("w") # Working - data, state = self.finalize_fct(results, **self.finalize_fct_kwargs) - self.update_display_status_final("s" if state else "f") - return data - - def execute(self, bucket: list = None, list_failed: bool = False): - self.bucket = bucket - self.init_status(bucket) - self.display_status() - - # Execute tasks and post-processing: - self.lock = Lock() - results = {} - with ThreadPoolExecutor(max_workers=self.max_workers) as executor: - futures = [ - executor.submit(self.task, ii, obj) for ii, obj in enumerate(bucket) - ] - [f.add_done_callback(self.callback) for f in futures] - for future in as_completed(futures): - try: - obj_id, data, state = future.result() - self.update_display_status(obj_id, "s" if state else "f") 
- except Exception: - raise - finally: - results.update({obj_id: data}) - - # Final tasks status: - failed = [ - obj for obj_id, obj in enumerate(self.bucket) if self.status[obj_id] == "f" - ] - - # Finalize: - final = self.finalize(results) - - # Return - if list_failed: - return final, failed - else: - return final - - @abstractmethod - def display_status(self): - raise NotImplementedError("Not implemented") - - @abstractmethod - def update_display_status(self, task_id: int, st: str): - raise NotImplementedError("Not implemented") - - @abstractmethod - def update_display_status_final(self, st: str): - raise NotImplementedError("Not implemented") - - -class proto_MonitoredPoolExecutor_monitor(proto_MonitoredThreadPoolExecutor): - def __init__( - self, - show: Union[bool, str] = True, - task_legend: dict = {"w": "Working", "p": "Post-processing", "c": "Callback"}, - final_legend: dict = {"task": "Processing tasks", "final": "Finalizing"}, - **kwargs, - ): - super().__init__(**kwargs) - self.task_legend = task_legend - self.final_legend = final_legend - self.show = bool(show) - # log.debug(self.runner) - - @property - def runner(self) -> str: + # Interpolate with fill value parameter to extend min pressure toward 0 + interpolated = interpolate.interp1d( + x, y, bounds_error=False, fill_value=(y[0], y[-1]) + )(target_values) + return interpolated + + # infer dim from input + if z_dim is None: + if len(z.dims) != 1: + raise RuntimeError("if z_dim is not specified, x must be a 1D array.") + dim = z.dims[0] + else: + dim = z_dim + + # if dataset is passed drop all data_vars that dont contain dim + if isinstance(data, xr.Dataset): + raise ValueError("Dataset input is not supported yet") + # TODO: for a dataset input just apply the function for each appropriate array + + if version.parse(xr.__version__) > version.parse("0.15.0"): + kwargs = dict( + input_core_dims=[[dim], [dim], [z_regridded_dim]], + output_core_dims=[[output_dim]], + vectorize=True, + dask="parallelized", + output_dtypes=[data.dtype], + dask_gufunc_kwargs={ + "output_sizes": {output_dim: len(z_regridded[z_regridded_dim])} + }, + ) + else: + kwargs = dict( + input_core_dims=[[dim], [dim], [z_regridded_dim]], + output_core_dims=[[output_dim]], + vectorize=True, + dask="parallelized", + output_dtypes=[data.dtype], + output_sizes={output_dim: len(z_regridded[z_regridded_dim])}, + ) + remapped = xr.apply_ufunc(_regular_interp, z, data, z_regridded, **kwargs) + + remapped.coords[output_dim] = z_regridded.rename( + {z_regridded_dim: output_dim} + ).coords[output_dim] + return remapped + + +def groupby_remap( + z, + data, + z_regridded, # noqa C901 + z_dim=None, + z_regridded_dim="regridded", + output_dim="remapped", + select="deep", + right=False, +): + """todo: Need a docstring here !""" + + # sub-sampling called in xarray ufunc + def _subsample_bins(x, y, target_values): + # remove all nans from input x and y try: - shell = get_ipython().__class__.__name__ - if shell == "ZMQInteractiveShell": - return "notebook" # Jupyter notebook or qtconsole - elif shell == "TerminalInteractiveShell": - return "terminal" # Terminal running IPython - else: - return False # Other type (?) 
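
Note: the slimmed-down `argopy/utils/compute.py` now only carries the vertical remapping helpers. As a quick illustration of the `linear_interpolation_remap` signature introduced above, here is a minimal, self-contained sketch; the toy arrays, dimension names and values are made up for illustration and are not taken from the argopy test suite:

```python
import numpy as np
import xarray as xr
from argopy.utils.compute import linear_interpolation_remap  # path per this refactor

# Toy profiles: 3 profiles x 10 vertical levels (fabricated values)
pres = xr.DataArray(
    np.tile(np.linspace(5.0, 900.0, 10), (3, 1)),
    dims=("N_PROF", "N_LEVELS"),
)
temp = 20.0 - 0.01 * pres  # fake, linearly decreasing temperature

# Standard levels to remap onto; the coordinate name must match z_regridded_dim
std = np.arange(0.0, 1000.0, 100.0)
std_lev = xr.DataArray(std, dims=("Z_LEVELS",), coords={"Z_LEVELS": std})

temp_i = linear_interpolation_remap(
    pres, temp, std_lev,
    z_dim="N_LEVELS",
    z_regridded_dim="Z_LEVELS",
    output_dim="PRES_INTERPOLATED",
)
print(temp_i.dims)  # ('N_PROF', 'PRES_INTERPOLATED')
```

`groupby_remap` below follows the same `xr.apply_ufunc` pattern but, instead of interpolating, bins samples with `np.digitize` and maps each bin to a selected value or statistic according to the `select` argument.
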
- except NameError: - return "standard" # Probably standard Python interpreter - - @property - def COLORS(self): - """task status key to css class and color and label dictionary""" - return { - "?": ("gray", "Queued", "⏸"), - "w": ("yellow", self.task_legend["w"], "⏺"), - "p": ("blue", self.task_legend["p"], "🔄"), - "c": ("cyan", self.task_legend["c"], "⏯"), - "f": ("red", "Failed", "🔴"), - "s": ("green", "Succeed", "🟢"), - } - - @property - def STATE(self): - """final state key to css class dictionary""" - return { - "?": "queue", - "w": "blinking", - "s": "success", - "f": "failure", - "n": "none", - } - - @property - def STATE_COLORS(self): - """final state key to colors dictionary""" - return { - "?": "gray", - "w": "amber", - "s": "green", - "f": "red", - "n": "blue", - } - - def display_status(self): - pass - - def update_display_status(self, *args, **kwargs): - pass - - def update_display_status_final(self, *args, **kwargs): - pass - - -class proto_MonitoredPoolExecutor_notebook(proto_MonitoredPoolExecutor_monitor): - """ - Add HTML jupyter notebook display - """ - - @property - def css_style(self): - return "\n".join(_load_static_files()) - - @property - def status_html(self): - # Create a legend: - legend = ["\t
"] - # legend.append("\t\t
Tasks:
") - for key in self.COLORS.keys(): - color, desc, icon = self.COLORS[key] - legend.append( - "\t\t
%s
" - % (color, desc) + idx = np.logical_or(np.isnan(x), np.isnan(y)) + except TypeError: + log.debug( + "Error with this '%s' y data content: %s" % (type(y), str(np.unique(y))) ) - legend.append("\t
") - # legend.append("\t\t
Finalized state:
Processing
Failure
Success
") - legend = "\n".join(legend) - - # Create a status bar for tasks: - content = ["\t
" % self.STATE[self.status_final]] - for s in self.status: - content.append("\t\t
" % self.COLORS[s][0]) - content.append("\t
") - content = "\n".join(content) + raise + x = x[~idx] + y = y[~idx] + + ifound = np.digitize( + x, target_values, right=right + ) # ``bins[i-1] <= x < bins[i]`` + ifound -= 1 # Because digitize returns a 1-based indexing, we need to remove 1 + y_binned = np.ones_like(target_values) * np.nan + + for ib, this_ibin in enumerate(np.unique(ifound)): + ix = np.where(ifound == this_ibin) + iselect = ix[-1] + + # Map to y value at specific x index in the bin: + if select == "shallow": + iselect = iselect[0] # min/shallow + mapped_value = y[iselect] + elif select == "deep": + iselect = iselect[-1] # max/deep + mapped_value = y[iselect] + elif select == "middle": + iselect = iselect[ + np.where(x[iselect] >= np.median(x[iselect]))[0][0] + ] # median/middle + mapped_value = y[iselect] + elif select == "random": + iselect = iselect[np.random.randint(len(iselect))] + mapped_value = y[iselect] + + # or Map to y statistics in the bin: + elif select == "mean": + mapped_value = np.nanmean(y[iselect]) + elif select == "min": + mapped_value = np.nanmin(y[iselect]) + elif select == "max": + mapped_value = np.nanmax(y[iselect]) + elif select == "median": + mapped_value = np.median(y[iselect]) - # Progress bar: - val = int(100 * self.progress[0] / self.progress[1]) - color = self.STATE_COLORS[self.status_final] - txt = self.final_legend["task"] - if self.status_final != "?": - txt = "%s" % (self.final_legend["final"]) - if self.status_final == "f": - txt = "Failed %s" % (self.final_legend["final"]) - if self.status_final == "s": - txt = "Succeed in %s" % (self.final_legend["final"]) - txt = "%s (%i%% processed)" % (txt, val) - progress = ["\t
"] - progress.append( - "\t\t
%s
" - % (color, val, txt) + else: + raise InvalidOption("`select` option has invalid value (%s)" % select) + + y_binned[this_ibin] = mapped_value + + return y_binned + + # infer dim from input + if z_dim is None: + if len(z.dims) != 1: + raise RuntimeError("if z_dim is not specified, x must be a 1D array.") + dim = z.dims[0] + else: + dim = z_dim + + # if dataset is passed drop all data_vars that don't contain dim + if isinstance(data, xr.Dataset): + raise ValueError("Dataset input is not supported yet") + # TODO: for a dataset input just apply the function for each appropriate array + + if version.parse(xr.__version__) > version.parse("0.15.0"): + kwargs = dict( + input_core_dims=[[dim], [dim], [z_regridded_dim]], + output_core_dims=[[output_dim]], + vectorize=True, + dask="parallelized", + output_dtypes=[data.dtype], + dask_gufunc_kwargs={ + "output_sizes": {output_dim: len(z_regridded[z_regridded_dim])} + }, ) - progress.append("\t
") - progress = "\n".join(progress) - - # Complete HTML: - html = ( - "
\n" - f"\n" - f"{legend}\n" - f"{content}\n" - "
\n" - f"{progress}\n" + else: + kwargs = dict( + input_core_dims=[[dim], [dim], [z_regridded_dim]], + output_core_dims=[[output_dim]], + vectorize=True, + dask="parallelized", + output_dtypes=[data.dtype], + output_sizes={output_dim: len(z_regridded[z_regridded_dim])}, ) - return HTML(html) - - def display_status(self): - super().display_status() - if self.show and self.runner == "notebook": - clear_output(wait=True) - display(self.status_html) - - def update_display_status(self, obj_id, status): - super().update_display_status() - with self.lock: - self.status[obj_id] = "%s" % status - self.progress[0] += 1 - self.display_status() - - def update_display_status_final(self, state): - super().update_display_status_final() - self.status_final = state - self.progress[0] += 1 - self.display_status() - - -class proto_MonitoredPoolExecutor_terminal(proto_MonitoredPoolExecutor_monitor): - """ - Add terminal display - """ - - def __init__( - self, - icon: bool = False, - **kwargs, - ): - super().__init__(**kwargs) - self._text_only = ~bool(icon) - self._reprinter = None - - class _Reprinter: - def __init__(self, text: str = ""): - self.text = text - self.counter = 0 - - def moveup(self, lines): - for _ in range(lines): - sys.stdout.write("\x1b[A") - - def reprint(self, text): - if self.counter >= 1: - self.moveup(self.text.count("\n")) - print(text, end="\r") - self.text = text - self.counter += 1 - - def _adjust_for_terminal_width(self, text, max_width=None): - """Split text if larger than terminal""" - term_width, _ = os.get_terminal_size() - term_width = term_width if max_width is None else int(term_width / max_width) - lines = [] - if len(text) > term_width: - i_start, i_end = 0, term_width - 1 - while i_end <= len(text): - lines.append(text[i_start:i_end]) - i_start = i_end - i_end = i_start + term_width - 1 - lines.append(text[i_start:]) - return "\n".join(lines) - else: - return text - - @property - def status_txt(self): - def f(text, color=None, bold=0, italic=0, underline=0, crossed=0, negative=0): - """Format text with color, - - Uses no color by default but accepts any color from the C class. 
- - Parameters - ---------- - text: str - color: str - bold: bool - """ - - PREF = "\033[" - RESET = f"{PREF}0m" - - class C: - gray = "37" - yellow = "33" - amber = "33" - blue = "34" - cyan = "36" - red = "31" - green = "32" - magenta = "35" - black = "30" - white = "97" - - dec = [] - if bold: - dec.append("1") - if italic: - dec.append("3") - if underline: - dec.append("4") - if crossed: - dec.append("9") - if negative: - dec.append("7") - - if color is None: - if len(dec) > 0: - dec = ";".join(dec) - return f"{PREF}{dec}m" + text + RESET - else: - return f"{PREF}" + text + RESET - else: - if len(dec) > 0: - dec = ";".join(dec) - return f"{PREF}{dec};{getattr(C, color)}m" + text + RESET - else: - return f"{PREF}{getattr(C, color)}m" + text + RESET - - # Text only (no icons): - if self._text_only: - # Create a legend: - legend = [] - for key in self.COLORS.keys(): - color, desc, icon = self.COLORS[key] - legend.append(f("%s: %s" % (key, desc), color)) - legend = " | ".join(legend) - - # Create a status bar for tasks: - # with colored brackets color for final status: - raw_content = "[%s]" % "".join(self.status) - lines = [] - for status_line in self._adjust_for_terminal_width(raw_content).split("\n"): - line_content = [] - for s in status_line: - if s not in ["[", "]"]: - line_content.append( - f(s, self.COLORS[s][0], negative=s in ["f"]) - ) - else: - line_content.append(f(s, self.STATE_COLORS[self.status_final])) - line_content = "".join(line_content) - lines.append(line_content) - content = "\n".join(lines) - - # Icons only - else: - # Create a legend: - legend = [] - for key in self.COLORS.keys(): - color, desc, icon = self.COLORS[key] - legend.append(f"{icon}: %s" % f(desc, color=color)) - legend = " | ".join(legend) - - # Create a status bar for tasks: - # with colored brackets color for final status: - # raw_content = f"[%s]" % "".join(self.status) - raw_content = "%s" % "".join(self.status) - lines = [] - for status_line in self._adjust_for_terminal_width( - raw_content, max_width=4 - ).split("\n"): - line_content = [] - for s in status_line: - if s not in ["[", "]"]: - line_content.append("%s " % self.COLORS[s][2]) - else: - line_content.append(f(s, self.STATE_COLORS[self.status_final])) - line_content = "".join(line_content) - lines.append(line_content) - content = "\n".join(lines) - - # Progress bar: - val = int(100 * self.progress[0] / self.progress[1]) - color = self.STATE_COLORS[self.status_final] - txt = self.final_legend["task"] - if self.status_final != "?": - txt = "%s" % (self.final_legend["final"]) - if self.status_final == "f": - txt = "Failed %s" % (self.final_legend["final"]) - if self.status_final == "s": - txt = "Succeed in %s" % (self.final_legend["final"]) - txt = "%s (%i%% processed)" % (txt, val) - progress = f("%s ..." % txt, color, negative=0) - - # Complete STDOUT: - txt = f"\n" f"{legend}\n" f"{content}\n" f"{progress: <50}\n" - - return txt - - def display_status(self): - super().display_status() - - if self.show and self.runner in ["terminal", "standard"]: - if self._reprinter is None: - self._reprinter = self._Reprinter(self.status_txt) - # os.system('cls' if os.name == 'nt' else 'clear') - self._reprinter.reprint(f"{self.status_txt}") - # sys.stdout.flush() - - -if has_ipython: - - class c(proto_MonitoredPoolExecutor_notebook, proto_MonitoredPoolExecutor_terminal): - pass - -else: - - class c(proto_MonitoredPoolExecutor_terminal): - pass - - -class MyThreadPoolExecutor(c): - """ - This is a low-level helper class not intended to be used directly. 
- - Examples - -------- - :: - - from argopy.utils.compute import MyThreadPoolExecutor as MyExecutor - from random import random - from time import sleep - import numpy as np - - def my_task(obj, errors='ignore'): - data = random() - sleep(data * 3) - state = np.random.randint(0,100) >= 25 - if not state: - if errors == 'raise': - raise ValueError('Hello world') - elif errors == 'ignore': - pass - return data, state - - def my_postprocess(obj, opt=12): - sleep(random() * 5) - data = obj**opt - state = np.random.randint(0,100) >= 25 - return data, state - - def my_callback(obj, opt=2): - sleep(random() * 2) - data = obj**opt - state = np.random.randint(0,100) >= 25 - return data, state - - def my_final(obj_list, opt=True): - data = random() - sleep(data * 20) - results = [v for v in dict(sorted(obj_list.items())).values()] - return data, np.all(results) - - if __name__ == '__main__': - run = MyExecutor(max_workers=25, - task_fct=my_task, - postprocessing_fct=my_postprocess, - callback_fct=my_callback, - finalize_fct=my_final, - show=1, - ) - results, failed = run.execute(range(100), list_failed=True) - print(results) - """ + remapped = xr.apply_ufunc(_subsample_bins, z, data, z_regridded, **kwargs) - pass + remapped.coords[output_dim] = z_regridded.rename( + {z_regridded_dim: output_dim} + ).coords[output_dim] + return remapped diff --git a/argopy/utils/decorators.py b/argopy/utils/decorators.py new file mode 100644 index 00000000..5693348e --- /dev/null +++ b/argopy/utils/decorators.py @@ -0,0 +1,149 @@ +from functools import wraps +import warnings + + +class DocInherit(object): + """Docstring inheriting method descriptor + + The class itself is also used as a decorator + + Usage: + + class Foo(object): + def foo(self): + "Frobber" + pass + + class Bar(Foo): + @doc_inherit + def foo(self): + pass + + Now, Bar.foo.__doc__ == Bar().foo.__doc__ == Foo.foo.__doc__ == "Frobber" + + src: https://code.activestate.com/recipes/576862/ + """ + + def __init__(self, mthd): + self.mthd = mthd + self.name = mthd.__name__ + + def __get__(self, obj, cls): + if obj: + return self.get_with_inst(obj, cls) + else: + return self.get_no_inst(cls) + + def get_with_inst(self, obj, cls): + overridden = getattr(super(cls, obj), self.name, None) + + @wraps(self.mthd, assigned=("__name__", "__module__")) + def f(*args, **kwargs): + return self.mthd(obj, *args, **kwargs) + + return self.use_parent_doc(f, overridden) + + def get_no_inst(self, cls): + for parent in cls.__mro__[1:]: + overridden = getattr(parent, self.name, None) + if overridden: + break + + @wraps(self.mthd, assigned=("__name__", "__module__")) + def f(*args, **kwargs): + return self.mthd(*args, **kwargs) + + return self.use_parent_doc(f, overridden) + + def use_parent_doc(self, func, source): + if source is None: + raise NameError("Can't find '%s' in parents" % self.name) + func.__doc__ = source.__doc__ + return func + + +doc_inherit = DocInherit + + +def deprecated(reason): + """Deprecation warning decorator. + + This is a decorator which can be used to mark functions + as deprecated. It will result in a warning being emitted + when the function is used. + + Parameters + ---------- + reason: {str, None} + Text message to send with deprecation warning + + Examples + -------- + The @deprecated can be used with a 'reason'. + + .. code-block:: python + + @deprecated("please, use another function") + def old_function(x, y): + pass + + or without: + + .. 
code-block:: python + + @deprecated + def old_function(x, y): + pass + + References + ---------- + https://stackoverflow.com/a/40301488 + """ + import inspect + + if isinstance(reason, str): + + def decorator(func1): + if inspect.isclass(func1): + fmt1 = "Call to deprecated class {name} ({reason})." + else: + fmt1 = "Call to deprecated function {name} ({reason})." + + @wraps(func1) + def new_func1(*args, **kwargs): + warnings.simplefilter("always", DeprecationWarning) + warnings.warn( + fmt1.format(name=func1.__name__, reason=reason), + category=DeprecationWarning, + stacklevel=2, + ) + warnings.simplefilter("default", DeprecationWarning) + return func1(*args, **kwargs) + + return new_func1 + + return decorator + + elif inspect.isclass(reason) or inspect.isfunction(reason): + func2 = reason + + if inspect.isclass(func2): + fmt2 = "Call to deprecated class {name}." + else: + fmt2 = "Call to deprecated function {name}." + + @wraps(func2) + def new_func2(*args, **kwargs): + warnings.simplefilter("always", DeprecationWarning) + warnings.warn( + fmt2.format(name=func2.__name__), + category=DeprecationWarning, + stacklevel=2, + ) + warnings.simplefilter("default", DeprecationWarning) + return func2(*args, **kwargs) + + return new_func2 + + else: + raise TypeError(repr(type(reason))) diff --git a/argopy/utils/format.py b/argopy/utils/format.py new file mode 100644 index 00000000..3377e0d9 --- /dev/null +++ b/argopy/utils/format.py @@ -0,0 +1,202 @@ +""" +Manipulate Argo formatted string and print/stdout formatters +""" +import os +from urllib.parse import urlparse +import logging + + +log = logging.getLogger("argopy.utils.format") + + +def format_oneline(s, max_width=65): + """Return a string formatted for a line print""" + if len(s) > max_width: + padding = " ... " + n = (max_width - len(padding)) // 2 + q = (max_width - len(padding)) % 2 + if q == 0: + return "".join([s[0:n], padding, s[-n:]]) + else: + return "".join([s[0 : n + 1], padding, s[-n:]]) + else: + return s + + +def argo_split_path(this_path): # noqa C901 + """Split path from a GDAC ftp style Argo netcdf file and return information + + >>> argo_split_path('coriolis/6901035/profiles/D6901035_001D.nc') + >>> argo_split_path('https://data-argo.ifremer.fr/dac/csiro/5903939/profiles/D5903939_103.nc') + + Parameters + ---------- + str + + Returns + ------- + dict + """ + dacs = [ + "aoml", + "bodc", + "coriolis", + "csio", + "csiro", + "incois", + "jma", + "kma", + "kordi", + "meds", + "nmdis", + ] + output = {} + + start_with = ( + lambda f, x: f[0 : len(x)] == x if len(x) <= len(f) else False + ) # noqa: E731 + + def split_path(p, sep="/"): + """Split a pathname. Returns tuple "(head, tail)" where "tail" is + everything after the final slash. Either part may be empty.""" + # Same as posixpath.py but we get to choose the file separator ! + p = os.fspath(p) + i = p.rfind(sep) + 1 + head, tail = p[:i], p[i:] + if head and head != sep * len(head): + head = head.rstrip(sep) + return head, tail + + def fix_localhost(host): + if "ftp://localhost:" in host: + return "ftp://%s" % (urlparse(host).netloc) + if "http://127.0.0.1:" in host: + return "http://%s" % (urlparse(host).netloc) + else: + return "" + + known_origins = [ + "https://data-argo.ifremer.fr", + "ftp://ftp.ifremer.fr/ifremer/argo", + "ftp://usgodae.org/pub/outgoing/argo", + fix_localhost(this_path), + "", + ] + + output["origin"] = [ + origin for origin in known_origins if start_with(this_path, origin) + ][0] + output["origin"] = "." 
if output["origin"] == "" else output["origin"] + "/" + sep = "/" if output["origin"] != "." else os.path.sep + + (path, file) = split_path(this_path, sep=sep) + + output["path"] = path.replace(output["origin"], "") + output["name"] = file + + # Deal with the path: + # dac/// + # dac///profiles + path_parts = path.split(sep) + + try: + if path_parts[-1] == "profiles": + output["type"] = "Mono-cycle profile file" + output["wmo"] = path_parts[-2] + output["dac"] = path_parts[-3] + else: + output["type"] = "Multi-cycle profile file" + output["wmo"] = path_parts[-1] + output["dac"] = path_parts[-2] + except Exception: + log.warning(this_path) + log.warning(path) + log.warning(sep) + log.warning(path_parts) + log.warning(output) + raise + + if output["dac"] not in dacs: + log.debug("This is not a Argo GDAC compliant file path: %s" % path) + log.warning(this_path) + log.warning(path) + log.warning(sep) + log.warning(path_parts) + log.warning(output) + raise ValueError( + "This is not a Argo GDAC compliant file path (invalid DAC name: '%s')" + % output["dac"] + ) + + # Deal with the file name: + filename, file_extension = os.path.splitext(output["name"]) + output["extension"] = file_extension + if file_extension != ".nc": + raise ValueError( + "This is not a Argo GDAC compliant file path (invalid file extension: '%s')" + % file_extension + ) + filename_parts = output["name"].split("_") + + if "Mono" in output["type"]: + prefix = filename_parts[0].split(output["wmo"])[0] + if "R" in prefix: + output["data_mode"] = "R, Real-time data" + if "D" in prefix: + output["data_mode"] = "D, Delayed-time data" + + if "S" in prefix: + output["type"] = "S, Synthetic BGC Mono-cycle profile file" + if "M" in prefix: + output["type"] = "M, Merged BGC Mono-cycle profile file" + if "B" in prefix: + output["type"] = "B, BGC Mono-cycle profile file" + + suffix = filename_parts[-1].split(output["wmo"])[-1] + if "D" in suffix: + output["direction"] = "D, descending profiles" + elif suffix == "" and "Mono" in output["type"]: + output["direction"] = "A, ascending profiles (implicit)" + + else: + typ = filename_parts[-1].split(".nc")[0] + if typ == "prof": + output["type"] = "Multi-cycle file" + if typ == "Sprof": + output["type"] = "S, Synthetic BGC Multi-cycle profiles file" + if typ == "tech": + output["type"] = "Technical data file" + if typ == "meta": + output["type"] = "Metadata file" + if "traj" in typ: + # possible typ = [Rtraj, Dtraj, BRtraj, BDtraj] + output["type"], i = "Trajectory file", 0 + if typ[0] == "B": + output["type"], i = "BGC Trajectory file", 1 + if typ.split("traj")[0][i] == "D": + output["data_mode"] = "D, Delayed-time data" + elif typ.split("traj")[0][i] == "R": + output["data_mode"] = "R, Real-time data" + else: + output["data_mode"] = "R, Real-time data (implicit)" + + # Adjust origin and path for local files: + # This ensure that output['path'] is agnostic to users and can be reused on any gdac compliant architecture + parts = path.split(sep) + i, stop = len(parts) - 1, False + while not stop: + if ( + parts[i] == "profiles" + or parts[i] == output["wmo"] + or parts[i] == output["dac"] + or parts[i] == "dac" + ): + i = i - 1 + if i < 0: + stop = True + else: + stop = True + output["origin"] = sep.join(parts[0 : i + 1]) + output["path"] = output["path"].replace(output["origin"], "") + + return dict(sorted(output.items())) diff --git a/argopy/utils/geo.py b/argopy/utils/geo.py new file mode 100644 index 00000000..ed6a27e0 --- /dev/null +++ b/argopy/utils/geo.py @@ -0,0 +1,150 @@ +import numpy as 
np +import pandas as pd + + +def wrap_longitude(grid_long): + """Allows longitude (0-360) to wrap beyond the 360 mark, for mapping purposes. + + Makes sure that, if the longitude is near the boundary (0 or 360) that we + wrap the values beyond 360, so it appears nicely on a map + This is a refactor between get_region_data and get_region_hist_locations to + avoid duplicate code + + source: + https://github.com/euroargodev/argodmqc_owc/blob/e174f4538fdae1534c9740491398972b1ffec3ca/pyowc/utilities.py#L80 + + Parameters + ---------- + grid_long: array of longitude values + + Returns + ------- + array of longitude values that can extend past 360 + """ + neg_long = np.argwhere(grid_long < 0) + grid_long[neg_long] = grid_long[neg_long] + 360 + + # if we have data close to upper boundary (360), then wrap some of the data round + # so it appears on the map + top_long = np.argwhere(grid_long >= 320) + if top_long.__len__() != 0: + bottom_long = np.argwhere(grid_long <= 40) + grid_long[bottom_long] = 360 + grid_long[bottom_long] + + return grid_long + + +def wmo2box(wmo_id: int): + """Convert WMO square box number into a latitude/longitude box + + See: + https://en.wikipedia.org/wiki/World_Meteorological_Organization_squares + https://commons.wikimedia.org/wiki/File:WMO-squares-global.gif + + Parameters + ---------- + wmo_id: int + WMO square number, must be between 1000 and 7817 + + Returns + ------- + box: list(int) + [lon_min, lon_max, lat_min, lat_max] bounds to the WMO square number + """ + if wmo_id < 1000 or wmo_id > 7817: + raise ValueError("Invalid WMO square number, must be between 1000 and 7817.") + wmo_id = str(wmo_id) + + # "global quadrant" numbers where 1=NE, 3=SE, 5=SW, 7=NW + quadrant = int(wmo_id[0]) + if quadrant not in [1, 3, 5, 7]: + raise ValueError("Invalid WMO square number, 1st digit must be 1, 3, 5 or 7.") + + # 'minimum' Latitude square boundary, nearest to the Equator + nearest_to_the_Equator_latitude = int(wmo_id[1]) + + # 'minimum' Longitude square boundary, nearest to the Prime Meridian + nearest_to_the_Prime_Meridian = int(wmo_id[2:4]) + + # + dd = 10 + if quadrant in [1, 3]: + lon_min = nearest_to_the_Prime_Meridian * dd + lon_max = nearest_to_the_Prime_Meridian * dd + dd + elif quadrant in [5, 7]: + lon_min = -nearest_to_the_Prime_Meridian * dd - dd + lon_max = -nearest_to_the_Prime_Meridian * dd + + if quadrant in [1, 7]: + lat_min = nearest_to_the_Equator_latitude * dd + lat_max = nearest_to_the_Equator_latitude * dd + dd + elif quadrant in [3, 5]: + lat_min = -nearest_to_the_Equator_latitude * dd - dd + lat_max = -nearest_to_the_Equator_latitude * dd + + box = [lon_min, lon_max, lat_min, lat_max] + return box + + +def toYearFraction( + this_date: pd._libs.tslibs.timestamps.Timestamp = pd.to_datetime("now", utc=True) +): + """Compute decimal year, robust to leap years, precision to the second + + Compute the fraction of the year a given timestamp corresponds to. + The "fraction of the year" goes: + - from 0 on 01-01T00:00:00.000 of the year + - to 1 on the 01-01T00:00:00.000 of the following year + + 1 second corresponds to the number of days in the year times 86400. + The faction of the year is rounded to 10-digits in order to have a "second" precision. 
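
To make the convention above concrete, here is a small sketch reproducing the same decimal-year arithmetic with plain pandas and numpy; the example date is arbitrary and the printed value is only indicative:

```python
import numpy as np
import pandas as pd

# Decimal year = year + (seconds elapsed since Jan 1st) / (seconds in that year)
ts = pd.to_datetime("2020-07-01T00:00:00")  # any timestamp will do
start = pd.to_datetime("%i-01-01T00:00:00" % ts.year)
year_sec = (start + pd.offsets.DateOffset(years=1) - start).total_seconds()
decimal_year = ts.year + np.round((ts - start).total_seconds() / year_sec, 10)
print(decimal_year)  # ~2020.4973, since 2020 is a leap year (366 days)
```

`toYearFraction(ts)` is expected to return the same value, and `YearFraction_to_datetime` inverts it back to a timestamp.
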
+ + See discussion here: https://github.com/euroargodev/argodmqc_owc/issues/35 + + Parameters + ---------- + pd._libs.tslibs.timestamps.Timestamp + + Returns + ------- + float + """ + if "UTC" in [this_date.tzname() if this_date.tzinfo is not None else ""]: + startOfThisYear = pd.to_datetime( + "%i-01-01T00:00:00.000" % this_date.year, utc=True + ) + else: + startOfThisYear = pd.to_datetime("%i-01-01T00:00:00.000" % this_date.year) + yearDuration_sec = ( + startOfThisYear + pd.offsets.DateOffset(years=1) - startOfThisYear + ).total_seconds() + + yearElapsed_sec = (this_date - startOfThisYear).total_seconds() + fraction = yearElapsed_sec / yearDuration_sec + fraction = np.round(fraction, 10) + return this_date.year + fraction + + +def YearFraction_to_datetime(yf: float): + """Compute datetime from year fraction + + Inverse the toYearFraction() function + + Parameters + ---------- + float + + Returns + ------- + pd._libs.tslibs.timestamps.Timestamp + """ + year = np.int32(yf) + fraction = yf - year + fraction = np.round(fraction, 10) + + startOfThisYear = pd.to_datetime("%i-01-01T00:00:00" % year) + yearDuration_sec = ( + startOfThisYear + pd.offsets.DateOffset(years=1) - startOfThisYear + ).total_seconds() + yearElapsed_sec = pd.Timedelta(fraction * yearDuration_sec, unit="s") + return pd.to_datetime(startOfThisYear + yearElapsed_sec, unit="s") diff --git a/argopy/utils/lists.py b/argopy/utils/lists.py new file mode 100644 index 00000000..32ae6f82 --- /dev/null +++ b/argopy/utils/lists.py @@ -0,0 +1,205 @@ +import sys +import warnings +from ..options import OPTIONS + + +def list_available_data_src(): + """List all available data sources""" + sources = {} + try: + from ..data_fetchers import erddap_data as Erddap_Fetchers + + # Ensure we're loading the erddap data fetcher with the current options: + Erddap_Fetchers.api_server_check = Erddap_Fetchers.api_server_check.replace( + Erddap_Fetchers.api_server, OPTIONS["erddap"] + ) + Erddap_Fetchers.api_server = OPTIONS["erddap"] + + sources["erddap"] = Erddap_Fetchers + except Exception: + warnings.warn( + "An error occurred while loading the ERDDAP data fetcher, " + "it will not be available !\n%s\n%s" + % (sys.exc_info()[0], sys.exc_info()[1]) + ) + pass + + try: + from ..data_fetchers import argovis_data as ArgoVis_Fetchers + + sources["argovis"] = ArgoVis_Fetchers + except Exception: + warnings.warn( + "An error occurred while loading the ArgoVis data fetcher, " + "it will not be available !\n%s\n%s" + % (sys.exc_info()[0], sys.exc_info()[1]) + ) + pass + + try: + from ..data_fetchers import gdacftp_data as GDAC_Fetchers + + # Ensure we're loading the gdac data fetcher with the current options: + GDAC_Fetchers.api_server_check = OPTIONS["ftp"] + GDAC_Fetchers.api_server = OPTIONS["ftp"] + + sources["gdac"] = GDAC_Fetchers + except Exception: + warnings.warn( + "An error occurred while loading the GDAC data fetcher, " + "it will not be available !\n%s\n%s" + % (sys.exc_info()[0], sys.exc_info()[1]) + ) + pass + + # return dict(sorted(sources.items())) + return sources + + +def list_available_index_src(): + """List all available index sources""" + sources = {} + try: + from ..data_fetchers import erddap_index as Erddap_Fetchers + + # Ensure we're loading the erddap data fetcher with the current options: + Erddap_Fetchers.api_server_check = Erddap_Fetchers.api_server_check.replace( + Erddap_Fetchers.api_server, OPTIONS["erddap"] + ) + Erddap_Fetchers.api_server = OPTIONS["erddap"] + + sources["erddap"] = Erddap_Fetchers + except 
Exception: + warnings.warn( + "An error occurred while loading the ERDDAP index fetcher, " + "it will not be available !\n%s\n%s" + % (sys.exc_info()[0], sys.exc_info()[1]) + ) + pass + + try: + from ..data_fetchers import gdacftp_index as GDAC_Fetchers + + # Ensure we're loading the gdac data fetcher with the current options: + GDAC_Fetchers.api_server_check = OPTIONS["ftp"] + GDAC_Fetchers.api_server = OPTIONS["ftp"] + + sources["gdac"] = GDAC_Fetchers + except Exception: + warnings.warn( + "An error occurred while loading the GDAC index fetcher, " + "it will not be available !\n%s\n%s" + % (sys.exc_info()[0], sys.exc_info()[1]) + ) + pass + + return sources + + +def list_standard_variables(): + """List of variables for standard users""" + return [ + "DATA_MODE", + "LATITUDE", + "LONGITUDE", + "POSITION_QC", + "DIRECTION", + "PLATFORM_NUMBER", + "CYCLE_NUMBER", + "PRES", + "TEMP", + "PSAL", + "PRES_QC", + "TEMP_QC", + "PSAL_QC", + "PRES_ADJUSTED", + "TEMP_ADJUSTED", + "PSAL_ADJUSTED", + "PRES_ADJUSTED_QC", + "TEMP_ADJUSTED_QC", + "PSAL_ADJUSTED_QC", + "PRES_ADJUSTED_ERROR", + "TEMP_ADJUSTED_ERROR", + "PSAL_ADJUSTED_ERROR", + "PRES_ERROR", # can be created from PRES_ADJUSTED_ERROR after a filter_data_mode + "TEMP_ERROR", + "PSAL_ERROR", + "JULD", + "JULD_QC", + "TIME", + "TIME_QC", + # "CONFIG_MISSION_NUMBER", + ] + + +def list_multiprofile_file_variables(): + """List of variables in a netcdf multiprofile file. + + This is for files created by GDAC under //_prof.nc + """ + return [ + "CONFIG_MISSION_NUMBER", + "CYCLE_NUMBER", + "DATA_CENTRE", + "DATA_MODE", + "DATA_STATE_INDICATOR", + "DATA_TYPE", + "DATE_CREATION", + "DATE_UPDATE", + "DC_REFERENCE", + "DIRECTION", + "FIRMWARE_VERSION", + "FLOAT_SERIAL_NO", + "FORMAT_VERSION", + "HANDBOOK_VERSION", + "HISTORY_ACTION", + "HISTORY_DATE", + "HISTORY_INSTITUTION", + "HISTORY_PARAMETER", + "HISTORY_PREVIOUS_VALUE", + "HISTORY_QCTEST", + "HISTORY_REFERENCE", + "HISTORY_SOFTWARE", + "HISTORY_SOFTWARE_RELEASE", + "HISTORY_START_PRES", + "HISTORY_STEP", + "HISTORY_STOP_PRES", + "JULD", + "JULD_LOCATION", + "JULD_QC", + "LATITUDE", + "LONGITUDE", + "PARAMETER", + "PI_NAME", + "PLATFORM_NUMBER", + "PLATFORM_TYPE", + "POSITIONING_SYSTEM", + "POSITION_QC", + "PRES", + "PRES_ADJUSTED", + "PRES_ADJUSTED_ERROR", + "PRES_ADJUSTED_QC", + "PRES_QC", + "PROFILE_PRES_QC", + "PROFILE_PSAL_QC", + "PROFILE_TEMP_QC", + "PROJECT_NAME", + "PSAL", + "PSAL_ADJUSTED", + "PSAL_ADJUSTED_ERROR", + "PSAL_ADJUSTED_QC", + "PSAL_QC", + "REFERENCE_DATE_TIME", + "SCIENTIFIC_CALIB_COEFFICIENT", + "SCIENTIFIC_CALIB_COMMENT", + "SCIENTIFIC_CALIB_DATE", + "SCIENTIFIC_CALIB_EQUATION", + "STATION_PARAMETERS", + "TEMP", + "TEMP_ADJUSTED", + "TEMP_ADJUSTED_ERROR", + "TEMP_ADJUSTED_QC", + "TEMP_QC", + "VERTICAL_SAMPLING_SCHEME", + "WMO_INST_TYPE", + ] diff --git a/argopy/utils/locals.py b/argopy/utils/locals.py new file mode 100644 index 00000000..af46b6e7 --- /dev/null +++ b/argopy/utils/locals.py @@ -0,0 +1,269 @@ +import os +import sys +import subprocess # nosec B404 only used without user inputs +import platform +import locale +import struct +import importlib +import contextlib +import copy +from ..options import OPTIONS + + +def get_sys_info(): + """Returns system information as a dict""" + + blob = [] + + # get full commit hash + commit = None + if os.path.isdir(".git") and os.path.isdir("argopy"): + try: + pipe = subprocess.Popen( # nosec No user provided input to control here + 'git log --format="%H" -n 1'.split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + 
so, serr = pipe.communicate() + except Exception: + pass + else: + if pipe.returncode == 0: + commit = so + try: + commit = so.decode("utf-8") + except ValueError: + pass + commit = commit.strip().strip('"') + + blob.append(("commit", commit)) + + try: + (sysname, nodename, release, version_, machine, processor) = platform.uname() + blob.extend( + [ + ("python", sys.version), + ("python-bits", struct.calcsize("P") * 8), + ("OS", "%s" % (sysname)), + ("OS-release", "%s" % (release)), + ("machine", "%s" % (machine)), + ("processor", "%s" % (processor)), + ("byteorder", "%s" % sys.byteorder), + ("LC_ALL", "%s" % os.environ.get("LC_ALL", "None")), + ("LANG", "%s" % os.environ.get("LANG", "None")), + ("LOCALE", "%s.%s" % locale.getlocale()), + ] + ) + except Exception: + pass + + return blob + + +def netcdf_and_hdf5_versions(): + libhdf5_version = None + libnetcdf_version = None + try: + import netCDF4 + + libhdf5_version = netCDF4.__hdf5libversion__ + libnetcdf_version = netCDF4.__netcdf4libversion__ + except ImportError: + try: + import h5py + + libhdf5_version = h5py.version.hdf5_version + except ImportError: + pass + return [("libhdf5", libhdf5_version), ("libnetcdf", libnetcdf_version)] + + +def show_versions(file=sys.stdout, conda=False): # noqa: C901 + """Print the versions of argopy and its dependencies + + Parameters + ---------- + file : file-like, optional + print to the given file-like object. Defaults to sys.stdout. + conda: bool, optional + format versions to be copy/pasted on a conda environment file (default, False) + """ + sys_info = get_sys_info() + + try: + sys_info.extend(netcdf_and_hdf5_versions()) + except Exception as e: + print(f"Error collecting netcdf / hdf5 version: {e}") + + DEPS = { + "core": sorted( + [ + ("argopy", lambda mod: mod.__version__), + ("xarray", lambda mod: mod.__version__), + ("scipy", lambda mod: mod.__version__), + ("netCDF4", lambda mod: mod.__version__), + ( + "erddapy", + lambda mod: mod.__version__, + ), # This could go away from requirements ? 
+ ("fsspec", lambda mod: mod.__version__), + ("aiohttp", lambda mod: mod.__version__), + ( + "packaging", + lambda mod: mod.__version__, + ), # will come with xarray, Using 'version' to make API compatible with several fsspec releases + ("requests", lambda mod: mod.__version__), + ("toolz", lambda mod: mod.__version__), + ] + ), + "ext.util": sorted( + [ + ( + "gsw", + lambda mod: mod.__version__, + ), # Used by xarray accessor to compute new variables + ("tqdm", lambda mod: mod.__version__), + ("zarr", lambda mod: mod.__version__), + ] + ), + "ext.perf": sorted( + [ + ("dask", lambda mod: mod.__version__), + ("distributed", lambda mod: mod.__version__), + ("pyarrow", lambda mod: mod.__version__), + ] + ), + "ext.plot": sorted( + [ + ("matplotlib", lambda mod: mod.__version__), + ("cartopy", lambda mod: mod.__version__), + ("seaborn", lambda mod: mod.__version__), + ("IPython", lambda mod: mod.__version__), + ("ipywidgets", lambda mod: mod.__version__), + ("ipykernel", lambda mod: mod.__version__), + ] + ), + "dev": sorted( + [ + ("bottleneck", lambda mod: mod.__version__), + ("cftime", lambda mod: mod.__version__), + ("cfgrib", lambda mod: mod.__version__), + ("conda", lambda mod: mod.__version__), + ("nc_time_axis", lambda mod: mod.__version__), + ( + "numpy", + lambda mod: mod.__version__, + ), # will come with xarray and pandas + ("pandas", lambda mod: mod.__version__), # will come with xarray + ("pip", lambda mod: mod.__version__), + ("black", lambda mod: mod.__version__), + ("flake8", lambda mod: mod.__version__), + ("pytest", lambda mod: mod.__version__), # will come with pandas + ("pytest_env", lambda mod: mod.__version__), # will come with pandas + ("pytest_cov", lambda mod: mod.__version__), # will come with pandas + ( + "pytest_localftpserver", + lambda mod: mod.__version__, + ), # will come with pandas + ( + "pytest_reportlog", + lambda mod: mod.__version__, + ), # will come with pandas + ("setuptools", lambda mod: mod.__version__), + ("aiofiles", lambda mod: mod.__version__), + ("sphinx", lambda mod: mod.__version__), + ] + ), + } + + DEPS_blob = {} + for level in DEPS.keys(): + deps = DEPS[level] + deps_blob = list() + for modname, ver_f in deps: + try: + if modname in sys.modules: + mod = sys.modules[modname] + else: + mod = importlib.import_module(modname) + except Exception: + deps_blob.append((modname, "-")) + else: + try: + ver = ver_f(mod) + deps_blob.append((modname, ver)) + except Exception: + deps_blob.append((modname, "installed")) + DEPS_blob[level] = deps_blob + + print("\nSYSTEM", file=file) + print("------", file=file) + for k, stat in sys_info: + print(f"{k}: {stat}", file=file) + + for level in DEPS_blob: + if conda: + print("\n# %s:" % level.upper(), file=file) + else: + title = "INSTALLED VERSIONS: %s" % level.upper() + print("\n%s" % title, file=file) + print("-" * len(title), file=file) + deps_blob = DEPS_blob[level] + for k, stat in deps_blob: + if conda: + if k != "argopy": + kf = k.replace("_", "-") + comment = " " if stat != "-" else "# " + print( + f"{comment} - {kf} = {stat}", file=file + ) # Format like a conda env line, useful to update ci/requirements + else: + print("{:<12}: {:<12}".format(k, stat), file=file) + + +@contextlib.contextmanager +def modified_environ(*remove, **update): + """ + Temporarily updates the ``os.environ`` dictionary in-place. + + The ``os.environ`` dictionary is updated in-place so that the modification + is sure to work in all situations. + + :param remove: Environment variables to remove. 
+ :param update: Dictionary of environment variables and values to add/update. + """ + # Source: https://github.com/laurent-laporte-pro/stackoverflow-q2059482 + env = os.environ + update = update or {} + remove = remove or [] + + # List of environment variables being updated or removed. + stomped = (set(update.keys()) | set(remove)) & set(env.keys()) + # Environment variables and values to restore on exit. + update_after = {k: env[k] for k in stomped} + # Environment variables and values to remove on exit. + remove_after = frozenset(k for k in update if k not in env) + + try: + env.update(update) + [env.pop(k, None) for k in remove] + yield + finally: + env.update(update_after) + [env.pop(k) for k in remove_after] + + +def show_options(file=sys.stdout): # noqa: C901 + """Print options of argopy + + Parameters + ---------- + file : file-like, optional + print to the given file-like object. Defaults to sys.stdout. + """ + print("\nARGOPY OPTIONS", file=file) + print("--------------", file=file) + opts = copy.deepcopy(OPTIONS) + opts = dict(sorted(opts.items())) + for k, v in opts.items(): + print(f"{k}: {v}", file=file) diff --git a/argopy/utils/loggers.py b/argopy/utils/loggers.py new file mode 100644 index 00000000..c0a94482 --- /dev/null +++ b/argopy/utils/loggers.py @@ -0,0 +1,47 @@ +import warnings +import inspect +import pathlib +import os +import logging + + +log = logging.getLogger("argopy.utils.loggers") + + +def warnUnless(ok, txt): + """Function to raise a warning unless condition is True + + This function IS NOT to be used as a decorator anymore + + Parameters + ---------- + ok: bool + Condition to raise the warning or not + txt: str + Text to display in the warning + """ + if not ok: + msg = "%s %s" % (inspect.stack()[1].function, txt) + warnings.warn(msg) + + +def log_argopy_callerstack(level="debug"): + """log the caller’s stack""" + froot = str(pathlib.Path(__file__).parent.resolve()) + for ideep, frame in enumerate(inspect.stack()[1:]): + if os.path.join("argopy", "argopy") in frame.filename: + # msg = ["└─"] + # [msg.append("─") for ii in range(ideep)] + msg = [""] + [msg.append(" ") for ii in range(ideep)] + msg.append( + "└─ %s:%i -> %s" + % (frame.filename.replace(froot, ""), frame.lineno, frame.function) + ) + msg = "".join(msg) + if level == "info": + log.info(msg) + elif level == "debug": + log.debug(msg) + elif level == "warning": + log.warning(msg) diff --git a/argopy/utils/monitored_threadpool.py b/argopy/utils/monitored_threadpool.py new file mode 100644 index 00000000..5b637230 --- /dev/null +++ b/argopy/utils/monitored_threadpool.py @@ -0,0 +1,582 @@ +""" +This sub-module provides utilities for miscellaneous computation tasks with multitheading + +We construct the MyThreadPoolExecutor class, +we create a series of classes using multiple inheritance to implement monitoring features + +""" +from functools import lru_cache +import os +import sys +from concurrent.futures import ThreadPoolExecutor +from concurrent.futures import as_completed +from threading import Lock +import logging +from typing import Union +from abc import ABC, abstractmethod +import importlib + +try: + from importlib.resources import files # New in version 3.9 +except ImportError: + from pathlib import Path + + files = lambda x: Path( # noqa: E731 + importlib.util.find_spec(x).submodule_search_locations[0] + ) + +has_ipython = (spec := importlib.util.find_spec("IPython")) is not None +if has_ipython: + from IPython.display import display, clear_output, HTML + + +log = 
logging.getLogger("argopy.utils.compute") + + +STATIC_FILES = ( + ("argopy.static.css", "w3.css"), + ("argopy.static.css", "compute.css"), +) + + +@lru_cache(None) +def _load_static_files(): + """Lazily load the resource files into memory the first time they are needed""" + return [ + files(package).joinpath(resource).read_text(encoding="utf-8") + for package, resource in STATIC_FILES + ] + + +class proto_MonitoredThreadPoolExecutor(ABC): + """ + Add: + - self.*_fct and self.*_fct_kwargs for all the processing steps + - self.status list of characters to describe each task status + - self.status_final character to describe the final computation status + """ + + def __init__( + self, + max_workers: int = 10, + task_fct=None, + task_fct_kwargs={}, + postprocessing_fct=None, + postprocessing_fct_kwargs={}, + callback_fct=None, + callback_fct_kwargs={}, + finalize_fct=None, + finalize_fct_kwargs={}, + **kwargs, + ): + super().__init__(**kwargs) + + self.max_workers = max_workers + + self.task_fct = task_fct + self.task_fct_kwargs = task_fct_kwargs + + self.postprocessing_fct = postprocessing_fct + self.postprocessing_fct_kwargs = postprocessing_fct_kwargs + + self.callback_fct = callback_fct + self.callback_fct_kwargs = callback_fct_kwargs + + if finalize_fct is None: + finalize_fct = self._default_finalize_fct + self.finalize_fct = finalize_fct + self.finalize_fct_kwargs = finalize_fct_kwargs + + def _default_finalize_fct(self, obj_list, **kwargs): + return [v for v in dict(sorted(obj_list.items())).values()], True + + def init_status(self, bucket): + self.status = ["?" for _ in range(len(bucket))] + if self.finalize_fct: + self.status_final = "?" + self.progress = [ + 0, + len(bucket) * 4 + 2, + ] # Each task goes by 4 status ('w', 'p', 'c', 'f'/'s') and final by 2 states ('w', 'f'/'s') + else: + self.status_final = "n" + self.progress = [ + 0, + len(bucket) * 4, + ] # Each task goes by 4 status ('w', 'p', 'c', 'f'/'s') + + def task(self, obj_id, obj): + self.update_display_status(obj_id, "w") # Working + data, state = self.task_fct(obj, **self.task_fct_kwargs) + + self.update_display_status(obj_id, "p") # Post-processing + if self.postprocessing_fct is not None: + data, state = self.postprocessing_fct( + data, **self.postprocessing_fct_kwargs + ) + + return obj_id, data, state + + def callback(self, future): + obj_id, data, state = future.result() + # self.update_display_status(obj_id, "s" if state else "f") + self.update_display_status(obj_id, "c") # Callback + if self.callback_fct is not None: + data, state = self.callback_fct(data, **self.callback_fct_kwargs) + return obj_id, data, state + + def finalize(self, results): + self.update_display_status_final("w") # Working + data, state = self.finalize_fct(results, **self.finalize_fct_kwargs) + self.update_display_status_final("s" if state else "f") + return data + + def execute(self, bucket: list = None, list_failed: bool = False): + self.bucket = bucket + self.init_status(bucket) + self.display_status() + + # Execute tasks and post-processing: + self.lock = Lock() + results = {} + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + futures = [ + executor.submit(self.task, ii, obj) for ii, obj in enumerate(bucket) + ] + [f.add_done_callback(self.callback) for f in futures] + for future in as_completed(futures): + try: + obj_id, data, state = future.result() + self.update_display_status(obj_id, "s" if state else "f") + except Exception: + raise + finally: + results.update({obj_id: data}) + + # Final tasks status: + failed = [ 
+ obj for obj_id, obj in enumerate(self.bucket) if self.status[obj_id] == "f" + ] + + # Finalize: + final = self.finalize(results) + + # Return + if list_failed: + return final, failed + else: + return final + + @abstractmethod + def display_status(self): + raise NotImplementedError("Not implemented") + + @abstractmethod + def update_display_status(self, task_id: int, st: str): + raise NotImplementedError("Not implemented") + + @abstractmethod + def update_display_status_final(self, st: str): + raise NotImplementedError("Not implemented") + + +class proto_MonitoredPoolExecutor_monitor(proto_MonitoredThreadPoolExecutor): + def __init__( + self, + show: Union[bool, str] = True, + task_legend: dict = {"w": "Working", "p": "Post-processing", "c": "Callback"}, + final_legend: dict = {"task": "Processing tasks", "final": "Finalizing"}, + **kwargs, + ): + super().__init__(**kwargs) + self.task_legend = task_legend + self.final_legend = final_legend + self.show = bool(show) + # log.debug(self.runner) + + @property + def runner(self) -> str: + try: + shell = get_ipython().__class__.__name__ + if shell == "ZMQInteractiveShell": + return "notebook" # Jupyter notebook or qtconsole + elif shell == "TerminalInteractiveShell": + return "terminal" # Terminal running IPython + else: + return False # Other type (?) + except NameError: + return "standard" # Probably standard Python interpreter + + @property + def COLORS(self): + """task status key to css class and color and label dictionary""" + return { + "?": ("gray", "Queued", "⏸"), + "w": ("yellow", self.task_legend["w"], "⏺"), + "p": ("blue", self.task_legend["p"], "🔄"), + "c": ("cyan", self.task_legend["c"], "⏯"), + "f": ("red", "Failed", "🔴"), + "s": ("green", "Succeed", "🟢"), + } + + @property + def STATE(self): + """final state key to css class dictionary""" + return { + "?": "queue", + "w": "blinking", + "s": "success", + "f": "failure", + "n": "none", + } + + @property + def STATE_COLORS(self): + """final state key to colors dictionary""" + return { + "?": "gray", + "w": "amber", + "s": "green", + "f": "red", + "n": "blue", + } + + def display_status(self): + pass + + def update_display_status(self, *args, **kwargs): + pass + + def update_display_status_final(self, *args, **kwargs): + pass + + +class proto_MonitoredPoolExecutor_notebook(proto_MonitoredPoolExecutor_monitor): + """ + Add HTML jupyter notebook display + """ + + @property + def css_style(self): + return "\n".join(_load_static_files()) + + @property + def status_html(self): + # Create a legend: + legend = ["\t
"] + # legend.append("\t\t
Tasks:
") + for key in self.COLORS.keys(): + color, desc, icon = self.COLORS[key] + legend.append( + "\t\t
%s
" + % (color, desc) + ) + legend.append("\t
") + # legend.append("\t\t
Finalized state:
Processing
Failure
Success
") + legend = "\n".join(legend) + + # Create a status bar for tasks: + content = ["\t
" % self.STATE[self.status_final]] + for s in self.status: + content.append("\t\t
" % self.COLORS[s][0]) + content.append("\t
") + content = "\n".join(content) + + # Progress bar: + val = int(100 * self.progress[0] / self.progress[1]) + color = self.STATE_COLORS[self.status_final] + txt = self.final_legend["task"] + if self.status_final != "?": + txt = "%s" % (self.final_legend["final"]) + if self.status_final == "f": + txt = "Failed %s" % (self.final_legend["final"]) + if self.status_final == "s": + txt = "Succeed in %s" % (self.final_legend["final"]) + txt = "%s (%i%% processed)" % (txt, val) + progress = ["\t
"] + progress.append( + "\t\t
%s
" + % (color, val, txt) + ) + progress.append("\t
") + progress = "\n".join(progress) + + # Complete HTML: + html = ( + "
\n" + f"\n" + f"{legend}\n" + f"{content}\n" + "
\n" + f"{progress}\n" + ) + return HTML(html) + + def display_status(self): + super().display_status() + if self.show and self.runner == "notebook": + clear_output(wait=True) + display(self.status_html) + + def update_display_status(self, obj_id, status): + super().update_display_status() + with self.lock: + self.status[obj_id] = "%s" % status + self.progress[0] += 1 + self.display_status() + + def update_display_status_final(self, state): + super().update_display_status_final() + self.status_final = state + self.progress[0] += 1 + self.display_status() + + +class proto_MonitoredPoolExecutor_terminal(proto_MonitoredPoolExecutor_monitor): + """ + Add terminal display + """ + + def __init__( + self, + icon: bool = False, + **kwargs, + ): + super().__init__(**kwargs) + self._text_only = ~bool(icon) + self._reprinter = None + + class _Reprinter: + def __init__(self, text: str = ""): + self.text = text + self.counter = 0 + + def moveup(self, lines): + for _ in range(lines): + sys.stdout.write("\x1b[A") + + def reprint(self, text): + if self.counter >= 1: + self.moveup(self.text.count("\n")) + print(text, end="\r") + self.text = text + self.counter += 1 + + def _adjust_for_terminal_width(self, text, max_width=None): + """Split text if larger than terminal""" + term_width, _ = os.get_terminal_size() + term_width = term_width if max_width is None else int(term_width / max_width) + lines = [] + if len(text) > term_width: + i_start, i_end = 0, term_width - 1 + while i_end <= len(text): + lines.append(text[i_start:i_end]) + i_start = i_end + i_end = i_start + term_width - 1 + lines.append(text[i_start:]) + return "\n".join(lines) + else: + return text + + @property + def status_txt(self): + def f(text, color=None, bold=0, italic=0, underline=0, crossed=0, negative=0): + """Format text with color, + + Uses no color by default but accepts any color from the C class. 
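
For readers unfamiliar with the escape sequences this helper relies on, here is a standalone illustration of the same mechanism using standard ANSI SGR codes; the function name and defaults are invented for the example and are not part of argopy:

```python
# ANSI SGR sequences: "\033[" + ";"-joined codes + "m", and "\033[0m" resets.
PREF, RESET = "\033[", "\033[0m"

def colorize(text, color_code="32", bold=False):
    # e.g. "31" = red, "32" = green, "33" = yellow; "1" turns bold on
    codes = (["1"] if bold else []) + [color_code]
    return f"{PREF}{';'.join(codes)}m{text}{RESET}"

print(colorize("Succeed", "32"))             # green
print(colorize("Failed", "31", bold=True))   # bold red
```
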
+ + Parameters + ---------- + text: str + color: str + bold: bool + """ + + PREF = "\033[" + RESET = f"{PREF}0m" + + class C: + gray = "37" + yellow = "33" + amber = "33" + blue = "34" + cyan = "36" + red = "31" + green = "32" + magenta = "35" + black = "30" + white = "97" + + dec = [] + if bold: + dec.append("1") + if italic: + dec.append("3") + if underline: + dec.append("4") + if crossed: + dec.append("9") + if negative: + dec.append("7") + + if color is None: + if len(dec) > 0: + dec = ";".join(dec) + return f"{PREF}{dec}m" + text + RESET + else: + return f"{PREF}" + text + RESET + else: + if len(dec) > 0: + dec = ";".join(dec) + return f"{PREF}{dec};{getattr(C, color)}m" + text + RESET + else: + return f"{PREF}{getattr(C, color)}m" + text + RESET + + # Text only (no icons): + if self._text_only: + # Create a legend: + legend = [] + for key in self.COLORS.keys(): + color, desc, icon = self.COLORS[key] + legend.append(f("%s: %s" % (key, desc), color)) + legend = " | ".join(legend) + + # Create a status bar for tasks: + # with colored brackets color for final status: + raw_content = "[%s]" % "".join(self.status) + lines = [] + for status_line in self._adjust_for_terminal_width(raw_content).split("\n"): + line_content = [] + for s in status_line: + if s not in ["[", "]"]: + line_content.append( + f(s, self.COLORS[s][0], negative=s in ["f"]) + ) + else: + line_content.append(f(s, self.STATE_COLORS[self.status_final])) + line_content = "".join(line_content) + lines.append(line_content) + content = "\n".join(lines) + + # Icons only + else: + # Create a legend: + legend = [] + for key in self.COLORS.keys(): + color, desc, icon = self.COLORS[key] + legend.append(f"{icon}: %s" % f(desc, color=color)) + legend = " | ".join(legend) + + # Create a status bar for tasks: + # with colored brackets color for final status: + # raw_content = f"[%s]" % "".join(self.status) + raw_content = "%s" % "".join(self.status) + lines = [] + for status_line in self._adjust_for_terminal_width( + raw_content, max_width=4 + ).split("\n"): + line_content = [] + for s in status_line: + if s not in ["[", "]"]: + line_content.append("%s " % self.COLORS[s][2]) + else: + line_content.append(f(s, self.STATE_COLORS[self.status_final])) + line_content = "".join(line_content) + lines.append(line_content) + content = "\n".join(lines) + + # Progress bar: + val = int(100 * self.progress[0] / self.progress[1]) + color = self.STATE_COLORS[self.status_final] + txt = self.final_legend["task"] + if self.status_final != "?": + txt = "%s" % (self.final_legend["final"]) + if self.status_final == "f": + txt = "Failed %s" % (self.final_legend["final"]) + if self.status_final == "s": + txt = "Succeed in %s" % (self.final_legend["final"]) + txt = "%s (%i%% processed)" % (txt, val) + progress = f("%s ..." 
% txt, color, negative=0) + + # Complete STDOUT: + txt = f"\n" f"{legend}\n" f"{content}\n" f"{progress: <50}\n" + + return txt + + def display_status(self): + super().display_status() + + if self.show and self.runner in ["terminal", "standard"]: + if self._reprinter is None: + self._reprinter = self._Reprinter(self.status_txt) + # os.system('cls' if os.name == 'nt' else 'clear') + self._reprinter.reprint(f"{self.status_txt}") + # sys.stdout.flush() + + +if has_ipython: + + class c(proto_MonitoredPoolExecutor_notebook, proto_MonitoredPoolExecutor_terminal): + pass + +else: + + class c(proto_MonitoredPoolExecutor_terminal): + pass + + +class MyThreadPoolExecutor(c): + """ + This is a low-level helper class not intended to be used directly by users + + Examples + -------- + :: + + from argopy.utils import MyThreadPoolExecutor as MyExecutor + from random import random + from time import sleep + import numpy as np + + def my_task(obj, errors='ignore'): + data = random() + sleep(data * 3) + state = np.random.randint(0,100) >= 25 + if not state: + if errors == 'raise': + raise ValueError('Hello world') + elif errors == 'ignore': + pass + return data, state + + def my_postprocess(obj, opt=12): + sleep(random() * 5) + data = obj**opt + state = np.random.randint(0,100) >= 25 + return data, state + + def my_callback(obj, opt=2): + sleep(random() * 2) + data = obj**opt + state = np.random.randint(0,100) >= 25 + return data, state + + def my_final(obj_list, opt=True): + data = random() + sleep(data * 20) + results = [v for v in dict(sorted(obj_list.items())).values()] + return data, np.all(results) + + if __name__ == '__main__': + run = MyExecutor(max_workers=25, + task_fct=my_task, + postprocessing_fct=my_postprocess, + callback_fct=my_callback, + finalize_fct=my_final, + show=1, + ) + results, failed = run.execute(range(100), list_failed=True) + print(results) + """ + + pass diff --git a/argopy/utils/monitors.py b/argopy/utils/monitors.py new file mode 100644 index 00000000..43b9b586 --- /dev/null +++ b/argopy/utils/monitors.py @@ -0,0 +1,171 @@ +import urllib +import warnings +import importlib +import time +import threading + +try: + importlib.import_module("matplotlib") # noqa: E402 + from matplotlib.colors import to_hex +except ImportError: + pass + +from .lists import list_available_data_src +from .checkers import isAPIconnected + + +def badge(label="label", message="message", color="green", insert=False): + """Return or insert shield.io badge image + + Use the shields.io service to create a badge image + + https://img.shields.io/static/v1?label=
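
The diff is truncated here, but from the visible docstring, `badge()` builds a shields.io "static/v1" badge URL. A hedged sketch of that kind of URL construction is shown below; the query keys follow the public shields.io static-badge API and the helper name is hypothetical, not necessarily argopy's exact implementation:

```python
from urllib.parse import urlencode

def shields_badge_url(label="label", message="message", color="green"):
    # Public shields.io static badge endpoint; query keys are label/message/color
    return "https://img.shields.io/static/v1?" + urlencode(
        {"label": label, "message": message, "color": color}
    )

print(shields_badge_url("erddap", "online", "green"))
# https://img.shields.io/static/v1?label=erddap&message=online&color=green
```

In a notebook context, such a URL can then be rendered with `IPython.display.Image`, which is presumably what the `insert` flag controls.
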