From 2381a47e96a4c9515661ca2099d61d8a8e9826ad Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Fri, 15 Nov 2024 11:36:59 +0000 Subject: [PATCH 01/17] Initial creation of file info class to eventually replace instrument and product dicts --- src/ncas_amof_netcdf_template/__init__.py | 1 + src/ncas_amof_netcdf_template/file_info.py | 318 +++++++++++++++++++++ 2 files changed, 319 insertions(+) create mode 100644 src/ncas_amof_netcdf_template/file_info.py diff --git a/src/ncas_amof_netcdf_template/__init__.py b/src/ncas_amof_netcdf_template/__init__.py index 34b4eb2..3b3932d 100644 --- a/src/ncas_amof_netcdf_template/__init__.py +++ b/src/ncas_amof_netcdf_template/__init__.py @@ -4,5 +4,6 @@ from . import tsv2dict from . import util from . import values +from . import instrument_info __version__="2.4.0" diff --git a/src/ncas_amof_netcdf_template/file_info.py b/src/ncas_amof_netcdf_template/file_info.py new file mode 100644 index 0000000..c01e088 --- /dev/null +++ b/src/ncas_amof_netcdf_template/file_info.py @@ -0,0 +1,318 @@ +""" +Take tsv files a return a class with all the data needed for creating the netCDF files. +""" + +import requests +import pandas as pd +import re + + +class FileInfo: + """ + Class that will gather and hold all the data to create netCDF file with + """ + def __init__( + self, + instrument_name: str, + data_product: str, + deployment_mode: str = "land", + tag: str = "latest", + ) -> None: + """ + Initialise the class. + + Args: + instrument_name (str): name of the instrument + data_product (str): name of data product to use + deployment_mode (str): value of the 'deployment_mode' global attribute, and + different variables may be required depending on + value. One of "land", "sea", "air", or "trajectory". + tag (str): tagged release version of AMF_CVs, or "latest" to get most + recent version. Default is "latest". + """ + if deployment_mode not in ["land", "sea", "air", "trajectory"]: + msg = f"Invalid deployment mode {deployment_mode}, must be one of 'land', 'sea', 'air', 'trajectory'." + raise ValueError(msg) + + self.instrument_name = instrument_name + self.data_product = data_product + self.deployment_mode = deployment_mode + self.tag = tag + if self.tag == "latest": + self.ncas_gen_version = self._get_github_latest_version("https://github.com/ncasuk/AMF_CVs") + elif self._check_github_cvs_version_exists(): + self.ncas_gen_version = tag + else: + msg = f"Cannot find release version {tag} in https://github.com/ncasuk/AMF_CVs" + raise ValueError(msg) + self.attributes = {} + self.dimensions = {} + self.variables = {} + + + def __repr__(self) -> str: + class_name = type(self).__name__ + return f"{class_name}(instrument_name='{self.instrument_name}', data_product='{self.data_product}', deployment_mode='{self.deployment_mode}', tag='{self.tag}') - ncas_gen_version = '{self.ncas_gen_version}" + + + def __str__(self) -> str: + return f"Class with information for {self.instrument_name} instrument and {self.data_product} product" + + + + def get_common_info(self) -> None: + """ + Get all the common variables, dimensions and attributes, and add to class + properties + """ + self._tsv2dict_attrs(self._attributes_tsv_url(self.deployment_mode)) + + + def get_deployment_info(self) -> None: + """ + Get all the variables, dimensions and attributes related to the deployment + mode, and add to class properties + """ + self._tsv2dict_dims(self._dimensions_tsv_url(self.deployment_mode)) + self._tsv2dict_vars(self._variables_tsv_url(self.deployment_mode)) + + + def get_product_info(self) -> None: + """ + Get all the variables, dimensions and attributes related to the data product, + and add to class properties + """ + self._tsv2dict_attrs(self._attributes_tsv_url(self.data_product)) + self._tsv2dict_dims(self._dimensions_tsv_url(self.data_product)) + self._tsv2dict_vars(self._variables_tsv_url(self.data_product)) + + + def get_instrument_info(self) -> None: + """ + Get all the attribute data related to a defined instrument in the + ncas-data-instrument-vocabs repo, and add to class property. + """ + if self.instrument_name.startswith("ncas-"): + self._tsv2dict_instruments(self._get_ncas_instrument_tsv_url()) + else: + self._tsv2dict_instruments(self._get_community_instrument_tsv_url()) + + + def _tsv2dict_vars(self, tsv_file: str) -> None: + """ + For a given tsv file from the AMF_CVs GitHub repo, add dictionary of + variables and their attributes to variables property. + + Args: + tsv_file (str): URL to location of tsv file + """ + if self._check_website_exists(tsv_file): + df_vars = pd.read_csv(tsv_file, sep="\t") + df_vars = df_vars.fillna("") + + current_var_dict = {} + first_loop = True + current_var = "" + + for current_line in df_vars.iloc: + if current_line["Variable"] != "": + if not first_loop: + self.variables[current_var] = current_var_dict + else: + first_loop = False + current_var = current_line["Variable"] + current_var_dict = {} + if current_line["Attribute"] != "": + if ( + current_line["Value"] == "" + and "example value" in current_line.keys() + and current_line["example value"] != "" + ): + current_var_dict[current_line["Attribute"]] = ( + f"EXAMPLE: {current_line['example value']}" + ) + else: + current_var_dict[current_line["Attribute"]] = current_line["Value"] + + self.variables[current_var] = current_var_dict + + + def _tsv2dict_dims(self, tsv_file: str) -> None: + """ + For a given tsv file from the AMF_CVs GitHub repo, add dictionary of dimensions + and additional info to dimensions property. + + Args: + tsv_file (str): URL to location of tsv file + """ + if self._check_website_exists(tsv_file): + df_dims = pd.read_csv(tsv_file, sep="\t") + df_dims = df_dims.fillna("") + + for dim in df_dims.iloc: + dim_dict = dim.to_dict() + dim_name = dim_dict.pop("Name") + self.dimensions[dim_name] = dim_dict + + + def _tsv2dict_attrs(self, tsv_file: str) -> None: + """ + For a given tsv file from the AMF_CVs GitHub repo, add dictionary of attributes + and values to attribute property. + + Args: + tsv_file (str): URL to location of tsv file + """ + if self._check_website_exists(tsv_file): + df_attrs = pd.read_csv(tsv_file, sep="\t") + df_attrs = df_attrs.fillna("") + + for attr in df_attrs.iloc: + attr_dict = attr.to_dict() + attr_name = attr_dict.pop("Name") + self.attributes[attr_name] = attr_dict + + + def _tsv2dict_instruments(self, tsv_file: str) -> None: + """ + For a given tsv file from the ncas-data-instrument-vocabs repo, add dictionary + of instrument data to atttributes property. + + Args: + tsv_file (str): URL to location of tsv file + """ + if self._check_website_exists(tsv_file): + df_instruments = pd.read_csv(tsv_file, sep="\t") + df_instrument = df_instruments.where(df_instruments["New Instrument Name"] == self.instrument_name).dropna(subset=["New Instrument Name"]) + if len(df_instrument) == 0: + print(f"[WARNING] No details found for instrument {self.instrument_name}...") + else: + for inst in df_instrument.iloc: + instrument_dict = inst.to_dict() + data_products = re.split(r",| |\|", instrument_dict["Data Product(s)"]) + data_products = list(filter(None, data_products)) + instrument_dict["Data Product(s)"] = data_products + + for i in ["Manufacturer", "Model No.", "Serial Number", "Data Product(s)", "Mobile/Fixed (loc)", "Descriptor"]: + self.attributes[i] = {"Fixed Value": instrument_dict[i]} + + + def _check_instrument_has_product(self, product: str) -> bool: + """ + Check instrument has defined data product associated with it + + Args: + product (str): data product to check + + Returns: + bool: does the instrument have the given data product associated with it + """ + if "Data Product(s)" not in self.attributes.keys(): + if self.instrument_name.startswith("ncas"): + inst_tsv = self._get_ncas_instrument_tsv_url() + else: + inst_tsv = self._get_community_instrument_tsv_url() + self._tsv2dict_instruments(inst_tsv) + return product in self.attributes["Data Product(s)"] + + + def _get_github_latest_version(self, url: str) -> str: + """ + Get the tag of the latest release version + + Args: + url (str): GitHub URL to find latest release version of: https://github.com// + + Returns: + str: tag name of latest version release + """ + return requests.get(f"{url}/releases/latest").url.split("/")[-1] + + + def _check_website_exists(self, url: str) -> bool: + """ + Check website exists and is up + + Args: + url (str): URL to check + + Returns: + bool: website is reachable + """ + status = requests.get(url).status_code + return status == 200 + + + def _check_github_cvs_version_exists(self) -> bool: + """ + Check the requested tagged version of AMF_CVs exists on GitHub + """ + url = f"https://github.com/ncasuk/AMF_CVs/releases/{self.ncas_gen_version}" + return self._check_website_exists(url) + + + def _dimensions_tsv_url(self, obj: str) -> str: + """ + Get the URL for the tsv files for dimensions + + Args: + obj (str): Data product or deployment mode + + Returns: + str: URL location of dimension tsv file + """ + file_loc = f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{self.ncas_gen_version}/product-definitions/tsv" + path, option = (obj, "specific") if obj not in ["land", "sea", "air", "trajectory"] else ("_common", obj) + return f"{file_loc}/{path}/dimensions-{option}.tsv" + + + def _variables_tsv_url(self, obj: str) -> str: + """ + Get the URL for the tsv files for variables + + Args: + obj (str): Data product or deployment mode + + Returns: + str: URL location of variable tsv file + """ + file_loc = f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{self.ncas_gen_version}/product-definitions/tsv" + path, option = (obj, "specific") if obj not in ["land", "sea", "air", "trajectory"] else ("_common", obj) + return f"{file_loc}/{path}/variables-{option}.tsv" + + + def _attributes_tsv_url(self, obj: str) -> str: + """ + Get the URL for the tsv files for attributes + + Args: + obj (str): Data product or deployment mode + + Returns: + str: URL location of attribute tsv file + """ + file_loc = f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{self.ncas_gen_version}/product-definitions/tsv" + path, option = (obj, "-specific") if obj not in ["land", "sea", "air", "trajectory"] else ("_common", "") + return f"{file_loc}/{path}/global-attributes{option}.tsv" + + + def _get_ncas_instrument_tsv_url(self) -> str: + """ + Get the URL for the tsv file of NCAS instruments + """ + vocab_version = self._get_github_latest_version("https://github.com/ncasuk/ncas-data-instrument-vocabs") + file_loc = f"https://raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs" + return f"{file_loc}/{vocab_version}/product-definitions/tsv/_instrument_vocabs/ncas-instrument-name-and-descriptors.tsv" + + + def _get_community_instrument_tsv_url(self) -> str: + """ + Get the URL for the tsv file of NCAS instruments + """ + vocab_version = self._get_github_latest_version("https://github.com/ncasuk/ncas-data-instrument-vocabs") + file_loc = f"https://raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs" + return f"{file_loc}/{vocab_version}/product-definitions/tsv/_instrument_vocabs/community-instrument-name-and-descriptors.tsv" + + + + From 06bd589d404cbbb7069ef795f2a4c53063ca1e8e Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Fri, 15 Nov 2024 11:50:33 +0000 Subject: [PATCH 02/17] Correct name of new file --- src/ncas_amof_netcdf_template/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ncas_amof_netcdf_template/__init__.py b/src/ncas_amof_netcdf_template/__init__.py index 3b3932d..c98abfc 100644 --- a/src/ncas_amof_netcdf_template/__init__.py +++ b/src/ncas_amof_netcdf_template/__init__.py @@ -4,6 +4,6 @@ from . import tsv2dict from . import util from . import values -from . import instrument_info +from . import file_info __version__="2.4.0" From 4ec44177e2c5f675516329e935c7bfc85b2a1e3c Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Mon, 18 Nov 2024 11:07:13 +0000 Subject: [PATCH 03/17] Add instrument info to it's own property, rather than in attributes --- src/ncas_amof_netcdf_template/file_info.py | 35 +++++++++++----------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/ncas_amof_netcdf_template/file_info.py b/src/ncas_amof_netcdf_template/file_info.py index c01e088..56a9f08 100644 --- a/src/ncas_amof_netcdf_template/file_info.py +++ b/src/ncas_amof_netcdf_template/file_info.py @@ -5,6 +5,9 @@ import requests import pandas as pd import re +from typing import Optional + +from .util import check_int class FileInfo: @@ -40,7 +43,7 @@ def __init__( self.tag = tag if self.tag == "latest": self.ncas_gen_version = self._get_github_latest_version("https://github.com/ncasuk/AMF_CVs") - elif self._check_github_cvs_version_exists(): + elif self._check_github_cvs_version_exists(release_tag=tag): self.ncas_gen_version = tag else: msg = f"Cannot find release version {tag} in https://github.com/ncasuk/AMF_CVs" @@ -48,6 +51,7 @@ def __init__( self.attributes = {} self.dimensions = {} self.variables = {} + self.instrument_data = {} def __repr__(self) -> str: @@ -56,8 +60,7 @@ def __repr__(self) -> str: def __str__(self) -> str: - return f"Class with information for {self.instrument_name} instrument and {self.data_product} product" - + return f"Class with information for '{self.instrument_name}' instrument and '{self.data_product}' data product" def get_common_info(self) -> None: @@ -152,6 +155,8 @@ def _tsv2dict_dims(self, tsv_file: str) -> None: for dim in df_dims.iloc: dim_dict = dim.to_dict() dim_name = dim_dict.pop("Name") + if check_int(dim_dict["Length"]): + dim_dict["Length"] = int(dim_dict["Length"]) self.dimensions[dim_name] = dim_dict @@ -192,28 +197,20 @@ def _tsv2dict_instruments(self, tsv_file: str) -> None: data_products = re.split(r",| |\|", instrument_dict["Data Product(s)"]) data_products = list(filter(None, data_products)) instrument_dict["Data Product(s)"] = data_products - for i in ["Manufacturer", "Model No.", "Serial Number", "Data Product(s)", "Mobile/Fixed (loc)", "Descriptor"]: - self.attributes[i] = {"Fixed Value": instrument_dict[i]} + self.instrument_data[i] = instrument_dict[i] - def _check_instrument_has_product(self, product: str) -> bool: + def _check_instrument_has_product(self) -> bool: """ Check instrument has defined data product associated with it - Args: - product (str): data product to check - Returns: bool: does the instrument have the given data product associated with it """ - if "Data Product(s)" not in self.attributes.keys(): - if self.instrument_name.startswith("ncas"): - inst_tsv = self._get_ncas_instrument_tsv_url() - else: - inst_tsv = self._get_community_instrument_tsv_url() - self._tsv2dict_instruments(inst_tsv) - return product in self.attributes["Data Product(s)"] + if "Data Product(s)" not in self.instrument_data.keys(): + self.get_instrument_info() + return self.data_product in self.instrument_data["Data Product(s)"] def _get_github_latest_version(self, url: str) -> str: @@ -243,11 +240,13 @@ def _check_website_exists(self, url: str) -> bool: return status == 200 - def _check_github_cvs_version_exists(self) -> bool: + def _check_github_cvs_version_exists(self, release_tag: Optional[str] = None) -> bool: """ Check the requested tagged version of AMF_CVs exists on GitHub """ - url = f"https://github.com/ncasuk/AMF_CVs/releases/{self.ncas_gen_version}" + if release_tag is None: + release_tag = self.ncas_gen_version + url = f"https://github.com/ncasuk/AMF_CVs/releases/{release_tag}" return self._check_website_exists(url) From f34675584bc344025f3bc1478303dddcdfb730b6 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Mon, 18 Nov 2024 16:37:59 +0000 Subject: [PATCH 04/17] Add function to convert instrument_dict to FileInfo --- src/ncas_amof_netcdf_template/file_info.py | 45 +++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/src/ncas_amof_netcdf_template/file_info.py b/src/ncas_amof_netcdf_template/file_info.py index 56a9f08..752dd70 100644 --- a/src/ncas_amof_netcdf_template/file_info.py +++ b/src/ncas_amof_netcdf_template/file_info.py @@ -5,7 +5,7 @@ import requests import pandas as pd import re -from typing import Optional +from typing import Optional, Union from .util import check_int @@ -312,6 +312,49 @@ def _get_community_instrument_tsv_url(self) -> str: file_loc = f"https://raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs" return f"{file_loc}/{vocab_version}/product-definitions/tsv/_instrument_vocabs/community-instrument-name-and-descriptors.tsv" + +def convert_instrument_dict_to_file_info( + instrument_dict: dict[ + str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] + ], + instrument_name: str, + data_product: str, + deployment_mode: str, + tag: str, +) -> FileInfo: + """ + Convert instrument_dict from tsv2dict.instrument_dict to a FileInfo class variable + + Args: + instrument_dict (dict): Dictionary made by tsv2dict.instrument_dict + instrument_name (str): Name of the instrument + data_product (str): Data product of data for netCDF file + deployment_mode (str): Deployment mode of instrument. One of "land", "sea", + "air", "trajectory" + tag (str): Tag release of AMF_CVs being used + + Returns: + FileInfo object with all instrument data from the dictionary + """ + instrument_file_info = FileInfo(instrument_name, data_product, deployment_mode, tag) + for prod in ["common", data_product]: + if "attributes" in instrument_dict[prod].keys(): + for attr_name, attr_dict in instrument_dict[prod]["attributes"].items(): + instrument_file_info.attributes[attr_name] = attr_dict + if "dimensions" in instrument_dict[prod].keys(): + for dim_name, dim_dict in instrument_dict[prod]["dimensions"].items(): + instrument_file_info.dimensions[dim_name] = dim_dict + if "variables" in instrument_dict[prod].keys(): + for var_name, var_dict in instrument_dict[prod]["variables"].items(): + instrument_file_info.variables[var_name] = var_dict + if "info" in instrument_dict.keys(): + for key, value in instrument_dict["info"].items(): + if key == "Mobile/Fixed (loc)" and value.split("-")[0].strip().lower() == "fixed": + value = value.split("-")[1].strip() + instrument_file_info.instrument_data[key] = value + + return instrument_file_info + From 0256ad44400a39467a5a483670352d293ffdfa3b Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Mon, 18 Nov 2024 16:38:27 +0000 Subject: [PATCH 05/17] Add instrument name as a key under "info" --- src/ncas_amof_netcdf_template/tsv2dict.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ncas_amof_netcdf_template/tsv2dict.py b/src/ncas_amof_netcdf_template/tsv2dict.py index 2716c16..85c887a 100644 --- a/src/ncas_amof_netcdf_template/tsv2dict.py +++ b/src/ncas_amof_netcdf_template/tsv2dict.py @@ -130,6 +130,7 @@ def tsv2dict_instruments(tsv_file: str) -> dict[str, dict[str, str]]: for current_instrument in df_instruments.iloc: inst_dict = current_instrument.to_dict() inst_name = inst_dict.pop("New Instrument Name") + inst_dict["instrument_name"] = inst_dict data_products = re.split(r",| |\|", inst_dict["Data Product(s)"]) data_products = list(filter(None, data_products)) inst_dict["Data Product(s)"] = data_products From 83850e9c31ede3fde9ba59e31390461eeef8dd80 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Mon, 18 Nov 2024 16:39:17 +0000 Subject: [PATCH 06/17] Changes needed after converting instrument_dict to FileInfo --- tests/test_create_netcdf.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/test_create_netcdf.py b/tests/test_create_netcdf.py index 865e590..a3ef86a 100644 --- a/tests/test_create_netcdf.py +++ b/tests/test_create_netcdf.py @@ -49,6 +49,8 @@ def test_add_attributes(): "Manufacturer": "Manufacturer", "Model No.": "Model Number", "Serial Number": "Serial Number", + "instrument_name": "instrument-name", + "Mobile/Fixed (loc)": "location1" }, "common": { "attributes": { @@ -78,7 +80,7 @@ def test_add_attributes(): location = "location1" loc = "land" use_local_files = None - tag = "v1.2.3" + tag = "v2.0.0" user = getpass.getuser() machine = socket.gethostname() @@ -115,7 +117,7 @@ def test_add_attributes(): ) assert ( ncfile.getncattr("amf_vocabularies_release") - == "https://github.com/ncasuk/AMF_CVs/releases/tag/v1.2.3" + == "https://github.com/ncasuk/AMF_CVs/releases/tag/v2.0.0" ) assert ncfile.getncattr("history") == history_text assert ncfile.getncattr("last_revised_date") == created_time @@ -223,6 +225,9 @@ def test_add_variables(): }, } }, + "info": { + "instrument_name": "instrument-name", + } } product = "product1" @@ -296,12 +301,13 @@ def test_make_netcdf(compression, complevel, shuffle): "Manufacturer": "Manufacturer", "Model No.": "Model Number", "Serial Number": "Serial Number", + "instrument_name": instrument, }, "common": { "dimensions": { - "time": None, - "latitude": None, - "longitude": None, + "time": {"Length": 5}, + "latitude": {"Length": 1}, + "longitude": {"Length": 1}, }, "variables": { "variable1": { @@ -349,7 +355,7 @@ def test_make_netcdf(compression, complevel, shuffle): product_version = "1.0" file_location = "." use_local_files = None - tag = "v1.2.3" + tag = "v2.0.0" chunk_by_dimension = {"time": 2} # Call the function @@ -397,7 +403,7 @@ def test_make_netcdf(compression, complevel, shuffle): ) assert ( ncfile.getncattr("amf_vocabularies_release") - == "https://github.com/ncasuk/AMF_CVs/releases/tag/v1.2.3" + == "https://github.com/ncasuk/AMF_CVs/releases/tag/v2.0.0" ) assert ncfile.getncattr("deployment_mode") == loc assert ( From 9a4268c785bec345a74b7feba3a9df8777a89caf Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Mon, 18 Nov 2024 16:39:40 +0000 Subject: [PATCH 07/17] Added new functionality and deprecations for switching to FileInfo --- .../create_netcdf.py | 676 +++++++++++------- 1 file changed, 410 insertions(+), 266 deletions(-) diff --git a/src/ncas_amof_netcdf_template/create_netcdf.py b/src/ncas_amof_netcdf_template/create_netcdf.py index 5a52437..c8901fc 100644 --- a/src/ncas_amof_netcdf_template/create_netcdf.py +++ b/src/ncas_amof_netcdf_template/create_netcdf.py @@ -16,52 +16,114 @@ from . import tsv2dict from . import values from .__init__ import __version__ +from .file_info import FileInfo, convert_instrument_dict_to_file_info def add_attributes( ncfile: Dataset, - instrument_dict: dict[ + instrument_dict: Optional[dict[ str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ], - product: str, - created_time: str, - location: str, - loc: str, + ]] = None, + product: Optional[str] = None, + created_time: Optional[str] = None, + location: Optional[str] = None, + loc: str = "land", use_local_files: Optional[str] = None, tag: str = "latest", + instrument_file_info: Optional[FileInfo] = None, ) -> None: """ Adds all global attributes for a given product to the netCDF file. Args: ncfile (obj): netCDF file object - instrument_dict (dict): information about the instrument from - tsv2dict.isntrument_dict. - product (str): name of data product. - create_time (str): time of file creation. - location (str): value for the 'platform' global attribute. - loc (str): value for the 'deployment_mode' global attribute, should be one of - 'land', 'sea', 'air', or 'trajectory'. - tag (str): tagged release version of AMF_CVs, or "latest" to get most recent - release. Ignored if use_local_files is not None. Default latest. + instrument_dict (dict): -DEPRECATED- information about the instrument from + tsv2dict.isntrument_dict. Use instrument_file_info + instead. Will be removed in version 2.7.0. + product (str): -DEPRECATED- name of data product. Value will be obtained from + instrument_file_info. Option will be removed in version 2.7.0. + created_time (str or None): time of file creation. If 'None', now will be used. + location (str or None): -DEPRECATED- value for the 'platform' global attribute. + Value will be obtained from instrument_file_info. Option + will be removed in version 2.7.0. + loc (str): -DEPRECATED- value for the 'deployment_mode' global attribute, + should be one of 'land', 'sea', 'air', or 'trajectory'. Value will + be obtained from instrument_file_info. Option will be removed in + version 2.7.0. + instrument_file_info (FileInfo): information about instrument for making netCDF + file, from + ncas_amof_netcdf_template.file_info.FileInfo. + tag (str): -DEPRECATED- tagged release version of AMF_CVs, or "latest" to get + most recent release. Ignored if use_local_files is not None. Value + will be obtained from instrument_file_info. Option will be removed + in version 2.7.0. use_local_files (str or None): path to local directory where tsv files are stored. If "None", read from online. Default None. """ - for key, value in instrument_dict["common"]["attributes"].items(): + if instrument_dict is not None: + if instrument_file_info is None: + warnings.warn( + "Using dictionary for instrument info is being deprecated, use the" + " ncas_amof_netcdf_template.file_info.FileInfo class instead. Use of the" + " instrument_dict option will be removed from version 2.7.0", + DeprecationWarning, + stacklevel=2, + ) + if product is None: + msg = ( + "If instrument_dict is still being used, 'product' must be given." + " Preferred option is to switch to using instrument_file_info" + " instead." + ) + raise ValueError(msg) + instrument_file_info = convert_instrument_dict_to_file_info( + instrument_dict, + instrument_dict["info"]["instrument_name"], + product, + loc, + tag + ) + else: + warnings.warn( + "instrument_dict and instrument_file_info both given, using" + " instrument_file_info. Use of instrument_dict is being deprecated," + " and will be removed from version 2.7.0.", + DeprecationWarning, + stacklevel=2, + ) + + if product is not None or location is not None or tag != "latest": + warnings.warn( + "Defining any of 'product', 'location' or 'tag' arguments is being" + " deprecated, as this information will be pulled from" + " instrument_file_info argument. These options will be removed from" + " version 2.7.0.", + DeprecationWarning, + stacklevel=2, + ) + + if instrument_file_info is None: + msg = "No instrument file info given" + raise ValueError(msg) + + if created_time is None: + created_time = dt.datetime.now(tz=dt.UTC).strftime("%Y%m%dT%H%M%S") + + for key, value in instrument_file_info.attributes.items(): if value["Fixed Value"] != "": ncfile.setncattr(key, value["Fixed Value"]) elif key == "source": - ncfile.setncattr(key, instrument_dict["info"]["Descriptor"]) + ncfile.setncattr(key, instrument_file_info.instrument_data["Descriptor"]) elif key == "institution": ncfile.setncattr(key, "National Centre for Atmospheric Science (NCAS)") elif key == "platform": - ncfile.setncattr(key, location) + ncfile.setncattr(key, instrument_file_info.instrument_data["Mobile/Fixed (loc)"]) elif key == "instrument_manufacturer": - ncfile.setncattr(key, instrument_dict["info"]["Manufacturer"]) + ncfile.setncattr(key, instrument_file_info.instrument_data["Manufacturer"]) elif key == "instrument_model": - ncfile.setncattr(key, instrument_dict["info"]["Model No."]) + ncfile.setncattr(key, instrument_file_info.instrument_data["Model No."]) elif key == "instrument_serial_number": - ncfile.setncattr(key, instrument_dict["info"]["Serial Number"]) + ncfile.setncattr(key, instrument_file_info.instrument_data["Serial Number"]) elif key == "amf_vocabularies_release": if use_local_files: attrsdict = tsv2dict.tsv2dict_attrs( @@ -69,9 +131,7 @@ def add_attributes( ) tagurl = attrsdict["amf_vocabularies_release"]["Example"] else: - if tag == "latest": - tag = values.get_latest_CVs_version() - tagurl = f"https://github.com/ncasuk/AMF_CVs/releases/tag/{tag}" + tagurl = f"https://github.com/ncasuk/AMF_CVs/releases/tag/{instrument_file_info.ncas_gen_version}" ncfile.setncattr(key, tagurl) elif key == "history": user = getpass.getuser() @@ -91,48 +151,79 @@ def add_attributes( f"CHANGE: {value['Description']}. {value['Compliance checking rules']}", ) - for key, value in instrument_dict[product]["attributes"].items(): - if value["Fixed Value"] != "": - ncfile.setncattr(key, value["Fixed Value"]) - else: - ncfile.setncattr( - key, - f"CHANGE: {value['Description']}. {value['Compliance checking rules']}", - ) - def add_dimensions( ncfile: Dataset, - instrument_dict: dict[ + instrument_dict: Optional[dict[ str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ], - product: str, - dimension_lengths: dict[str, int], + ]] = None, + product: Optional[str] = None, + dimension_lengths: Optional[dict[str, int]] = None, + instrument_file_info: Optional[FileInfo] = None, ) -> None: """ Adds all dimensions for a given product to the netCDF file. Args: ncfile (obj): netCDF file object - instrument_dict (dict): information about the instrument from - tsv2dict.isntrument_dict. - product (str): name of data product. - dimension_lengths (dict): length of each dimension. + instrument_dict (dict): -DEPRECATED- information about the instrument from + tsv2dict.isntrument_dict. Use instrument_file_info + instead. Will be removed in version 2.7.0. + product (str): -DEPRECATED- name of data product. Value will now be obtained + from instrument_file_info. Option will be removed from version + 2.7.0. + dimension_lengths (dict): -DEPRECATED- length of each dimension. Values will + now be obtained from instrument_file_info. Option + will be removed from version 2.7.0. + instrument_file_info (FileInfo): information about instrument for making netCDF + file, from + ncas_amof_netcdf_template.file_info.FileInfo. """ - for key, length in dimension_lengths.items(): - if ( - key in instrument_dict["common"]["dimensions"].keys() - or key in instrument_dict[product]["dimensions"].keys() - ): - ncfile.createDimension(key, length) + if instrument_dict is not None: + if instrument_file_info is None: + warnings.warn( + "Using dictionary for instrument info is being deprecated, use the" + " ncas_amof_netcdf_template.file_info.FileInfo class instead. Use of the" + " instrument_dict option will be removed from version 2.7.0", + DeprecationWarning, + stacklevel=2, + ) + if product is None or dimension_lengths is None: + msg = ( + "If instrument_dict is still being used, 'product' and" + " 'dimension_lengths' must be given. Preferred option is to switch" + " to using instrument_file_info instead." + ) + raise ValueError(msg) + else: + warnings.warn( + "instrument_dict and instrument_file_info both given, using" + " instrument_file_info. Use of instrument_dict is being deprecated," + " and will be removed from version 2.7.0.", + DeprecationWarning, + stacklevel=2, + ) + + if instrument_file_info is not None: + for dim_name in instrument_file_info.dimensions.keys(): + ncfile.createDimension(dim_name, instrument_file_info.dimensions[dim_name]["Length"]) + + elif dimension_lengths is not None: + for key, length in dimension_lengths.items(): + if ( + key in instrument_dict["common"]["dimensions"].keys() + or key in instrument_dict[product]["dimensions"].keys() + ): + ncfile.createDimension(key, length) def add_variables( ncfile: Dataset, - instrument_dict: dict[ + instrument_dict: Optional[dict[ str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ], - product: str, + ]] = None, + product: Optional[str] = None, + instrument_file_info: Optional[FileInfo] = None, verbose: int = 0, ) -> None: """ @@ -140,130 +231,170 @@ def add_variables( Args: ncfile (obj): netCDF file object - instrument_dict (dict): information about the instrument from - tsv2dict.isntrument_dict. - product (str): name of data product. + instrument_dict (dict): -DEPRECATED- information about the instrument from + tsv2dict.isntrument_dict. Use instrument_file_info + instead. Will be removed in version 2.7.0. + product (str): -DEPRECATED- name of data product. Value will be obtained from + instrument_file_info. Option will be removed in version 2.7.0. + instrument_file_info (FileInfo): information about instrument for making netCDF + file, from + ncas_amof_netcdf_template.file_info.FileInfo. verbose (int): level of additional info to print. At the moment, there is only 1 additional level. Default 0. """ - for obj in [product, "common"]: - for key, value in instrument_dict[obj]["variables"].items(): - # make sure variable doesn't already exist, warn if it does - if key in ncfile.variables.keys(): - print(f"WARN: variable {key} defined multiple times.") + if instrument_dict is not None: + if instrument_file_info is None: + warnings.warn( + "Using dictionary for instrument info is being deprecated, use the" + " ncas_amof_netcdf_template.file_info.FileInfo class instead. Use of the" + " instrument_dict option will be removed from version 2.7.0", + DeprecationWarning, + stacklevel=2, + ) + if product is None: + msg = ( + "If instrument_dict is still being used, 'product' must be given." + " Preferred option is to switch to using instrument_file_info" + " instead." + ) + raise ValueError(msg) + instrument_file_info = convert_instrument_dict_to_file_info( + instrument_dict, + instrument_dict["info"]["instrument_name"], + product, + deployment_mode="land", + tag="latest", + ) + else: + warnings.warn( + "instrument_dict and instrument_file_info both given, using" + " instrument_file_info. Use of instrument_dict is being deprecated," + " and will be removed from version 2.7.0.", + DeprecationWarning, + stacklevel=2, + ) + + if instrument_file_info is None: + msg = "No instrument file info given" + raise ValueError(msg) + + for key, value in instrument_file_info.variables.items(): + # make sure variable doesn't already exist, warn if it does + if key in ncfile.variables.keys(): + print(f"WARN: variable {key} defined multiple times.") + else: + # therefore, value is instrument_dict[obj]['variables'][key] + # want to pop certain things here, but not for ever, so make tmp_value + tmp_value = copy.copy(value) + + # error, there are some variables with dimensions + # missing, error in spreadsheet + # if we encounter one, we're going to print out an error + # and forget about that variable + if "dimension" not in tmp_value.keys(): + print(f"WARN: No dimensions for variable {key}") + print("Variable not added file") + var_dims = () else: - # therefore, value is instrument_dict[obj]['variables'][key] - # want to pop certain things here, but not for ever, so make tmp_value - tmp_value = copy.copy(value) - - # error, there are some variables with dimensions - # missing, error in spreadsheet - # if we encounter one, we're going to print out an error - # and forget about that variable - if "dimension" not in tmp_value.keys(): - print(f"WARN: No dimensions for variable {key} in product {obj}") - print("Variable not added file") - var_dims = () - else: - var_dims = tmp_value.pop("dimension") - # there was an error somewhere meaning 2 dimensions - # had a '.' instead of ',' between them - var_dims = var_dims.replace(".", ",") - var_dims = tuple(x.strip() for x in var_dims.split(",")) + var_dims = tmp_value.pop("dimension") + # there was an error somewhere meaning 2 dimensions + # had a '.' instead of ',' between them + var_dims = var_dims.replace(".", ",") + var_dims = tuple(x.strip() for x in var_dims.split(",")) - datatype = tmp_value.pop("type") + datatype = tmp_value.pop("type") - if "_FillValue" in tmp_value: - fill_value = float(tmp_value.pop("_FillValue")) - else: - fill_value = None + if "_FillValue" in tmp_value: + fill_value = float(tmp_value.pop("_FillValue")) + else: + fill_value = None - if "chunksizes" in tmp_value: - chunksizes = tmp_value.pop("chunksizes") - else: - chunksizes = None + if "chunksizes" in tmp_value: + chunksizes = tmp_value.pop("chunksizes") + else: + chunksizes = None - if "compression" in tmp_value: - compression = tmp_value.pop("compression") - else: - compression = None + if "compression" in tmp_value: + compression = tmp_value.pop("compression") + else: + compression = None - if "complevel" in tmp_value: - complevel = tmp_value.pop("complevel") - else: - complevel = 4 + if "complevel" in tmp_value: + complevel = tmp_value.pop("complevel") + else: + complevel = 4 - if "shuffle" in tmp_value: - shuffle = tmp_value.pop("shuffle") - else: - shuffle = True - - var = ncfile.createVariable( - key, - datatype, - var_dims, - fill_value=fill_value, - chunksizes=chunksizes, - compression=compression, - complevel=complevel, - shuffle=shuffle, - ) + if "shuffle" in tmp_value: + shuffle = tmp_value.pop("shuffle") + else: + shuffle = True - for mdatkey, mdatvalue in tmp_value.items(): - # flag meanings in the tsv files are separated by '|', - # should be space separated - if "|" in mdatvalue and "flag_meaning" in mdatkey: - mdatvalue = " ".join([i.strip() for i in mdatvalue.split("|")]) - # flag values are bytes, can't add byte array - # into NETCDF4_CLASSIC so have to muddle a bit - if "flag_value" in mdatkey and "qc" in key and var.dtype == np.int8: - # turn string "0b,1b..." into list of ints [0,1...] - mdatvalue = mdatvalue.strip(",") - newmdatvalue = [int(i.strip("b")) for i in mdatvalue.split(",")] - # turn list into array with int8 type - mdatvalue = np.array(newmdatvalue, dtype=np.int8) - # print warning for example values, - # and don't add example values for standard_name + var = ncfile.createVariable( + key, + datatype, + var_dims, + fill_value=fill_value, + chunksizes=chunksizes, + compression=compression, + complevel=complevel, + shuffle=shuffle, + ) + + for mdatkey, mdatvalue in tmp_value.items(): + # flag meanings in the tsv files are separated by '|', + # should be space separated + if "|" in mdatvalue and "flag_meaning" in mdatkey: + mdatvalue = " ".join([i.strip() for i in mdatvalue.split("|")]) + # flag values are bytes, can't add byte array + # into NETCDF4_CLASSIC so have to muddle a bit + if "flag_value" in mdatkey and "qc" in key and var.dtype == np.int8: + # turn string "0b,1b..." into list of ints [0,1...] + mdatvalue = mdatvalue.strip(",") + newmdatvalue = [int(i.strip("b")) for i in mdatvalue.split(",")] + # turn list into array with int8 type + mdatvalue = np.array(newmdatvalue, dtype=np.int8) + # print warning for example values, + # and don't add example values for standard_name + if ( + mdatkey == "standard_name" + and ("EXAMPLE" in mdatvalue or mdatvalue == "") + and verbose >= 1 + ): + print( + f"WARN: No standard name for variable {key}, " + "standard_name attribute not added" + ) + elif "EXAMPLE" in mdatvalue and verbose >= 1: + print( + "WARN: example value for attribute " + f"{mdatkey} for variable {key}" + ) + # don't add EXAMPLE standard name + if not ( + mdatkey == "standard_name" + and ("EXAMPLE" in mdatvalue or mdatvalue == "") + ): + # don't add empty attributes if ( - mdatkey == "standard_name" - and ("EXAMPLE" in mdatvalue or mdatvalue == "") + isinstance(mdatvalue, str) + and mdatvalue == "" and verbose >= 1 ): print( - f"WARN: No standard name for variable {key}, " - "standard_name attribute not added" + f"WARN: No value for attribute {mdatkey} " + "for variable {key}, attribute not added" ) - elif "EXAMPLE" in mdatvalue and verbose >= 1: - print( - "WARN: example value for attribute " - f"{mdatkey} for variable {key}" - ) - # don't add EXAMPLE standard name - if not ( - mdatkey == "standard_name" - and ("EXAMPLE" in mdatvalue or mdatvalue == "") - ): - # don't add empty attributes - if ( - isinstance(mdatvalue, str) - and mdatvalue == "" - and verbose >= 1 - ): - print( - f"WARN: No value for attribute {mdatkey} " - "for variable {key}, attribute not added" - ) - else: - var.setncattr(mdatkey, mdatvalue) + else: + var.setncattr(mdatkey, mdatvalue) def make_netcdf( instrument: str, product: str, time: str, - instrument_dict: dict[ + instrument_dict: Optional[dict[ str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ], + ]] = None, loc: str = "land", dimension_lengths: dict[str, int] = {}, verbose: int = 0, @@ -277,6 +408,7 @@ def make_netcdf( compression: Union[str, dict[str, str], None] = None, complevel: Union[int, dict[str, int]] = 4, shuffle: Union[bool, dict[str, bool]] = True, + instrument_file_info: Optional[FileInfo] = None, ) -> Union[None, Dataset]: """ Makes netCDF file for given instrument and arguments. @@ -286,8 +418,12 @@ def make_netcdf( product (str): name of data product. time (str): time that the data represents, in YYYYmmdd-HHMMSS format or as much of as required. - instrument_dict (dict): information about the instrument - from tsv2dict.isntrument_dict. + instrument_dict (dict or None): -DEPRECATED- information about the instrument + from tsv2dict.instrument_dict. Use + instrument_file_info argument instead. Will be + remved in version 2.7.0. + instrument_file_info (FileInfo or None): information about the instrument, + from file_info.FileInfo. loc (str): location of instrument, one of 'land', 'sea', 'air' or 'trajectory'. Default 'land'. dimension_lengths (dict): lengths of dimensions in file. If not given, @@ -335,51 +471,78 @@ def make_netcdf( stacklevel=2, ) + if instrument_dict is not None: + if instrument_file_info is None: + warnings.warn( + "Using dictionary for instrument info is being deprecated, use the" + " ncas_amof_netcdf_template.file_info.FileInfo class instead. Use of the" + " instrument_dict option will be removed from version 2.7.0", + DeprecationWarning, + stacklevel=2, + ) + instrument_file_info = convert_instrument_dict_to_file_info( + instrument_dict, + instrument_dict["info"]["instrument_name"], + product, + loc, + tag + ) + else: + warnings.warn( + "instrument_dict and instrument_file_info both given, using" + " instrument_file_info. Use of instrument_dict is being deprecated," + " and will be removed from version 2.7.0.", + DeprecationWarning, + stacklevel=2, + ) + + if instrument_file_info is None: + msg = "No instrument file info given" + raise ValueError(msg) + chunk_by_dimension = chunk_by_dimension or {} # add chunks to variables with defined chunk dimensions - all_options = ["common", product] - for prod in all_options: - for var in (var_dict := instrument_dict[prod]["variables"]): - if "dimension" in var_dict[var].keys(): - var_dims = var_dict[var]["dimension"] - var_dims = var_dims.replace(".", ",") - var_dims = [x.strip() for x in var_dims.split(",")] - if all(var_dim in chunk_by_dimension.keys() for var_dim in var_dims): - chunksizes = tuple( - [int(chunk_by_dimension[var_dim]) for var_dim in var_dims] - ) - var_dict[var]["chunksizes"] = chunksizes - if isinstance(compression, str): - var_dict[var]["compression"] = compression - elif isinstance(compression, dict) and var in compression.keys(): - var_dict[var]["compression"] = compression[var] - else: - var_dict[var]["compression"] = None + for var in (var_dict := instrument_file_info.variables): + if "dimension" in var_dict[var].keys(): + var_dims = var_dict[var]["dimension"] + var_dims = var_dims.replace(".", ",") + var_dims = [x.strip() for x in var_dims.split(",")] + if all(var_dim in chunk_by_dimension.keys() for var_dim in var_dims): + chunksizes = tuple( + [int(chunk_by_dimension[var_dim]) for var_dim in var_dims] + ) + var_dict[var]["chunksizes"] = chunksizes + if isinstance(compression, str): + var_dict[var]["compression"] = compression + elif isinstance(compression, dict) and var in compression.keys(): + var_dict[var]["compression"] = compression[var] + else: + var_dict[var]["compression"] = None - if isinstance(complevel, int): - var_dict[var]["complevel"] = complevel - elif isinstance(complevel, dict) and var in complevel.keys(): - var_dict[var]["complevel"] = complevel[var] - else: - var_dict[var]["complevel"] = 4 + if isinstance(complevel, int): + var_dict[var]["complevel"] = complevel + elif isinstance(complevel, dict) and var in complevel.keys(): + var_dict[var]["complevel"] = complevel[var] + else: + var_dict[var]["complevel"] = 4 - if isinstance(shuffle, bool): - var_dict[var]["shuffle"] = shuffle - elif isinstance(shuffle, dict) and var in shuffle.keys(): - var_dict[var]["shuffle"] = shuffle[var] - else: - var_dict[var]["shuffle"] = True + if isinstance(shuffle, bool): + var_dict[var]["shuffle"] = shuffle + elif isinstance(shuffle, dict) and var in shuffle.keys(): + var_dict[var]["shuffle"] = shuffle[var] + else: + var_dict[var]["shuffle"] = True if ( - instrument_dict["info"]["Mobile/Fixed (loc)"].split("-")[0].strip().lower() + instrument_file_info.instrument_data["Mobile/Fixed (loc)"].split("-")[0].strip().lower() == "fixed" ): platform = ( - instrument_dict["info"]["Mobile/Fixed (loc)"].split("-")[-1].strip().lower() + instrument_file_info.instrument_data["Mobile/Fixed (loc)"].split("-")[-1].strip().lower() ) else: - platform = instrument_dict["info"]["Mobile/Fixed (loc)"].strip().lower() + platform = instrument_file_info.instrument_data["Mobile/Fixed (loc)"].strip().lower() if options != "": no_options = len(options.split("_")) @@ -389,7 +552,7 @@ def make_netcdf( options = f"_{options}" filename = ( - f"{instrument}_{f'{platform}_' if platform != '' else ''}" + f"{instrument_file_info.instrument_name}_{f'{platform}_' if platform != '' else ''}" f"{time}_{product}{options}_v{product_version}.nc" ) @@ -398,16 +561,12 @@ def make_netcdf( add_attributes( ncfile, - instrument_dict, - product, - created_time, - platform, - loc, + instrument_file_info=instrument_file_info, use_local_files=use_local_files, - tag=tag, + created_time=created_time, ) - add_dimensions(ncfile, instrument_dict, product, dimension_lengths) - add_variables(ncfile, instrument_dict, product, verbose=verbose) + add_dimensions(ncfile, instrument_file_info=instrument_file_info) + add_variables(ncfile, instrument_file_info=instrument_file_info, verbose=verbose) if return_open: return ncfile @@ -656,7 +815,9 @@ def main( products (str or list): string of one product or list of multiple products to make netCDF file for this instrument. If None, then all available products for the defined instrument - are made. + are made. -DEPRECATION WARNING- option to specify + either a list of 'None' is being deprecated and will + be removed in version 2.7.0. Use single data product. verbose (int): level of info to print out. Note that at the moment there is only one additional layer, this may increase in future. options (str): options to be included in file name. All options should be in @@ -704,77 +865,63 @@ def main( chunk_by_dimension = chunk_by_dimension or {} - instrument_dict = tsv2dict.instrument_dict( - instrument, loc=loc, use_local_files=use_local_files, tag=tag - ) + if isinstance(products, str): + products = [products] + elif products == None: + products = list_products(instrument=instrument, tag=tag) + warnings.warn( + "Passing 'None' as argument for 'products' is being deprecated. Use single" + " data product for this argument. Available data products for instrument" + f"{instrument} are {products}. The option to use 'None' will be removed" + " from version 2.7.0.", + DeprecationWarning, + stacklevel=2 + ) + elif isinstance(products, list): + warnings.warn( + "Giving multiple data products to the 'products' argument is" + " being deprecated. Use single data product as a string for this argument." + " The option to give a list will be removed from version 2.7.0.", + DeprecationWarning, + stacklevel=2, + ) - # check if platform needs changing - if platform is not None: - if "mobile" not in instrument_dict["info"]["Mobile/Fixed (loc)"].lower(): - print( - "[WARNING]: Changing platform for an " - f"observatory instrument {instrument}." - ) - instrument_dict["info"]["Mobile/Fixed (loc)"] = platform - - # get and check our list of products - tsvdictkeys = instrument_dict.keys() - poss_products = list(tsvdictkeys) - poss_products.remove("info") - poss_products.remove("common") - if products is None: # user doesn't specify products, make all - products = poss_products - else: # check user specified products are applicable for instrument - remove_products = [] - if isinstance(products, str): - products = [products] - for product in products: - if product not in poss_products: + + ncfiles = [] + + for product in products: + instrument_file_info = FileInfo(instrument, product, deployment_mode = loc, tag = tag) + instrument_file_info.get_product_info() + instrument_file_info.get_deployment_info() + instrument_file_info.get_instrument_info() + instrument_file_info.get_common_info() + + # check if platform needs changing + if platform is not None: + if "mobile" not in instrument_file_info.instrument_data["Mobile/Fixed (loc)"].lower(): print( - f"ERROR: {product} is not available for this " - "instrument, will be skipped." + "[WARNING]: Changing platform for an " + f"observatory instrument {instrument}." ) - remove_products.append(product) - for remove_product in remove_products: - products.remove(remove_product) - # so by now we should have our list of products, quit if we have no products - if not isinstance(products, list) or len(products) == 0: - msg = f"No valid products specified, valid products are {poss_products}" - raise ValueError(msg) + instrument_file_info.instrument_data["Mobile/Fixed (loc)"] = platform - # make sure we have dimension lengths for all expected dimensions - all_dimensions = [] - dimlengths = {} - for key, val in instrument_dict.items(): - if "dimensions" in val.keys() and (key in products or key == "common"): - for dim in list(val["dimensions"].keys()): - if dim not in all_dimensions: - all_dimensions.append(dim) - if ( - isinstance(val["dimensions"][dim]["Length"], int) - or "<" not in val["dimensions"][dim]["Length"] - ): - dimlengths[dim] = int(val["dimensions"][dim]["Length"]) - for key, value in dimension_lengths.items(): - if key not in dimlengths.keys(): - dimlengths[key] = value - for dim in all_dimensions: - if dim not in dimlengths.keys(): - length = input(f"Enter length for dimension {dim}: ") - dimlengths[dim] = int(length) + # make sure we have dimension lengths for all expected dimensions + for key, val in instrument_file_info.dimensions.items(): + if not isinstance(val["Length"], int): + if key in dimension_lengths.keys(): + val["Length"] = int(dimension_lengths[key]) + else: + length = input(f"Enter length for dimension {key}: ") + val["Length"] = int(length) - # make the files - if return_open: - ncfiles = [] - for product in products: + # make the files + if return_open: ncfiles.append( make_netcdf( instrument, product, date, - instrument_dict, - loc=loc, - dimension_lengths=dimlengths, + instrument_file_info = instrument_file_info, verbose=verbose, options=options, product_version=product_version, @@ -787,20 +934,13 @@ def main( complevel=complevel, shuffle=shuffle, ) - ) - if len(ncfiles) == 1: - return ncfiles[0] + ) else: - return ncfiles - else: - for product in products: make_netcdf( instrument, product, date, - instrument_dict, - loc=loc, - dimension_lengths=dimlengths, + instrument_file_info = instrument_file_info, verbose=verbose, options=options, product_version=product_version, @@ -813,6 +953,10 @@ def main( complevel=complevel, shuffle=shuffle, ) + if len(ncfiles) == 1: + return ncfiles[0] + elif len(ncfiles) >= 2: + return ncfiles if __name__ == "__main__": From 98cded5dcf10ca39d1b7f434b6994a0370489fdc Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 19 Nov 2024 12:04:22 +0000 Subject: [PATCH 08/17] Correction to adding instrument name to instrument dictionary --- src/ncas_amof_netcdf_template/tsv2dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ncas_amof_netcdf_template/tsv2dict.py b/src/ncas_amof_netcdf_template/tsv2dict.py index 85c887a..ddef150 100644 --- a/src/ncas_amof_netcdf_template/tsv2dict.py +++ b/src/ncas_amof_netcdf_template/tsv2dict.py @@ -130,7 +130,7 @@ def tsv2dict_instruments(tsv_file: str) -> dict[str, dict[str, str]]: for current_instrument in df_instruments.iloc: inst_dict = current_instrument.to_dict() inst_name = inst_dict.pop("New Instrument Name") - inst_dict["instrument_name"] = inst_dict + inst_dict["instrument_name"] = inst_name data_products = re.split(r",| |\|", inst_dict["Data Product(s)"]) data_products = list(filter(None, data_products)) inst_dict["Data Product(s)"] = data_products From fab183265b27228c48175afa1374d52ae470f960 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 19 Nov 2024 12:05:05 +0000 Subject: [PATCH 09/17] Add instrument name to instrument dictionary --- tests/test_tsv2dict.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_tsv2dict.py b/tests/test_tsv2dict.py index 232ca54..2de63bc 100644 --- a/tests/test_tsv2dict.py +++ b/tests/test_tsv2dict.py @@ -142,9 +142,9 @@ def test_tsv2dict_instruments(): # Check the result assert result == { - "instrument1": {"Data Product(s)": ["product1", "product2"]}, - "instrument2": {"Data Product(s)": ["product3", "product4"]}, - "instrument3": {"Data Product(s)": ["product5", "product6"]}, + "instrument1": {"Data Product(s)": ["product1", "product2"], "instrument_name": "instrument1"}, + "instrument2": {"Data Product(s)": ["product3", "product4"], "instrument_name": "instrument2"}, + "instrument3": {"Data Product(s)": ["product5", "product6"], "instrument_name": "instrument3"}, } # Delete the temporary file From 39172ad5ae09d688491e9781dc7d828e039e0fa4 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 19 Nov 2024 12:05:50 +0000 Subject: [PATCH 10/17] Update making product netCDF for using new class --- .../create_netcdf.py | 70 +++++++++---------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/src/ncas_amof_netcdf_template/create_netcdf.py b/src/ncas_amof_netcdf_template/create_netcdf.py index c8901fc..963db67 100644 --- a/src/ncas_amof_netcdf_template/create_netcdf.py +++ b/src/ncas_amof_netcdf_template/create_netcdf.py @@ -113,17 +113,32 @@ def add_attributes( if value["Fixed Value"] != "": ncfile.setncattr(key, value["Fixed Value"]) elif key == "source": - ncfile.setncattr(key, instrument_file_info.instrument_data["Descriptor"]) + if "Descriptor" in instrument_file_info.instrument_data.keys(): + ncfile.setncattr(key, instrument_file_info.instrument_data["Descriptor"]) + else: + ncfile.setncattr(key, "n/a") elif key == "institution": ncfile.setncattr(key, "National Centre for Atmospheric Science (NCAS)") elif key == "platform": - ncfile.setncattr(key, instrument_file_info.instrument_data["Mobile/Fixed (loc)"]) + if "Mobile/Fixed (loc)" in instrument_file_info.instrument_data.keys(): + ncfile.setncattr(key, instrument_file_info.instrument_data["Mobile/Fixed (loc)"]) + else: + ncfile.setncattr(key, "n/a") elif key == "instrument_manufacturer": - ncfile.setncattr(key, instrument_file_info.instrument_data["Manufacturer"]) + if "Manufacturer" in instrument_file_info.instrument_data.keys(): + ncfile.setncattr(key, instrument_file_info.instrument_data["Manufacturer"]) + else: + ncfile.setncattr(key, "n/a") elif key == "instrument_model": - ncfile.setncattr(key, instrument_file_info.instrument_data["Model No."]) + if "Model No." in instrument_file_info.instrument_data.keys(): + ncfile.setncattr(key, instrument_file_info.instrument_data["Model No."]) + else: + ncfile.setncattr(key, "n/a") elif key == "instrument_serial_number": - ncfile.setncattr(key, instrument_file_info.instrument_data["Serial Number"]) + if "Serial Number" in instrument_file_info.instrument_data.keys(): + ncfile.setncattr(key, instrument_file_info.instrument_data["Serial Number"]) + else: + ncfile.setncattr(key, "n/a") elif key == "amf_vocabularies_release": if use_local_files: attrsdict = tsv2dict.tsv2dict_attrs( @@ -707,34 +722,21 @@ def make_product_netcdf( if date is None: date = dt.datetime.now(dt.timezone.utc).strftime("%Y%m%d") - product_dict = tsv2dict.product_dict( - product, - platform=platform, - deployment_loc=deployment_loc, - use_local_files=use_local_files, - tag=tag, - ) + product_file_info = FileInfo(instrument_name, product, deployment_mode=deployment_loc, tag=tag) + product_file_info.get_common_info() + product_file_info.get_deployment_info() + product_file_info.get_product_info() + + product_file_info.instrument_data["Mobile/Fixed (loc)"] = platform # make sure we have dimension lengths for all expected dimensions - all_dimensions = [] - dimlengths = {} - for key, val in product_dict.items(): - if "dimensions" in val.keys() and (key == product or key == "common"): - for dim in list(val["dimensions"].keys()): - if dim not in all_dimensions: - all_dimensions.append(dim) - if ( - isinstance(val["dimensions"][dim]["Length"], int) - or "<" not in val["dimensions"][dim]["Length"] - ): - dimlengths[dim] = int(val["dimensions"][dim]["Length"]) - for key, value in dimension_lengths.items(): - if key not in dimlengths.keys(): - dimlengths[key] = value - for dim in all_dimensions: - if dim not in dimlengths.keys(): - length = input(f"Enter length for dimension {dim}: ") - dimlengths[dim] = int(length) + for key, val in product_file_info.dimensions.items(): + if not isinstance(val["Length"], int): + if key in dimension_lengths.keys(): + val["Length"] = int(dimension_lengths[key]) + else: + length = input(f"Enter length for dimension {key}: ") + val["Length"] = int(length) # make the files if return_open: @@ -742,9 +744,8 @@ def make_product_netcdf( instrument_name, product, date, - product_dict, + instrument_file_info=product_file_info, loc=deployment_loc, - dimension_lengths=dimlengths, verbose=verbose, options=options, product_version=product_version, @@ -763,9 +764,8 @@ def make_product_netcdf( instrument_name, product, date, - product_dict, + instrument_file_info=product_file_info, loc=deployment_loc, - dimension_lengths=dimlengths, verbose=verbose, options=options, product_version=product_version, From 611021edfb56e86ffa171b8a4c78342ef54a337f Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 19 Nov 2024 13:10:27 +0000 Subject: [PATCH 11/17] Remove unnecessary f strings and change comparison to None --- .../create_netcdf.py | 116 +++++++++++------- src/ncas_amof_netcdf_template/file_info.py | 102 ++++++++------- 2 files changed, 136 insertions(+), 82 deletions(-) diff --git a/src/ncas_amof_netcdf_template/create_netcdf.py b/src/ncas_amof_netcdf_template/create_netcdf.py index 963db67..36b2a3b 100644 --- a/src/ncas_amof_netcdf_template/create_netcdf.py +++ b/src/ncas_amof_netcdf_template/create_netcdf.py @@ -21,9 +21,12 @@ def add_attributes( ncfile: Dataset, - instrument_dict: Optional[dict[ - str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ]] = None, + instrument_dict: Optional[ + dict[ + str, + dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]], + ] + ] = None, product: Optional[str] = None, created_time: Optional[str] = None, location: Optional[str] = None, @@ -81,7 +84,7 @@ def add_attributes( instrument_dict["info"]["instrument_name"], product, loc, - tag + tag, ) else: warnings.warn( @@ -114,19 +117,25 @@ def add_attributes( ncfile.setncattr(key, value["Fixed Value"]) elif key == "source": if "Descriptor" in instrument_file_info.instrument_data.keys(): - ncfile.setncattr(key, instrument_file_info.instrument_data["Descriptor"]) + ncfile.setncattr( + key, instrument_file_info.instrument_data["Descriptor"] + ) else: ncfile.setncattr(key, "n/a") elif key == "institution": ncfile.setncattr(key, "National Centre for Atmospheric Science (NCAS)") elif key == "platform": if "Mobile/Fixed (loc)" in instrument_file_info.instrument_data.keys(): - ncfile.setncattr(key, instrument_file_info.instrument_data["Mobile/Fixed (loc)"]) + ncfile.setncattr( + key, instrument_file_info.instrument_data["Mobile/Fixed (loc)"] + ) else: ncfile.setncattr(key, "n/a") elif key == "instrument_manufacturer": if "Manufacturer" in instrument_file_info.instrument_data.keys(): - ncfile.setncattr(key, instrument_file_info.instrument_data["Manufacturer"]) + ncfile.setncattr( + key, instrument_file_info.instrument_data["Manufacturer"] + ) else: ncfile.setncattr(key, "n/a") elif key == "instrument_model": @@ -136,7 +145,9 @@ def add_attributes( ncfile.setncattr(key, "n/a") elif key == "instrument_serial_number": if "Serial Number" in instrument_file_info.instrument_data.keys(): - ncfile.setncattr(key, instrument_file_info.instrument_data["Serial Number"]) + ncfile.setncattr( + key, instrument_file_info.instrument_data["Serial Number"] + ) else: ncfile.setncattr(key, "n/a") elif key == "amf_vocabularies_release": @@ -169,9 +180,12 @@ def add_attributes( def add_dimensions( ncfile: Dataset, - instrument_dict: Optional[dict[ - str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ]] = None, + instrument_dict: Optional[ + dict[ + str, + dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]], + ] + ] = None, product: Optional[str] = None, dimension_lengths: Optional[dict[str, int]] = None, instrument_file_info: Optional[FileInfo] = None, @@ -205,7 +219,7 @@ def add_dimensions( ) if product is None or dimension_lengths is None: msg = ( - "If instrument_dict is still being used, 'product' and" + "If instrument_dict is still being used, 'product' and" " 'dimension_lengths' must be given. Preferred option is to switch" " to using instrument_file_info instead." ) @@ -221,7 +235,9 @@ def add_dimensions( if instrument_file_info is not None: for dim_name in instrument_file_info.dimensions.keys(): - ncfile.createDimension(dim_name, instrument_file_info.dimensions[dim_name]["Length"]) + ncfile.createDimension( + dim_name, instrument_file_info.dimensions[dim_name]["Length"] + ) elif dimension_lengths is not None: for key, length in dimension_lengths.items(): @@ -229,14 +245,17 @@ def add_dimensions( key in instrument_dict["common"]["dimensions"].keys() or key in instrument_dict[product]["dimensions"].keys() ): - ncfile.createDimension(key, length) + ncfile.createDimension(key, length) def add_variables( ncfile: Dataset, - instrument_dict: Optional[dict[ - str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ]] = None, + instrument_dict: Optional[ + dict[ + str, + dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]], + ] + ] = None, product: Optional[str] = None, instrument_file_info: Optional[FileInfo] = None, verbose: int = 0, @@ -390,11 +409,7 @@ def add_variables( and ("EXAMPLE" in mdatvalue or mdatvalue == "") ): # don't add empty attributes - if ( - isinstance(mdatvalue, str) - and mdatvalue == "" - and verbose >= 1 - ): + if isinstance(mdatvalue, str) and mdatvalue == "" and verbose >= 1: print( f"WARN: No value for attribute {mdatkey} " "for variable {key}, attribute not added" @@ -407,9 +422,12 @@ def make_netcdf( instrument: str, product: str, time: str, - instrument_dict: Optional[dict[ - str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ]] = None, + instrument_dict: Optional[ + dict[ + str, + dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]], + ] + ] = None, loc: str = "land", dimension_lengths: dict[str, int] = {}, verbose: int = 0, @@ -434,8 +452,8 @@ def make_netcdf( time (str): time that the data represents, in YYYYmmdd-HHMMSS format or as much of as required. instrument_dict (dict or None): -DEPRECATED- information about the instrument - from tsv2dict.instrument_dict. Use - instrument_file_info argument instead. Will be + from tsv2dict.instrument_dict. Use + instrument_file_info argument instead. Will be remved in version 2.7.0. instrument_file_info (FileInfo or None): information about the instrument, from file_info.FileInfo. @@ -500,7 +518,7 @@ def make_netcdf( instrument_dict["info"]["instrument_name"], product, loc, - tag + tag, ) else: warnings.warn( @@ -550,14 +568,22 @@ def make_netcdf( var_dict[var]["shuffle"] = True if ( - instrument_file_info.instrument_data["Mobile/Fixed (loc)"].split("-")[0].strip().lower() + instrument_file_info.instrument_data["Mobile/Fixed (loc)"] + .split("-")[0] + .strip() + .lower() == "fixed" ): platform = ( - instrument_file_info.instrument_data["Mobile/Fixed (loc)"].split("-")[-1].strip().lower() + instrument_file_info.instrument_data["Mobile/Fixed (loc)"] + .split("-")[-1] + .strip() + .lower() ) else: - platform = instrument_file_info.instrument_data["Mobile/Fixed (loc)"].strip().lower() + platform = ( + instrument_file_info.instrument_data["Mobile/Fixed (loc)"].strip().lower() + ) if options != "": no_options = len(options.split("_")) @@ -722,7 +748,9 @@ def make_product_netcdf( if date is None: date = dt.datetime.now(dt.timezone.utc).strftime("%Y%m%d") - product_file_info = FileInfo(instrument_name, product, deployment_mode=deployment_loc, tag=tag) + product_file_info = FileInfo( + instrument_name, product, deployment_mode=deployment_loc, tag=tag + ) product_file_info.get_common_info() product_file_info.get_deployment_info() product_file_info.get_product_info() @@ -867,7 +895,7 @@ def main( if isinstance(products, str): products = [products] - elif products == None: + elif products is None: products = list_products(instrument=instrument, tag=tag) warnings.warn( "Passing 'None' as argument for 'products' is being deprecated. Use single" @@ -875,7 +903,7 @@ def main( f"{instrument} are {products}. The option to use 'None' will be removed" " from version 2.7.0.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) elif isinstance(products, list): warnings.warn( @@ -886,11 +914,12 @@ def main( stacklevel=2, ) - ncfiles = [] - + for product in products: - instrument_file_info = FileInfo(instrument, product, deployment_mode = loc, tag = tag) + instrument_file_info = FileInfo( + instrument, product, deployment_mode=loc, tag=tag + ) instrument_file_info.get_product_info() instrument_file_info.get_deployment_info() instrument_file_info.get_instrument_info() @@ -898,7 +927,12 @@ def main( # check if platform needs changing if platform is not None: - if "mobile" not in instrument_file_info.instrument_data["Mobile/Fixed (loc)"].lower(): + if ( + "mobile" + not in instrument_file_info.instrument_data[ + "Mobile/Fixed (loc)" + ].lower() + ): print( "[WARNING]: Changing platform for an " f"observatory instrument {instrument}." @@ -921,7 +955,7 @@ def main( instrument, product, date, - instrument_file_info = instrument_file_info, + instrument_file_info=instrument_file_info, verbose=verbose, options=options, product_version=product_version, @@ -934,13 +968,13 @@ def main( complevel=complevel, shuffle=shuffle, ) - ) + ) else: make_netcdf( instrument, product, date, - instrument_file_info = instrument_file_info, + instrument_file_info=instrument_file_info, verbose=verbose, options=options, product_version=product_version, diff --git a/src/ncas_amof_netcdf_template/file_info.py b/src/ncas_amof_netcdf_template/file_info.py index 752dd70..f7f8069 100644 --- a/src/ncas_amof_netcdf_template/file_info.py +++ b/src/ncas_amof_netcdf_template/file_info.py @@ -14,6 +14,7 @@ class FileInfo: """ Class that will gather and hold all the data to create netCDF file with """ + def __init__( self, instrument_name: str, @@ -28,9 +29,9 @@ def __init__( instrument_name (str): name of the instrument data_product (str): name of data product to use deployment_mode (str): value of the 'deployment_mode' global attribute, and - different variables may be required depending on + different variables may be required depending on value. One of "land", "sea", "air", or "trajectory". - tag (str): tagged release version of AMF_CVs, or "latest" to get most + tag (str): tagged release version of AMF_CVs, or "latest" to get most recent version. Default is "latest". """ if deployment_mode not in ["land", "sea", "air", "trajectory"]: @@ -42,7 +43,9 @@ def __init__( self.deployment_mode = deployment_mode self.tag = tag if self.tag == "latest": - self.ncas_gen_version = self._get_github_latest_version("https://github.com/ncasuk/AMF_CVs") + self.ncas_gen_version = self._get_github_latest_version( + "https://github.com/ncasuk/AMF_CVs" + ) elif self._check_github_cvs_version_exists(release_tag=tag): self.ncas_gen_version = tag else: @@ -53,16 +56,13 @@ def __init__( self.variables = {} self.instrument_data = {} - def __repr__(self) -> str: class_name = type(self).__name__ return f"{class_name}(instrument_name='{self.instrument_name}', data_product='{self.data_product}', deployment_mode='{self.deployment_mode}', tag='{self.tag}') - ncas_gen_version = '{self.ncas_gen_version}" - def __str__(self) -> str: return f"Class with information for '{self.instrument_name}' instrument and '{self.data_product}' data product" - def get_common_info(self) -> None: """ Get all the common variables, dimensions and attributes, and add to class @@ -70,7 +70,6 @@ def get_common_info(self) -> None: """ self._tsv2dict_attrs(self._attributes_tsv_url(self.deployment_mode)) - def get_deployment_info(self) -> None: """ Get all the variables, dimensions and attributes related to the deployment @@ -79,7 +78,6 @@ def get_deployment_info(self) -> None: self._tsv2dict_dims(self._dimensions_tsv_url(self.deployment_mode)) self._tsv2dict_vars(self._variables_tsv_url(self.deployment_mode)) - def get_product_info(self) -> None: """ Get all the variables, dimensions and attributes related to the data product, @@ -89,7 +87,6 @@ def get_product_info(self) -> None: self._tsv2dict_dims(self._dimensions_tsv_url(self.data_product)) self._tsv2dict_vars(self._variables_tsv_url(self.data_product)) - def get_instrument_info(self) -> None: """ Get all the attribute data related to a defined instrument in the @@ -100,7 +97,6 @@ def get_instrument_info(self) -> None: else: self._tsv2dict_instruments(self._get_community_instrument_tsv_url()) - def _tsv2dict_vars(self, tsv_file: str) -> None: """ For a given tsv file from the AMF_CVs GitHub repo, add dictionary of @@ -135,11 +131,12 @@ def _tsv2dict_vars(self, tsv_file: str) -> None: f"EXAMPLE: {current_line['example value']}" ) else: - current_var_dict[current_line["Attribute"]] = current_line["Value"] + current_var_dict[current_line["Attribute"]] = current_line[ + "Value" + ] self.variables[current_var] = current_var_dict - def _tsv2dict_dims(self, tsv_file: str) -> None: """ For a given tsv file from the AMF_CVs GitHub repo, add dictionary of dimensions @@ -159,7 +156,6 @@ def _tsv2dict_dims(self, tsv_file: str) -> None: dim_dict["Length"] = int(dim_dict["Length"]) self.dimensions[dim_name] = dim_dict - def _tsv2dict_attrs(self, tsv_file: str) -> None: """ For a given tsv file from the AMF_CVs GitHub repo, add dictionary of attributes @@ -177,7 +173,6 @@ def _tsv2dict_attrs(self, tsv_file: str) -> None: attr_name = attr_dict.pop("Name") self.attributes[attr_name] = attr_dict - def _tsv2dict_instruments(self, tsv_file: str) -> None: """ For a given tsv file from the ncas-data-instrument-vocabs repo, add dictionary @@ -188,19 +183,31 @@ def _tsv2dict_instruments(self, tsv_file: str) -> None: """ if self._check_website_exists(tsv_file): df_instruments = pd.read_csv(tsv_file, sep="\t") - df_instrument = df_instruments.where(df_instruments["New Instrument Name"] == self.instrument_name).dropna(subset=["New Instrument Name"]) + df_instrument = df_instruments.where( + df_instruments["New Instrument Name"] == self.instrument_name + ).dropna(subset=["New Instrument Name"]) if len(df_instrument) == 0: - print(f"[WARNING] No details found for instrument {self.instrument_name}...") + print( + f"[WARNING] No details found for instrument {self.instrument_name}..." + ) else: for inst in df_instrument.iloc: instrument_dict = inst.to_dict() - data_products = re.split(r",| |\|", instrument_dict["Data Product(s)"]) + data_products = re.split( + r",| |\|", instrument_dict["Data Product(s)"] + ) data_products = list(filter(None, data_products)) instrument_dict["Data Product(s)"] = data_products - for i in ["Manufacturer", "Model No.", "Serial Number", "Data Product(s)", "Mobile/Fixed (loc)", "Descriptor"]: + for i in [ + "Manufacturer", + "Model No.", + "Serial Number", + "Data Product(s)", + "Mobile/Fixed (loc)", + "Descriptor", + ]: self.instrument_data[i] = instrument_dict[i] - def _check_instrument_has_product(self) -> bool: """ Check instrument has defined data product associated with it @@ -212,7 +219,6 @@ def _check_instrument_has_product(self) -> bool: self.get_instrument_info() return self.data_product in self.instrument_data["Data Product(s)"] - def _get_github_latest_version(self, url: str) -> str: """ Get the tag of the latest release version @@ -225,7 +231,6 @@ def _get_github_latest_version(self, url: str) -> str: """ return requests.get(f"{url}/releases/latest").url.split("/")[-1] - def _check_website_exists(self, url: str) -> bool: """ Check website exists and is up @@ -239,8 +244,9 @@ def _check_website_exists(self, url: str) -> bool: status = requests.get(url).status_code return status == 200 - - def _check_github_cvs_version_exists(self, release_tag: Optional[str] = None) -> bool: + def _check_github_cvs_version_exists( + self, release_tag: Optional[str] = None + ) -> bool: """ Check the requested tagged version of AMF_CVs exists on GitHub """ @@ -248,7 +254,6 @@ def _check_github_cvs_version_exists(self, release_tag: Optional[str] = None) -> release_tag = self.ncas_gen_version url = f"https://github.com/ncasuk/AMF_CVs/releases/{release_tag}" return self._check_website_exists(url) - def _dimensions_tsv_url(self, obj: str) -> str: """ @@ -261,10 +266,13 @@ def _dimensions_tsv_url(self, obj: str) -> str: str: URL location of dimension tsv file """ file_loc = f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{self.ncas_gen_version}/product-definitions/tsv" - path, option = (obj, "specific") if obj not in ["land", "sea", "air", "trajectory"] else ("_common", obj) + path, option = ( + (obj, "specific") + if obj not in ["land", "sea", "air", "trajectory"] + else ("_common", obj) + ) return f"{file_loc}/{path}/dimensions-{option}.tsv" - def _variables_tsv_url(self, obj: str) -> str: """ Get the URL for the tsv files for variables @@ -276,10 +284,13 @@ def _variables_tsv_url(self, obj: str) -> str: str: URL location of variable tsv file """ file_loc = f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{self.ncas_gen_version}/product-definitions/tsv" - path, option = (obj, "specific") if obj not in ["land", "sea", "air", "trajectory"] else ("_common", obj) + path, option = ( + (obj, "specific") + if obj not in ["land", "sea", "air", "trajectory"] + else ("_common", obj) + ) return f"{file_loc}/{path}/variables-{option}.tsv" - def _attributes_tsv_url(self, obj: str) -> str: """ Get the URL for the tsv files for attributes @@ -291,25 +302,35 @@ def _attributes_tsv_url(self, obj: str) -> str: str: URL location of attribute tsv file """ file_loc = f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{self.ncas_gen_version}/product-definitions/tsv" - path, option = (obj, "-specific") if obj not in ["land", "sea", "air", "trajectory"] else ("_common", "") + path, option = ( + (obj, "-specific") + if obj not in ["land", "sea", "air", "trajectory"] + else ("_common", "") + ) return f"{file_loc}/{path}/global-attributes{option}.tsv" - def _get_ncas_instrument_tsv_url(self) -> str: """ Get the URL for the tsv file of NCAS instruments """ - vocab_version = self._get_github_latest_version("https://github.com/ncasuk/ncas-data-instrument-vocabs") - file_loc = f"https://raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs" + vocab_version = self._get_github_latest_version( + "https://github.com/ncasuk/ncas-data-instrument-vocabs" + ) + file_loc = ( + "https://raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs" + ) return f"{file_loc}/{vocab_version}/product-definitions/tsv/_instrument_vocabs/ncas-instrument-name-and-descriptors.tsv" - def _get_community_instrument_tsv_url(self) -> str: """ Get the URL for the tsv file of NCAS instruments """ - vocab_version = self._get_github_latest_version("https://github.com/ncasuk/ncas-data-instrument-vocabs") - file_loc = f"https://raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs" + vocab_version = self._get_github_latest_version( + "https://github.com/ncasuk/ncas-data-instrument-vocabs" + ) + file_loc = ( + "https://raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs" + ) return f"{file_loc}/{vocab_version}/product-definitions/tsv/_instrument_vocabs/community-instrument-name-and-descriptors.tsv" @@ -329,7 +350,7 @@ def convert_instrument_dict_to_file_info( instrument_dict (dict): Dictionary made by tsv2dict.instrument_dict instrument_name (str): Name of the instrument data_product (str): Data product of data for netCDF file - deployment_mode (str): Deployment mode of instrument. One of "land", "sea", + deployment_mode (str): Deployment mode of instrument. One of "land", "sea", "air", "trajectory" tag (str): Tag release of AMF_CVs being used @@ -349,12 +370,11 @@ def convert_instrument_dict_to_file_info( instrument_file_info.variables[var_name] = var_dict if "info" in instrument_dict.keys(): for key, value in instrument_dict["info"].items(): - if key == "Mobile/Fixed (loc)" and value.split("-")[0].strip().lower() == "fixed": + if ( + key == "Mobile/Fixed (loc)" + and value.split("-")[0].strip().lower() == "fixed" + ): value = value.split("-")[1].strip() instrument_file_info.instrument_data[key] = value return instrument_file_info - - - - From 194c33b7fa7262a5ba5d10d2f75f3a3db643cb75 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 13:15:19 +0000 Subject: [PATCH 12/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_create_netcdf.py | 4 ++-- tests/test_tsv2dict.py | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/test_create_netcdf.py b/tests/test_create_netcdf.py index a3ef86a..fd3ea28 100644 --- a/tests/test_create_netcdf.py +++ b/tests/test_create_netcdf.py @@ -50,7 +50,7 @@ def test_add_attributes(): "Model No.": "Model Number", "Serial Number": "Serial Number", "instrument_name": "instrument-name", - "Mobile/Fixed (loc)": "location1" + "Mobile/Fixed (loc)": "location1", }, "common": { "attributes": { @@ -227,7 +227,7 @@ def test_add_variables(): }, "info": { "instrument_name": "instrument-name", - } + }, } product = "product1" diff --git a/tests/test_tsv2dict.py b/tests/test_tsv2dict.py index 2de63bc..831fc09 100644 --- a/tests/test_tsv2dict.py +++ b/tests/test_tsv2dict.py @@ -142,9 +142,18 @@ def test_tsv2dict_instruments(): # Check the result assert result == { - "instrument1": {"Data Product(s)": ["product1", "product2"], "instrument_name": "instrument1"}, - "instrument2": {"Data Product(s)": ["product3", "product4"], "instrument_name": "instrument2"}, - "instrument3": {"Data Product(s)": ["product5", "product6"], "instrument_name": "instrument3"}, + "instrument1": { + "Data Product(s)": ["product1", "product2"], + "instrument_name": "instrument1", + }, + "instrument2": { + "Data Product(s)": ["product3", "product4"], + "instrument_name": "instrument2", + }, + "instrument3": { + "Data Product(s)": ["product5", "product6"], + "instrument_name": "instrument3", + }, } # Delete the temporary file From a004e4bf832528c4968088efa314d6ad4597222a Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 19 Nov 2024 14:09:14 +0000 Subject: [PATCH 13/17] Add a few tests on adding attributes --- tests/test_create_netcdf.py | 50 ++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/tests/test_create_netcdf.py b/tests/test_create_netcdf.py index fd3ea28..16b4619 100644 --- a/tests/test_create_netcdf.py +++ b/tests/test_create_netcdf.py @@ -5,6 +5,7 @@ import tempfile import getpass import socket +import datetime as dt import ncas_amof_netcdf_template as nant @@ -35,7 +36,14 @@ def test_main_process(): os.remove("ncas-aws-10_somewhere-else_20221117_surface-met_v1.0.nc") -def test_add_attributes(): +@pytest.mark.parametrize( + "created_time", + [ + "2022-01-01T00:00:00Z", + None, + ], +) +def test_add_attributes(created_time): # Create a temporary file for testing temp_file = tempfile.NamedTemporaryFile(delete=False) temp_file.close() @@ -76,7 +84,6 @@ def test_add_attributes(): } product = "product1" - created_time = "2022-01-01T00:00:00Z" location = "location1" loc = "land" use_local_files = None @@ -119,8 +126,15 @@ def test_add_attributes(): ncfile.getncattr("amf_vocabularies_release") == "https://github.com/ncasuk/AMF_CVs/releases/tag/v2.0.0" ) - assert ncfile.getncattr("history") == history_text - assert ncfile.getncattr("last_revised_date") == created_time + if created_time is not None: + assert ncfile.getncattr("history") == history_text + assert ncfile.getncattr("last_revised_date") == created_time + else: + # account for possibility of running test more than one second after making + # file, hoping not to be unlucky enough to run just before midnight + assert ncfile.getncattr("last_revised_date").startswith( + dt.datetime.now(tz=dt.UTC).strftime("%Y%m%dT") + ) assert ncfile.getncattr("deployment_mode") == loc assert ( ncfile.getncattr("defined_attribute") @@ -133,6 +147,34 @@ def test_add_attributes(): ncfile.close() os.remove(temp_file.name) + with pytest.raises(ValueError, match=r".+'product' must be given.+"): + # Create a temporary file for testing + temp_file = tempfile.NamedTemporaryFile(delete=False) + temp_file.close() + + # Create a netCDF file for testing + ncfile = Dataset(temp_file.name, "w", format="NETCDF4") + nant.create_netcdf.add_attributes( + ncfile, + instrument_dict=instrument_dict, + created_time=created_time, + location=location, + loc=loc, + use_local_files=use_local_files, + tag=tag, + ) + + with pytest.raises(ValueError, match="No instrument file info given"): + # Create a temporary file for testing + temp_file = tempfile.NamedTemporaryFile(delete=False) + temp_file.close() + + # Create a netCDF file for testing + ncfile = Dataset(temp_file.name, "w", format="NETCDF4") + nant.create_netcdf.add_attributes( + ncfile, + ) + def test_add_dimensions(): # Create a temporary file for testing From 48a5e18e53d9c36465fc7989aed8b7aab66754f7 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 19 Nov 2024 14:13:18 +0000 Subject: [PATCH 14/17] Use full reference to utc timezone --- src/ncas_amof_netcdf_template/create_netcdf.py | 2 +- tests/test_create_netcdf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ncas_amof_netcdf_template/create_netcdf.py b/src/ncas_amof_netcdf_template/create_netcdf.py index 36b2a3b..fb987a5 100644 --- a/src/ncas_amof_netcdf_template/create_netcdf.py +++ b/src/ncas_amof_netcdf_template/create_netcdf.py @@ -110,7 +110,7 @@ def add_attributes( raise ValueError(msg) if created_time is None: - created_time = dt.datetime.now(tz=dt.UTC).strftime("%Y%m%dT%H%M%S") + created_time = dt.datetime.now(tz=dt.timezone.utc).strftime("%Y%m%dT%H%M%S") for key, value in instrument_file_info.attributes.items(): if value["Fixed Value"] != "": diff --git a/tests/test_create_netcdf.py b/tests/test_create_netcdf.py index 16b4619..18da081 100644 --- a/tests/test_create_netcdf.py +++ b/tests/test_create_netcdf.py @@ -133,7 +133,7 @@ def test_add_attributes(created_time): # account for possibility of running test more than one second after making # file, hoping not to be unlucky enough to run just before midnight assert ncfile.getncattr("last_revised_date").startswith( - dt.datetime.now(tz=dt.UTC).strftime("%Y%m%dT") + dt.datetime.now(tz=dt.timezone.utc).strftime("%Y%m%dT") ) assert ncfile.getncattr("deployment_mode") == loc assert ( From 40114281f4c379b5f2e57eb6061948304a7e6bef Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 19 Nov 2024 14:18:44 +0000 Subject: [PATCH 15/17] Add API docs for file_info --- docs/source/api.rst | 1 + docs/source/file_info.rst | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 docs/source/file_info.rst diff --git a/docs/source/api.rst b/docs/source/api.rst index 2cf00de..f9851cf 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -7,6 +7,7 @@ API :recursive: ncas_amof_netcdf_template.create_netcdf + ncas_amof_netcdf_template.file_info ncas_amof_netcdf_template.remove_empty_variables ncas_amof_netcdf_template.tsv2dict ncas_amof_netcdf_template.util diff --git a/docs/source/file_info.rst b/docs/source/file_info.rst new file mode 100644 index 0000000..eb2921b --- /dev/null +++ b/docs/source/file_info.rst @@ -0,0 +1,5 @@ +file_info +--------- + +.. automodule:: ncas_amof_netcdf_template.file_info + :members: From 3790f83f2466bdb2d8bd12bc6bded3dccce7c5c4 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 19 Nov 2024 14:31:24 +0000 Subject: [PATCH 16/17] Update generated doc files --- .../ncas_amof_netcdf_template.file_info.rst | 26 +++++++++++++++++++ .../ncas_amof_netcdf_template.util.rst | 1 + .../ncas_amof_netcdf_template.values.rst | 1 + 3 files changed, 28 insertions(+) create mode 100644 docs/source/~generated/ncas_amof_netcdf_template.file_info.rst diff --git a/docs/source/~generated/ncas_amof_netcdf_template.file_info.rst b/docs/source/~generated/ncas_amof_netcdf_template.file_info.rst new file mode 100644 index 0000000..8f2f470 --- /dev/null +++ b/docs/source/~generated/ncas_amof_netcdf_template.file_info.rst @@ -0,0 +1,26 @@ +ncas\_amof\_netcdf\_template.file\_info +======================================= + +.. automodule:: ncas_amof_netcdf_template.file_info + + + + + + + + .. rubric:: Functions + + .. autosummary:: + + convert_instrument_dict_to_file_info + + + + + + .. rubric:: Classes + + .. autosummary:: + + FileInfo diff --git a/docs/source/~generated/ncas_amof_netcdf_template.util.rst b/docs/source/~generated/ncas_amof_netcdf_template.util.rst index 5a45bbe..a2d2861 100644 --- a/docs/source/~generated/ncas_amof_netcdf_template.util.rst +++ b/docs/source/~generated/ncas_amof_netcdf_template.util.rst @@ -14,6 +14,7 @@ .. autosummary:: add_metadata_to_netcdf + change_qc_flags check_float check_int check_type_convert diff --git a/docs/source/~generated/ncas_amof_netcdf_template.values.rst b/docs/source/~generated/ncas_amof_netcdf_template.values.rst index 499693a..5690c06 100644 --- a/docs/source/~generated/ncas_amof_netcdf_template.values.rst +++ b/docs/source/~generated/ncas_amof_netcdf_template.values.rst @@ -20,3 +20,4 @@ get_community_instruments_url get_instruments_url get_latest_CVs_version + get_latest_instrument_CVs_version From f4cdd642cce1bd19151cbf0d7a0f05a9708d1df2 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 19 Nov 2024 14:57:52 +0000 Subject: [PATCH 17/17] Remove incorrect warning message --- src/ncas_amof_netcdf_template/create_netcdf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ncas_amof_netcdf_template/create_netcdf.py b/src/ncas_amof_netcdf_template/create_netcdf.py index fb987a5..61e237b 100644 --- a/src/ncas_amof_netcdf_template/create_netcdf.py +++ b/src/ncas_amof_netcdf_template/create_netcdf.py @@ -327,7 +327,6 @@ def add_variables( # and forget about that variable if "dimension" not in tmp_value.keys(): print(f"WARN: No dimensions for variable {key}") - print("Variable not added file") var_dims = () else: var_dims = tmp_value.pop("dimension")