diff --git a/src/ncas_amof_netcdf_template/create_netcdf.py b/src/ncas_amof_netcdf_template/create_netcdf.py index 963db67..36b2a3b 100644 --- a/src/ncas_amof_netcdf_template/create_netcdf.py +++ b/src/ncas_amof_netcdf_template/create_netcdf.py @@ -21,9 +21,12 @@ def add_attributes( ncfile: Dataset, - instrument_dict: Optional[dict[ - str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ]] = None, + instrument_dict: Optional[ + dict[ + str, + dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]], + ] + ] = None, product: Optional[str] = None, created_time: Optional[str] = None, location: Optional[str] = None, @@ -81,7 +84,7 @@ def add_attributes( instrument_dict["info"]["instrument_name"], product, loc, - tag + tag, ) else: warnings.warn( @@ -114,19 +117,25 @@ def add_attributes( ncfile.setncattr(key, value["Fixed Value"]) elif key == "source": if "Descriptor" in instrument_file_info.instrument_data.keys(): - ncfile.setncattr(key, instrument_file_info.instrument_data["Descriptor"]) + ncfile.setncattr( + key, instrument_file_info.instrument_data["Descriptor"] + ) else: ncfile.setncattr(key, "n/a") elif key == "institution": ncfile.setncattr(key, "National Centre for Atmospheric Science (NCAS)") elif key == "platform": if "Mobile/Fixed (loc)" in instrument_file_info.instrument_data.keys(): - ncfile.setncattr(key, instrument_file_info.instrument_data["Mobile/Fixed (loc)"]) + ncfile.setncattr( + key, instrument_file_info.instrument_data["Mobile/Fixed (loc)"] + ) else: ncfile.setncattr(key, "n/a") elif key == "instrument_manufacturer": if "Manufacturer" in instrument_file_info.instrument_data.keys(): - ncfile.setncattr(key, instrument_file_info.instrument_data["Manufacturer"]) + ncfile.setncattr( + key, instrument_file_info.instrument_data["Manufacturer"] + ) else: ncfile.setncattr(key, "n/a") elif key == "instrument_model": @@ -136,7 +145,9 @@ def add_attributes( ncfile.setncattr(key, "n/a") elif key == "instrument_serial_number": if "Serial Number" in instrument_file_info.instrument_data.keys(): - ncfile.setncattr(key, instrument_file_info.instrument_data["Serial Number"]) + ncfile.setncattr( + key, instrument_file_info.instrument_data["Serial Number"] + ) else: ncfile.setncattr(key, "n/a") elif key == "amf_vocabularies_release": @@ -169,9 +180,12 @@ def add_attributes( def add_dimensions( ncfile: Dataset, - instrument_dict: Optional[dict[ - str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ]] = None, + instrument_dict: Optional[ + dict[ + str, + dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]], + ] + ] = None, product: Optional[str] = None, dimension_lengths: Optional[dict[str, int]] = None, instrument_file_info: Optional[FileInfo] = None, @@ -205,7 +219,7 @@ def add_dimensions( ) if product is None or dimension_lengths is None: msg = ( - "If instrument_dict is still being used, 'product' and" + "If instrument_dict is still being used, 'product' and" " 'dimension_lengths' must be given. Preferred option is to switch" " to using instrument_file_info instead." ) @@ -221,7 +235,9 @@ def add_dimensions( if instrument_file_info is not None: for dim_name in instrument_file_info.dimensions.keys(): - ncfile.createDimension(dim_name, instrument_file_info.dimensions[dim_name]["Length"]) + ncfile.createDimension( + dim_name, instrument_file_info.dimensions[dim_name]["Length"] + ) elif dimension_lengths is not None: for key, length in dimension_lengths.items(): @@ -229,14 +245,17 @@ def add_dimensions( key in instrument_dict["common"]["dimensions"].keys() or key in instrument_dict[product]["dimensions"].keys() ): - ncfile.createDimension(key, length) + ncfile.createDimension(key, length) def add_variables( ncfile: Dataset, - instrument_dict: Optional[dict[ - str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ]] = None, + instrument_dict: Optional[ + dict[ + str, + dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]], + ] + ] = None, product: Optional[str] = None, instrument_file_info: Optional[FileInfo] = None, verbose: int = 0, @@ -390,11 +409,7 @@ def add_variables( and ("EXAMPLE" in mdatvalue or mdatvalue == "") ): # don't add empty attributes - if ( - isinstance(mdatvalue, str) - and mdatvalue == "" - and verbose >= 1 - ): + if isinstance(mdatvalue, str) and mdatvalue == "" and verbose >= 1: print( f"WARN: No value for attribute {mdatkey} " "for variable {key}, attribute not added" @@ -407,9 +422,12 @@ def make_netcdf( instrument: str, product: str, time: str, - instrument_dict: Optional[dict[ - str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ]] = None, + instrument_dict: Optional[ + dict[ + str, + dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]], + ] + ] = None, loc: str = "land", dimension_lengths: dict[str, int] = {}, verbose: int = 0, @@ -434,8 +452,8 @@ def make_netcdf( time (str): time that the data represents, in YYYYmmdd-HHMMSS format or as much of as required. instrument_dict (dict or None): -DEPRECATED- information about the instrument - from tsv2dict.instrument_dict. Use - instrument_file_info argument instead. Will be + from tsv2dict.instrument_dict. Use + instrument_file_info argument instead. Will be remved in version 2.7.0. instrument_file_info (FileInfo or None): information about the instrument, from file_info.FileInfo. @@ -500,7 +518,7 @@ def make_netcdf( instrument_dict["info"]["instrument_name"], product, loc, - tag + tag, ) else: warnings.warn( @@ -550,14 +568,22 @@ def make_netcdf( var_dict[var]["shuffle"] = True if ( - instrument_file_info.instrument_data["Mobile/Fixed (loc)"].split("-")[0].strip().lower() + instrument_file_info.instrument_data["Mobile/Fixed (loc)"] + .split("-")[0] + .strip() + .lower() == "fixed" ): platform = ( - instrument_file_info.instrument_data["Mobile/Fixed (loc)"].split("-")[-1].strip().lower() + instrument_file_info.instrument_data["Mobile/Fixed (loc)"] + .split("-")[-1] + .strip() + .lower() ) else: - platform = instrument_file_info.instrument_data["Mobile/Fixed (loc)"].strip().lower() + platform = ( + instrument_file_info.instrument_data["Mobile/Fixed (loc)"].strip().lower() + ) if options != "": no_options = len(options.split("_")) @@ -722,7 +748,9 @@ def make_product_netcdf( if date is None: date = dt.datetime.now(dt.timezone.utc).strftime("%Y%m%d") - product_file_info = FileInfo(instrument_name, product, deployment_mode=deployment_loc, tag=tag) + product_file_info = FileInfo( + instrument_name, product, deployment_mode=deployment_loc, tag=tag + ) product_file_info.get_common_info() product_file_info.get_deployment_info() product_file_info.get_product_info() @@ -867,7 +895,7 @@ def main( if isinstance(products, str): products = [products] - elif products == None: + elif products is None: products = list_products(instrument=instrument, tag=tag) warnings.warn( "Passing 'None' as argument for 'products' is being deprecated. Use single" @@ -875,7 +903,7 @@ def main( f"{instrument} are {products}. The option to use 'None' will be removed" " from version 2.7.0.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) elif isinstance(products, list): warnings.warn( @@ -886,11 +914,12 @@ def main( stacklevel=2, ) - ncfiles = [] - + for product in products: - instrument_file_info = FileInfo(instrument, product, deployment_mode = loc, tag = tag) + instrument_file_info = FileInfo( + instrument, product, deployment_mode=loc, tag=tag + ) instrument_file_info.get_product_info() instrument_file_info.get_deployment_info() instrument_file_info.get_instrument_info() @@ -898,7 +927,12 @@ def main( # check if platform needs changing if platform is not None: - if "mobile" not in instrument_file_info.instrument_data["Mobile/Fixed (loc)"].lower(): + if ( + "mobile" + not in instrument_file_info.instrument_data[ + "Mobile/Fixed (loc)" + ].lower() + ): print( "[WARNING]: Changing platform for an " f"observatory instrument {instrument}." @@ -921,7 +955,7 @@ def main( instrument, product, date, - instrument_file_info = instrument_file_info, + instrument_file_info=instrument_file_info, verbose=verbose, options=options, product_version=product_version, @@ -934,13 +968,13 @@ def main( complevel=complevel, shuffle=shuffle, ) - ) + ) else: make_netcdf( instrument, product, date, - instrument_file_info = instrument_file_info, + instrument_file_info=instrument_file_info, verbose=verbose, options=options, product_version=product_version, diff --git a/src/ncas_amof_netcdf_template/file_info.py b/src/ncas_amof_netcdf_template/file_info.py index 752dd70..f7f8069 100644 --- a/src/ncas_amof_netcdf_template/file_info.py +++ b/src/ncas_amof_netcdf_template/file_info.py @@ -14,6 +14,7 @@ class FileInfo: """ Class that will gather and hold all the data to create netCDF file with """ + def __init__( self, instrument_name: str, @@ -28,9 +29,9 @@ def __init__( instrument_name (str): name of the instrument data_product (str): name of data product to use deployment_mode (str): value of the 'deployment_mode' global attribute, and - different variables may be required depending on + different variables may be required depending on value. One of "land", "sea", "air", or "trajectory". - tag (str): tagged release version of AMF_CVs, or "latest" to get most + tag (str): tagged release version of AMF_CVs, or "latest" to get most recent version. Default is "latest". """ if deployment_mode not in ["land", "sea", "air", "trajectory"]: @@ -42,7 +43,9 @@ def __init__( self.deployment_mode = deployment_mode self.tag = tag if self.tag == "latest": - self.ncas_gen_version = self._get_github_latest_version("https://github.com/ncasuk/AMF_CVs") + self.ncas_gen_version = self._get_github_latest_version( + "https://github.com/ncasuk/AMF_CVs" + ) elif self._check_github_cvs_version_exists(release_tag=tag): self.ncas_gen_version = tag else: @@ -53,16 +56,13 @@ def __init__( self.variables = {} self.instrument_data = {} - def __repr__(self) -> str: class_name = type(self).__name__ return f"{class_name}(instrument_name='{self.instrument_name}', data_product='{self.data_product}', deployment_mode='{self.deployment_mode}', tag='{self.tag}') - ncas_gen_version = '{self.ncas_gen_version}" - def __str__(self) -> str: return f"Class with information for '{self.instrument_name}' instrument and '{self.data_product}' data product" - def get_common_info(self) -> None: """ Get all the common variables, dimensions and attributes, and add to class @@ -70,7 +70,6 @@ def get_common_info(self) -> None: """ self._tsv2dict_attrs(self._attributes_tsv_url(self.deployment_mode)) - def get_deployment_info(self) -> None: """ Get all the variables, dimensions and attributes related to the deployment @@ -79,7 +78,6 @@ def get_deployment_info(self) -> None: self._tsv2dict_dims(self._dimensions_tsv_url(self.deployment_mode)) self._tsv2dict_vars(self._variables_tsv_url(self.deployment_mode)) - def get_product_info(self) -> None: """ Get all the variables, dimensions and attributes related to the data product, @@ -89,7 +87,6 @@ def get_product_info(self) -> None: self._tsv2dict_dims(self._dimensions_tsv_url(self.data_product)) self._tsv2dict_vars(self._variables_tsv_url(self.data_product)) - def get_instrument_info(self) -> None: """ Get all the attribute data related to a defined instrument in the @@ -100,7 +97,6 @@ def get_instrument_info(self) -> None: else: self._tsv2dict_instruments(self._get_community_instrument_tsv_url()) - def _tsv2dict_vars(self, tsv_file: str) -> None: """ For a given tsv file from the AMF_CVs GitHub repo, add dictionary of @@ -135,11 +131,12 @@ def _tsv2dict_vars(self, tsv_file: str) -> None: f"EXAMPLE: {current_line['example value']}" ) else: - current_var_dict[current_line["Attribute"]] = current_line["Value"] + current_var_dict[current_line["Attribute"]] = current_line[ + "Value" + ] self.variables[current_var] = current_var_dict - def _tsv2dict_dims(self, tsv_file: str) -> None: """ For a given tsv file from the AMF_CVs GitHub repo, add dictionary of dimensions @@ -159,7 +156,6 @@ def _tsv2dict_dims(self, tsv_file: str) -> None: dim_dict["Length"] = int(dim_dict["Length"]) self.dimensions[dim_name] = dim_dict - def _tsv2dict_attrs(self, tsv_file: str) -> None: """ For a given tsv file from the AMF_CVs GitHub repo, add dictionary of attributes @@ -177,7 +173,6 @@ def _tsv2dict_attrs(self, tsv_file: str) -> None: attr_name = attr_dict.pop("Name") self.attributes[attr_name] = attr_dict - def _tsv2dict_instruments(self, tsv_file: str) -> None: """ For a given tsv file from the ncas-data-instrument-vocabs repo, add dictionary @@ -188,19 +183,31 @@ def _tsv2dict_instruments(self, tsv_file: str) -> None: """ if self._check_website_exists(tsv_file): df_instruments = pd.read_csv(tsv_file, sep="\t") - df_instrument = df_instruments.where(df_instruments["New Instrument Name"] == self.instrument_name).dropna(subset=["New Instrument Name"]) + df_instrument = df_instruments.where( + df_instruments["New Instrument Name"] == self.instrument_name + ).dropna(subset=["New Instrument Name"]) if len(df_instrument) == 0: - print(f"[WARNING] No details found for instrument {self.instrument_name}...") + print( + f"[WARNING] No details found for instrument {self.instrument_name}..." + ) else: for inst in df_instrument.iloc: instrument_dict = inst.to_dict() - data_products = re.split(r",| |\|", instrument_dict["Data Product(s)"]) + data_products = re.split( + r",| |\|", instrument_dict["Data Product(s)"] + ) data_products = list(filter(None, data_products)) instrument_dict["Data Product(s)"] = data_products - for i in ["Manufacturer", "Model No.", "Serial Number", "Data Product(s)", "Mobile/Fixed (loc)", "Descriptor"]: + for i in [ + "Manufacturer", + "Model No.", + "Serial Number", + "Data Product(s)", + "Mobile/Fixed (loc)", + "Descriptor", + ]: self.instrument_data[i] = instrument_dict[i] - def _check_instrument_has_product(self) -> bool: """ Check instrument has defined data product associated with it @@ -212,7 +219,6 @@ def _check_instrument_has_product(self) -> bool: self.get_instrument_info() return self.data_product in self.instrument_data["Data Product(s)"] - def _get_github_latest_version(self, url: str) -> str: """ Get the tag of the latest release version @@ -225,7 +231,6 @@ def _get_github_latest_version(self, url: str) -> str: """ return requests.get(f"{url}/releases/latest").url.split("/")[-1] - def _check_website_exists(self, url: str) -> bool: """ Check website exists and is up @@ -239,8 +244,9 @@ def _check_website_exists(self, url: str) -> bool: status = requests.get(url).status_code return status == 200 - - def _check_github_cvs_version_exists(self, release_tag: Optional[str] = None) -> bool: + def _check_github_cvs_version_exists( + self, release_tag: Optional[str] = None + ) -> bool: """ Check the requested tagged version of AMF_CVs exists on GitHub """ @@ -248,7 +254,6 @@ def _check_github_cvs_version_exists(self, release_tag: Optional[str] = None) -> release_tag = self.ncas_gen_version url = f"https://github.com/ncasuk/AMF_CVs/releases/{release_tag}" return self._check_website_exists(url) - def _dimensions_tsv_url(self, obj: str) -> str: """ @@ -261,10 +266,13 @@ def _dimensions_tsv_url(self, obj: str) -> str: str: URL location of dimension tsv file """ file_loc = f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{self.ncas_gen_version}/product-definitions/tsv" - path, option = (obj, "specific") if obj not in ["land", "sea", "air", "trajectory"] else ("_common", obj) + path, option = ( + (obj, "specific") + if obj not in ["land", "sea", "air", "trajectory"] + else ("_common", obj) + ) return f"{file_loc}/{path}/dimensions-{option}.tsv" - def _variables_tsv_url(self, obj: str) -> str: """ Get the URL for the tsv files for variables @@ -276,10 +284,13 @@ def _variables_tsv_url(self, obj: str) -> str: str: URL location of variable tsv file """ file_loc = f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{self.ncas_gen_version}/product-definitions/tsv" - path, option = (obj, "specific") if obj not in ["land", "sea", "air", "trajectory"] else ("_common", obj) + path, option = ( + (obj, "specific") + if obj not in ["land", "sea", "air", "trajectory"] + else ("_common", obj) + ) return f"{file_loc}/{path}/variables-{option}.tsv" - def _attributes_tsv_url(self, obj: str) -> str: """ Get the URL for the tsv files for attributes @@ -291,25 +302,35 @@ def _attributes_tsv_url(self, obj: str) -> str: str: URL location of attribute tsv file """ file_loc = f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{self.ncas_gen_version}/product-definitions/tsv" - path, option = (obj, "-specific") if obj not in ["land", "sea", "air", "trajectory"] else ("_common", "") + path, option = ( + (obj, "-specific") + if obj not in ["land", "sea", "air", "trajectory"] + else ("_common", "") + ) return f"{file_loc}/{path}/global-attributes{option}.tsv" - def _get_ncas_instrument_tsv_url(self) -> str: """ Get the URL for the tsv file of NCAS instruments """ - vocab_version = self._get_github_latest_version("https://github.com/ncasuk/ncas-data-instrument-vocabs") - file_loc = f"https://raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs" + vocab_version = self._get_github_latest_version( + "https://github.com/ncasuk/ncas-data-instrument-vocabs" + ) + file_loc = ( + "https://raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs" + ) return f"{file_loc}/{vocab_version}/product-definitions/tsv/_instrument_vocabs/ncas-instrument-name-and-descriptors.tsv" - def _get_community_instrument_tsv_url(self) -> str: """ Get the URL for the tsv file of NCAS instruments """ - vocab_version = self._get_github_latest_version("https://github.com/ncasuk/ncas-data-instrument-vocabs") - file_loc = f"https://raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs" + vocab_version = self._get_github_latest_version( + "https://github.com/ncasuk/ncas-data-instrument-vocabs" + ) + file_loc = ( + "https://raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs" + ) return f"{file_loc}/{vocab_version}/product-definitions/tsv/_instrument_vocabs/community-instrument-name-and-descriptors.tsv" @@ -329,7 +350,7 @@ def convert_instrument_dict_to_file_info( instrument_dict (dict): Dictionary made by tsv2dict.instrument_dict instrument_name (str): Name of the instrument data_product (str): Data product of data for netCDF file - deployment_mode (str): Deployment mode of instrument. One of "land", "sea", + deployment_mode (str): Deployment mode of instrument. One of "land", "sea", "air", "trajectory" tag (str): Tag release of AMF_CVs being used @@ -349,12 +370,11 @@ def convert_instrument_dict_to_file_info( instrument_file_info.variables[var_name] = var_dict if "info" in instrument_dict.keys(): for key, value in instrument_dict["info"].items(): - if key == "Mobile/Fixed (loc)" and value.split("-")[0].strip().lower() == "fixed": + if ( + key == "Mobile/Fixed (loc)" + and value.split("-")[0].strip().lower() == "fixed" + ): value = value.split("-")[1].strip() instrument_file_info.instrument_data[key] = value return instrument_file_info - - - -