diff --git a/docs/source/api.rst b/docs/source/api.rst index 2cf00de..f9851cf 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -7,6 +7,7 @@ API :recursive: ncas_amof_netcdf_template.create_netcdf + ncas_amof_netcdf_template.file_info ncas_amof_netcdf_template.remove_empty_variables ncas_amof_netcdf_template.tsv2dict ncas_amof_netcdf_template.util diff --git a/docs/source/file_info.rst b/docs/source/file_info.rst new file mode 100644 index 0000000..eb2921b --- /dev/null +++ b/docs/source/file_info.rst @@ -0,0 +1,5 @@ +file_info +--------- + +.. automodule:: ncas_amof_netcdf_template.file_info + :members: diff --git a/docs/source/~generated/ncas_amof_netcdf_template.file_info.rst b/docs/source/~generated/ncas_amof_netcdf_template.file_info.rst new file mode 100644 index 0000000..8f2f470 --- /dev/null +++ b/docs/source/~generated/ncas_amof_netcdf_template.file_info.rst @@ -0,0 +1,26 @@ +ncas\_amof\_netcdf\_template.file\_info +======================================= + +.. automodule:: ncas_amof_netcdf_template.file_info + + + + + + + + .. rubric:: Functions + + .. autosummary:: + + convert_instrument_dict_to_file_info + + + + + + .. rubric:: Classes + + .. autosummary:: + + FileInfo diff --git a/docs/source/~generated/ncas_amof_netcdf_template.util.rst b/docs/source/~generated/ncas_amof_netcdf_template.util.rst index 5a45bbe..a2d2861 100644 --- a/docs/source/~generated/ncas_amof_netcdf_template.util.rst +++ b/docs/source/~generated/ncas_amof_netcdf_template.util.rst @@ -14,6 +14,7 @@ .. 
autosummary:: add_metadata_to_netcdf + change_qc_flags check_float check_int check_type_convert diff --git a/docs/source/~generated/ncas_amof_netcdf_template.values.rst b/docs/source/~generated/ncas_amof_netcdf_template.values.rst index 499693a..5690c06 100644 --- a/docs/source/~generated/ncas_amof_netcdf_template.values.rst +++ b/docs/source/~generated/ncas_amof_netcdf_template.values.rst @@ -20,3 +20,4 @@ get_community_instruments_url get_instruments_url get_latest_CVs_version + get_latest_instrument_CVs_version diff --git a/src/ncas_amof_netcdf_template/__init__.py b/src/ncas_amof_netcdf_template/__init__.py index 7635873..187a8ff 100644 --- a/src/ncas_amof_netcdf_template/__init__.py +++ b/src/ncas_amof_netcdf_template/__init__.py @@ -4,5 +4,6 @@ from . import tsv2dict from . import util from . import values +from . import file_info __version__="2.5.0-alpha" diff --git a/src/ncas_amof_netcdf_template/create_netcdf.py b/src/ncas_amof_netcdf_template/create_netcdf.py index 5a52437..61e237b 100644 --- a/src/ncas_amof_netcdf_template/create_netcdf.py +++ b/src/ncas_amof_netcdf_template/create_netcdf.py @@ -16,52 +16,140 @@ from . import tsv2dict from . import values from .__init__ import __version__ +from .file_info import FileInfo, convert_instrument_dict_to_file_info def add_attributes( ncfile: Dataset, - instrument_dict: dict[ - str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ], - product: str, - created_time: str, - location: str, - loc: str, + instrument_dict: Optional[ + dict[ + str, + dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]], + ] + ] = None, + product: Optional[str] = None, + created_time: Optional[str] = None, + location: Optional[str] = None, + loc: str = "land", use_local_files: Optional[str] = None, tag: str = "latest", + instrument_file_info: Optional[FileInfo] = None, ) -> None: """ Adds all global attributes for a given product to the netCDF file. 
Args: ncfile (obj): netCDF file object - instrument_dict (dict): information about the instrument from - tsv2dict.isntrument_dict. - product (str): name of data product. - create_time (str): time of file creation. - location (str): value for the 'platform' global attribute. - loc (str): value for the 'deployment_mode' global attribute, should be one of - 'land', 'sea', 'air', or 'trajectory'. - tag (str): tagged release version of AMF_CVs, or "latest" to get most recent - release. Ignored if use_local_files is not None. Default latest. + instrument_dict (dict): -DEPRECATED- information about the instrument from + tsv2dict.instrument_dict. Use instrument_file_info + instead. Will be removed in version 2.7.0. + product (str): -DEPRECATED- name of data product. Value will be obtained from + instrument_file_info. Option will be removed in version 2.7.0. + created_time (str or None): time of file creation. If 'None', now will be used. + location (str or None): -DEPRECATED- value for the 'platform' global attribute. + Value will be obtained from instrument_file_info. Option + will be removed in version 2.7.0. + loc (str): -DEPRECATED- value for the 'deployment_mode' global attribute, + should be one of 'land', 'sea', 'air', or 'trajectory'. Value will + be obtained from instrument_file_info. Option will be removed in + version 2.7.0. + instrument_file_info (FileInfo): information about instrument for making netCDF + file, from + ncas_amof_netcdf_template.file_info.FileInfo. + tag (str): -DEPRECATED- tagged release version of AMF_CVs, or "latest" to get + most recent release. Ignored if use_local_files is not None. Value + will be obtained from instrument_file_info. Option will be removed + in version 2.7.0. use_local_files (str or None): path to local directory where tsv files are stored. If "None", read from online. Default None. 
""" - for key, value in instrument_dict["common"]["attributes"].items(): + if instrument_dict is not None: + if instrument_file_info is None: + warnings.warn( + "Using dictionary for instrument info is being deprecated, use the" + " ncas_amof_netcdf_template.file_info.FileInfo class instead. Use of the" + " instrument_dict option will be removed from version 2.7.0", + DeprecationWarning, + stacklevel=2, + ) + if product is None: + msg = ( + "If instrument_dict is still being used, 'product' must be given." + " Preferred option is to switch to using instrument_file_info" + " instead." + ) + raise ValueError(msg) + instrument_file_info = convert_instrument_dict_to_file_info( + instrument_dict, + instrument_dict["info"]["instrument_name"], + product, + loc, + tag, + ) + else: + warnings.warn( + "instrument_dict and instrument_file_info both given, using" + " instrument_file_info. Use of instrument_dict is being deprecated," + " and will be removed from version 2.7.0.", + DeprecationWarning, + stacklevel=2, + ) + + if product is not None or location is not None or tag != "latest": + warnings.warn( + "Defining any of 'product', 'location' or 'tag' arguments is being" + " deprecated, as this information will be pulled from" + " instrument_file_info argument. 
These options will be removed from" + " version 2.7.0.", + DeprecationWarning, + stacklevel=2, + ) + + if instrument_file_info is None: + msg = "No instrument file info given" + raise ValueError(msg) + + if created_time is None: + created_time = dt.datetime.now(tz=dt.timezone.utc).strftime("%Y%m%dT%H%M%S") + + for key, value in instrument_file_info.attributes.items(): if value["Fixed Value"] != "": ncfile.setncattr(key, value["Fixed Value"]) elif key == "source": - ncfile.setncattr(key, instrument_dict["info"]["Descriptor"]) + if "Descriptor" in instrument_file_info.instrument_data.keys(): + ncfile.setncattr( + key, instrument_file_info.instrument_data["Descriptor"] + ) + else: + ncfile.setncattr(key, "n/a") elif key == "institution": ncfile.setncattr(key, "National Centre for Atmospheric Science (NCAS)") elif key == "platform": - ncfile.setncattr(key, location) + if "Mobile/Fixed (loc)" in instrument_file_info.instrument_data.keys(): + ncfile.setncattr( + key, instrument_file_info.instrument_data["Mobile/Fixed (loc)"] + ) + else: + ncfile.setncattr(key, "n/a") elif key == "instrument_manufacturer": - ncfile.setncattr(key, instrument_dict["info"]["Manufacturer"]) + if "Manufacturer" in instrument_file_info.instrument_data.keys(): + ncfile.setncattr( + key, instrument_file_info.instrument_data["Manufacturer"] + ) + else: + ncfile.setncattr(key, "n/a") elif key == "instrument_model": - ncfile.setncattr(key, instrument_dict["info"]["Model No."]) + if "Model No." 
in instrument_file_info.instrument_data.keys(): + ncfile.setncattr(key, instrument_file_info.instrument_data["Model No."]) + else: + ncfile.setncattr(key, "n/a") elif key == "instrument_serial_number": - ncfile.setncattr(key, instrument_dict["info"]["Serial Number"]) + if "Serial Number" in instrument_file_info.instrument_data.keys(): + ncfile.setncattr( + key, instrument_file_info.instrument_data["Serial Number"] + ) + else: + ncfile.setncattr(key, "n/a") elif key == "amf_vocabularies_release": if use_local_files: attrsdict = tsv2dict.tsv2dict_attrs( @@ -69,9 +157,7 @@ def add_attributes( ) tagurl = attrsdict["amf_vocabularies_release"]["Example"] else: - if tag == "latest": - tag = values.get_latest_CVs_version() - tagurl = f"https://github.com/ncasuk/AMF_CVs/releases/tag/{tag}" + tagurl = f"https://github.com/ncasuk/AMF_CVs/releases/tag/{instrument_file_info.ncas_gen_version}" ncfile.setncattr(key, tagurl) elif key == "history": user = getpass.getuser() @@ -91,48 +177,87 @@ def add_attributes( f"CHANGE: {value['Description']}. {value['Compliance checking rules']}", ) - for key, value in instrument_dict[product]["attributes"].items(): - if value["Fixed Value"] != "": - ncfile.setncattr(key, value["Fixed Value"]) - else: - ncfile.setncattr( - key, - f"CHANGE: {value['Description']}. {value['Compliance checking rules']}", - ) - def add_dimensions( ncfile: Dataset, - instrument_dict: dict[ - str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ], - product: str, - dimension_lengths: dict[str, int], + instrument_dict: Optional[ + dict[ + str, + dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]], + ] + ] = None, + product: Optional[str] = None, + dimension_lengths: Optional[dict[str, int]] = None, + instrument_file_info: Optional[FileInfo] = None, ) -> None: """ Adds all dimensions for a given product to the netCDF file. 
Args: ncfile (obj): netCDF file object - instrument_dict (dict): information about the instrument from - tsv2dict.isntrument_dict. - product (str): name of data product. - dimension_lengths (dict): length of each dimension. + instrument_dict (dict): -DEPRECATED- information about the instrument from + tsv2dict.instrument_dict. Use instrument_file_info + instead. Will be removed in version 2.7.0. + product (str): -DEPRECATED- name of data product. Value will now be obtained + from instrument_file_info. Option will be removed from version + 2.7.0. + dimension_lengths (dict): -DEPRECATED- length of each dimension. Values will + now be obtained from instrument_file_info. Option + will be removed from version 2.7.0. + instrument_file_info (FileInfo): information about instrument for making netCDF + file, from + ncas_amof_netcdf_template.file_info.FileInfo. """ - for key, length in dimension_lengths.items(): - if ( - key in instrument_dict["common"]["dimensions"].keys() - or key in instrument_dict[product]["dimensions"].keys() - ): - ncfile.createDimension(key, length) + if instrument_dict is not None: + if instrument_file_info is None: + warnings.warn( + "Using dictionary for instrument info is being deprecated, use the" + " ncas_amof_netcdf_template.file_info.FileInfo class instead. Use of the" + " instrument_dict option will be removed from version 2.7.0", + DeprecationWarning, + stacklevel=2, + ) + if product is None or dimension_lengths is None: + msg = ( + "If instrument_dict is still being used, 'product' and" + " 'dimension_lengths' must be given. Preferred option is to switch" + " to using instrument_file_info instead." + ) + raise ValueError(msg) + else: + warnings.warn( + "instrument_dict and instrument_file_info both given, using" + " instrument_file_info. 
Use of instrument_dict is being deprecated," + " and will be removed from version 2.7.0.", + DeprecationWarning, + stacklevel=2, + ) + + if instrument_file_info is not None: + for dim_name in instrument_file_info.dimensions.keys(): + ncfile.createDimension( + dim_name, instrument_file_info.dimensions[dim_name]["Length"] + ) + + elif dimension_lengths is not None: + for key, length in dimension_lengths.items(): + if ( + key in instrument_dict["common"]["dimensions"].keys() + or key in instrument_dict[product]["dimensions"].keys() + ): + ncfile.createDimension(key, length) def add_variables( ncfile: Dataset, - instrument_dict: dict[ - str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ], - product: str, + instrument_dict: Optional[ + dict[ + str, + dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]], + ] + ] = None, + product: Optional[str] = None, + instrument_file_info: Optional[FileInfo] = None, verbose: int = 0, ) -> None: """ @@ -140,130 +265,168 @@ def add_variables( Args: ncfile (obj): netCDF file object - instrument_dict (dict): information about the instrument from - tsv2dict.isntrument_dict. - product (str): name of data product. + instrument_dict (dict): -DEPRECATED- information about the instrument from + tsv2dict.instrument_dict. Use instrument_file_info + instead. Will be removed in version 2.7.0. + product (str): -DEPRECATED- name of data product. Value will be obtained from + instrument_file_info. Option will be removed in version 2.7.0. + instrument_file_info (FileInfo): information about instrument for making netCDF + file, from + ncas_amof_netcdf_template.file_info.FileInfo. verbose (int): level of additional info to print. At the moment, there is only 1 additional level. Default 0. 
""" - for obj in [product, "common"]: - for key, value in instrument_dict[obj]["variables"].items(): - # make sure variable doesn't already exist, warn if it does - if key in ncfile.variables.keys(): - print(f"WARN: variable {key} defined multiple times.") + if instrument_dict is not None: + if instrument_file_info is None: + warnings.warn( + "Using dictionary for instrument info is being deprecated, use the" + " ncas_amof_netcdf_template.file_info.FileInfo class instead. Use of the" + " instrument_dict option will be removed from version 2.7.0", + DeprecationWarning, + stacklevel=2, + ) + if product is None: + msg = ( + "If instrument_dict is still being used, 'product' must be given." + " Preferred option is to switch to using instrument_file_info" + " instead." + ) + raise ValueError(msg) + instrument_file_info = convert_instrument_dict_to_file_info( + instrument_dict, + instrument_dict["info"]["instrument_name"], + product, + deployment_mode="land", + tag="latest", + ) + else: + warnings.warn( + "instrument_dict and instrument_file_info both given, using" + " instrument_file_info. 
Use of instrument_dict is being deprecated," + " and will be removed from version 2.7.0.", + DeprecationWarning, + stacklevel=2, + ) + + if instrument_file_info is None: + msg = "No instrument file info given" + raise ValueError(msg) + + for key, value in instrument_file_info.variables.items(): + # make sure variable doesn't already exist, warn if it does + if key in ncfile.variables.keys(): + print(f"WARN: variable {key} defined multiple times.") + else: + # therefore, value is instrument_dict[obj]['variables'][key] + # want to pop certain things here, but not for ever, so make tmp_value + tmp_value = copy.copy(value) + + # error, there are some variables with dimensions + # missing, error in spreadsheet + # if we encounter one, we're going to print out an error + # and forget about that variable + if "dimension" not in tmp_value.keys(): + print(f"WARN: No dimensions for variable {key}") + var_dims = () else: - # therefore, value is instrument_dict[obj]['variables'][key] - # want to pop certain things here, but not for ever, so make tmp_value - tmp_value = copy.copy(value) - - # error, there are some variables with dimensions - # missing, error in spreadsheet - # if we encounter one, we're going to print out an error - # and forget about that variable - if "dimension" not in tmp_value.keys(): - print(f"WARN: No dimensions for variable {key} in product {obj}") - print("Variable not added file") - var_dims = () - else: - var_dims = tmp_value.pop("dimension") - # there was an error somewhere meaning 2 dimensions - # had a '.' instead of ',' between them - var_dims = var_dims.replace(".", ",") - var_dims = tuple(x.strip() for x in var_dims.split(",")) + var_dims = tmp_value.pop("dimension") + # there was an error somewhere meaning 2 dimensions + # had a '.' 
instead of ',' between them + var_dims = var_dims.replace(".", ",") + var_dims = tuple(x.strip() for x in var_dims.split(",")) - datatype = tmp_value.pop("type") + datatype = tmp_value.pop("type") - if "_FillValue" in tmp_value: - fill_value = float(tmp_value.pop("_FillValue")) - else: - fill_value = None + if "_FillValue" in tmp_value: + fill_value = float(tmp_value.pop("_FillValue")) + else: + fill_value = None - if "chunksizes" in tmp_value: - chunksizes = tmp_value.pop("chunksizes") - else: - chunksizes = None + if "chunksizes" in tmp_value: + chunksizes = tmp_value.pop("chunksizes") + else: + chunksizes = None - if "compression" in tmp_value: - compression = tmp_value.pop("compression") - else: - compression = None + if "compression" in tmp_value: + compression = tmp_value.pop("compression") + else: + compression = None - if "complevel" in tmp_value: - complevel = tmp_value.pop("complevel") - else: - complevel = 4 + if "complevel" in tmp_value: + complevel = tmp_value.pop("complevel") + else: + complevel = 4 - if "shuffle" in tmp_value: - shuffle = tmp_value.pop("shuffle") - else: - shuffle = True - - var = ncfile.createVariable( - key, - datatype, - var_dims, - fill_value=fill_value, - chunksizes=chunksizes, - compression=compression, - complevel=complevel, - shuffle=shuffle, - ) + if "shuffle" in tmp_value: + shuffle = tmp_value.pop("shuffle") + else: + shuffle = True - for mdatkey, mdatvalue in tmp_value.items(): - # flag meanings in the tsv files are separated by '|', - # should be space separated - if "|" in mdatvalue and "flag_meaning" in mdatkey: - mdatvalue = " ".join([i.strip() for i in mdatvalue.split("|")]) - # flag values are bytes, can't add byte array - # into NETCDF4_CLASSIC so have to muddle a bit - if "flag_value" in mdatkey and "qc" in key and var.dtype == np.int8: - # turn string "0b,1b..." into list of ints [0,1...] 
- mdatvalue = mdatvalue.strip(",") - newmdatvalue = [int(i.strip("b")) for i in mdatvalue.split(",")] - # turn list into array with int8 type - mdatvalue = np.array(newmdatvalue, dtype=np.int8) - # print warning for example values, - # and don't add example values for standard_name - if ( - mdatkey == "standard_name" - and ("EXAMPLE" in mdatvalue or mdatvalue == "") - and verbose >= 1 - ): - print( - f"WARN: No standard name for variable {key}, " - "standard_name attribute not added" - ) - elif "EXAMPLE" in mdatvalue and verbose >= 1: + var = ncfile.createVariable( + key, + datatype, + var_dims, + fill_value=fill_value, + chunksizes=chunksizes, + compression=compression, + complevel=complevel, + shuffle=shuffle, + ) + + for mdatkey, mdatvalue in tmp_value.items(): + # flag meanings in the tsv files are separated by '|', + # should be space separated + if "|" in mdatvalue and "flag_meaning" in mdatkey: + mdatvalue = " ".join([i.strip() for i in mdatvalue.split("|")]) + # flag values are bytes, can't add byte array + # into NETCDF4_CLASSIC so have to muddle a bit + if "flag_value" in mdatkey and "qc" in key and var.dtype == np.int8: + # turn string "0b,1b..." into list of ints [0,1...] 
+ mdatvalue = mdatvalue.strip(",") + newmdatvalue = [int(i.strip("b")) for i in mdatvalue.split(",")] + # turn list into array with int8 type + mdatvalue = np.array(newmdatvalue, dtype=np.int8) + # print warning for example values, + # and don't add example values for standard_name + if ( + mdatkey == "standard_name" + and ("EXAMPLE" in mdatvalue or mdatvalue == "") + and verbose >= 1 + ): + print( + f"WARN: No standard name for variable {key}, " + "standard_name attribute not added" + ) + elif "EXAMPLE" in mdatvalue and verbose >= 1: + print( + "WARN: example value for attribute " + f"{mdatkey} for variable {key}" + ) + # don't add EXAMPLE standard name + if not ( + mdatkey == "standard_name" + and ("EXAMPLE" in mdatvalue or mdatvalue == "") + ): + # don't add empty attributes + if isinstance(mdatvalue, str) and mdatvalue == "" and verbose >= 1: print( - "WARN: example value for attribute " - f"{mdatkey} for variable {key}" + f"WARN: No value for attribute {mdatkey} " + "for variable {key}, attribute not added" ) - # don't add EXAMPLE standard name - if not ( - mdatkey == "standard_name" - and ("EXAMPLE" in mdatvalue or mdatvalue == "") - ): - # don't add empty attributes - if ( - isinstance(mdatvalue, str) - and mdatvalue == "" - and verbose >= 1 - ): - print( - f"WARN: No value for attribute {mdatkey} " - "for variable {key}, attribute not added" - ) - else: - var.setncattr(mdatkey, mdatvalue) + else: + var.setncattr(mdatkey, mdatvalue) def make_netcdf( instrument: str, product: str, time: str, - instrument_dict: dict[ - str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] - ], + instrument_dict: Optional[ + dict[ + str, + dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]], + ] + ] = None, loc: str = "land", dimension_lengths: dict[str, int] = {}, verbose: int = 0, @@ -277,6 +440,7 @@ def make_netcdf( compression: Union[str, dict[str, str], None] = None, complevel: Union[int, dict[str, int]] = 4, shuffle: 
Union[bool, dict[str, bool]] = True, + instrument_file_info: Optional[FileInfo] = None, ) -> Union[None, Dataset]: """ Makes netCDF file for given instrument and arguments. @@ -286,8 +450,12 @@ def make_netcdf( product (str): name of data product. time (str): time that the data represents, in YYYYmmdd-HHMMSS format or as much of as required. - instrument_dict (dict): information about the instrument - from tsv2dict.isntrument_dict. + instrument_dict (dict or None): -DEPRECATED- information about the instrument + from tsv2dict.instrument_dict. Use + instrument_file_info argument instead. Will be + removed in version 2.7.0. + instrument_file_info (FileInfo or None): information about the instrument, + from file_info.FileInfo. loc (str): location of instrument, one of 'land', 'sea', 'air' or 'trajectory'. Default 'land'. dimension_lengths (dict): lengths of dimensions in file. If not given, @@ -335,51 +503,86 @@ def make_netcdf( stacklevel=2, ) + if instrument_dict is not None: + if instrument_file_info is None: + warnings.warn( + "Using dictionary for instrument info is being deprecated, use the" + " ncas_amof_netcdf_template.file_info.FileInfo class instead. Use of the" + " instrument_dict option will be removed from version 2.7.0", + DeprecationWarning, + stacklevel=2, + ) + instrument_file_info = convert_instrument_dict_to_file_info( + instrument_dict, + instrument_dict["info"]["instrument_name"], + product, + loc, + tag, + ) + else: + warnings.warn( + "instrument_dict and instrument_file_info both given, using" + " instrument_file_info. 
Use of instrument_dict is being deprecated," + " and will be removed from version 2.7.0.", + DeprecationWarning, + stacklevel=2, + ) + + if instrument_file_info is None: + msg = "No instrument file info given" + raise ValueError(msg) + chunk_by_dimension = chunk_by_dimension or {} # add chunks to variables with defined chunk dimensions - all_options = ["common", product] - for prod in all_options: - for var in (var_dict := instrument_dict[prod]["variables"]): - if "dimension" in var_dict[var].keys(): - var_dims = var_dict[var]["dimension"] - var_dims = var_dims.replace(".", ",") - var_dims = [x.strip() for x in var_dims.split(",")] - if all(var_dim in chunk_by_dimension.keys() for var_dim in var_dims): - chunksizes = tuple( - [int(chunk_by_dimension[var_dim]) for var_dim in var_dims] - ) - var_dict[var]["chunksizes"] = chunksizes - if isinstance(compression, str): - var_dict[var]["compression"] = compression - elif isinstance(compression, dict) and var in compression.keys(): - var_dict[var]["compression"] = compression[var] - else: - var_dict[var]["compression"] = None + for var in (var_dict := instrument_file_info.variables): + if "dimension" in var_dict[var].keys(): + var_dims = var_dict[var]["dimension"] + var_dims = var_dims.replace(".", ",") + var_dims = [x.strip() for x in var_dims.split(",")] + if all(var_dim in chunk_by_dimension.keys() for var_dim in var_dims): + chunksizes = tuple( + [int(chunk_by_dimension[var_dim]) for var_dim in var_dims] + ) + var_dict[var]["chunksizes"] = chunksizes + if isinstance(compression, str): + var_dict[var]["compression"] = compression + elif isinstance(compression, dict) and var in compression.keys(): + var_dict[var]["compression"] = compression[var] + else: + var_dict[var]["compression"] = None - if isinstance(complevel, int): - var_dict[var]["complevel"] = complevel - elif isinstance(complevel, dict) and var in complevel.keys(): - var_dict[var]["complevel"] = complevel[var] - else: - var_dict[var]["complevel"] = 4 + if 
isinstance(complevel, int): + var_dict[var]["complevel"] = complevel + elif isinstance(complevel, dict) and var in complevel.keys(): + var_dict[var]["complevel"] = complevel[var] + else: + var_dict[var]["complevel"] = 4 - if isinstance(shuffle, bool): - var_dict[var]["shuffle"] = shuffle - elif isinstance(shuffle, dict) and var in shuffle.keys(): - var_dict[var]["shuffle"] = shuffle[var] - else: - var_dict[var]["shuffle"] = True + if isinstance(shuffle, bool): + var_dict[var]["shuffle"] = shuffle + elif isinstance(shuffle, dict) and var in shuffle.keys(): + var_dict[var]["shuffle"] = shuffle[var] + else: + var_dict[var]["shuffle"] = True if ( - instrument_dict["info"]["Mobile/Fixed (loc)"].split("-")[0].strip().lower() + instrument_file_info.instrument_data["Mobile/Fixed (loc)"] + .split("-")[0] + .strip() + .lower() == "fixed" ): platform = ( - instrument_dict["info"]["Mobile/Fixed (loc)"].split("-")[-1].strip().lower() + instrument_file_info.instrument_data["Mobile/Fixed (loc)"] + .split("-")[-1] + .strip() + .lower() ) else: - platform = instrument_dict["info"]["Mobile/Fixed (loc)"].strip().lower() + platform = ( + instrument_file_info.instrument_data["Mobile/Fixed (loc)"].strip().lower() + ) if options != "": no_options = len(options.split("_")) @@ -389,7 +592,7 @@ def make_netcdf( options = f"_{options}" filename = ( - f"{instrument}_{f'{platform}_' if platform != '' else ''}" + f"{instrument_file_info.instrument_name}_{f'{platform}_' if platform != '' else ''}" f"{time}_{product}{options}_v{product_version}.nc" ) @@ -398,16 +601,12 @@ def make_netcdf( add_attributes( ncfile, - instrument_dict, - product, - created_time, - platform, - loc, + instrument_file_info=instrument_file_info, use_local_files=use_local_files, - tag=tag, + created_time=created_time, ) - add_dimensions(ncfile, instrument_dict, product, dimension_lengths) - add_variables(ncfile, instrument_dict, product, verbose=verbose) + add_dimensions(ncfile, instrument_file_info=instrument_file_info) + 
add_variables(ncfile, instrument_file_info=instrument_file_info, verbose=verbose) if return_open: return ncfile @@ -548,34 +747,23 @@ def make_product_netcdf( if date is None: date = dt.datetime.now(dt.timezone.utc).strftime("%Y%m%d") - product_dict = tsv2dict.product_dict( - product, - platform=platform, - deployment_loc=deployment_loc, - use_local_files=use_local_files, - tag=tag, + product_file_info = FileInfo( + instrument_name, product, deployment_mode=deployment_loc, tag=tag ) + product_file_info.get_common_info() + product_file_info.get_deployment_info() + product_file_info.get_product_info() + + product_file_info.instrument_data["Mobile/Fixed (loc)"] = platform # make sure we have dimension lengths for all expected dimensions - all_dimensions = [] - dimlengths = {} - for key, val in product_dict.items(): - if "dimensions" in val.keys() and (key == product or key == "common"): - for dim in list(val["dimensions"].keys()): - if dim not in all_dimensions: - all_dimensions.append(dim) - if ( - isinstance(val["dimensions"][dim]["Length"], int) - or "<" not in val["dimensions"][dim]["Length"] - ): - dimlengths[dim] = int(val["dimensions"][dim]["Length"]) - for key, value in dimension_lengths.items(): - if key not in dimlengths.keys(): - dimlengths[key] = value - for dim in all_dimensions: - if dim not in dimlengths.keys(): - length = input(f"Enter length for dimension {dim}: ") - dimlengths[dim] = int(length) + for key, val in product_file_info.dimensions.items(): + if not isinstance(val["Length"], int): + if key in dimension_lengths.keys(): + val["Length"] = int(dimension_lengths[key]) + else: + length = input(f"Enter length for dimension {key}: ") + val["Length"] = int(length) # make the files if return_open: @@ -583,9 +771,8 @@ def make_product_netcdf( instrument_name, product, date, - product_dict, + instrument_file_info=product_file_info, loc=deployment_loc, - dimension_lengths=dimlengths, verbose=verbose, options=options, product_version=product_version, @@ 
-604,9 +791,8 @@ def make_product_netcdf( instrument_name, product, date, - product_dict, + instrument_file_info=product_file_info, loc=deployment_loc, - dimension_lengths=dimlengths, verbose=verbose, options=options, product_version=product_version, @@ -656,7 +842,9 @@ def main( products (str or list): string of one product or list of multiple products to make netCDF file for this instrument. If None, then all available products for the defined instrument - are made. + are made. -DEPRECATION WARNING- option to specify + either a list or 'None' is being deprecated and will + be removed in version 2.7.0. Use single data product. verbose (int): level of info to print out. Note that at the moment there is only one additional layer, this may increase in future. options (str): options to be included in file name. All options should be in @@ -704,77 +892,69 @@ def main( chunk_by_dimension = chunk_by_dimension or {} - instrument_dict = tsv2dict.instrument_dict( - instrument, loc=loc, use_local_files=use_local_files, tag=tag - ) + if isinstance(products, str): + products = [products] + elif products is None: + products = list_products(instrument=instrument, tag=tag) + warnings.warn( + "Passing 'None' as argument for 'products' is being deprecated. Use single" + " data product for this argument. Available data products for instrument" + f"{instrument} are {products}. The option to use 'None' will be removed" + " from version 2.7.0.", + DeprecationWarning, + stacklevel=2, + ) + elif isinstance(products, list): + warnings.warn( + "Giving multiple data products to the 'products' argument is" + " being deprecated. Use single data product as a string for this argument." 
+ " The option to give a list will be removed from version 2.7.0.", + DeprecationWarning, + stacklevel=2, + ) - # check if platform needs changing - if platform is not None: - if "mobile" not in instrument_dict["info"]["Mobile/Fixed (loc)"].lower(): - print( - "[WARNING]: Changing platform for an " - f"observatory instrument {instrument}." - ) - instrument_dict["info"]["Mobile/Fixed (loc)"] = platform - - # get and check our list of products - tsvdictkeys = instrument_dict.keys() - poss_products = list(tsvdictkeys) - poss_products.remove("info") - poss_products.remove("common") - if products is None: # user doesn't specify products, make all - products = poss_products - else: # check user specified products are applicable for instrument - remove_products = [] - if isinstance(products, str): - products = [products] - for product in products: - if product not in poss_products: + ncfiles = [] + + for product in products: + instrument_file_info = FileInfo( + instrument, product, deployment_mode=loc, tag=tag + ) + instrument_file_info.get_product_info() + instrument_file_info.get_deployment_info() + instrument_file_info.get_instrument_info() + instrument_file_info.get_common_info() + + # check if platform needs changing + if platform is not None: + if ( + "mobile" + not in instrument_file_info.instrument_data[ + "Mobile/Fixed (loc)" + ].lower() + ): print( - f"ERROR: {product} is not available for this " - "instrument, will be skipped." + "[WARNING]: Changing platform for an " + f"observatory instrument {instrument}." 
) - remove_products.append(product) - for remove_product in remove_products: - products.remove(remove_product) - # so by now we should have our list of products, quit if we have no products - if not isinstance(products, list) or len(products) == 0: - msg = f"No valid products specified, valid products are {poss_products}" - raise ValueError(msg) + instrument_file_info.instrument_data["Mobile/Fixed (loc)"] = platform - # make sure we have dimension lengths for all expected dimensions - all_dimensions = [] - dimlengths = {} - for key, val in instrument_dict.items(): - if "dimensions" in val.keys() and (key in products or key == "common"): - for dim in list(val["dimensions"].keys()): - if dim not in all_dimensions: - all_dimensions.append(dim) - if ( - isinstance(val["dimensions"][dim]["Length"], int) - or "<" not in val["dimensions"][dim]["Length"] - ): - dimlengths[dim] = int(val["dimensions"][dim]["Length"]) - for key, value in dimension_lengths.items(): - if key not in dimlengths.keys(): - dimlengths[key] = value - for dim in all_dimensions: - if dim not in dimlengths.keys(): - length = input(f"Enter length for dimension {dim}: ") - dimlengths[dim] = int(length) + # make sure we have dimension lengths for all expected dimensions + for key, val in instrument_file_info.dimensions.items(): + if not isinstance(val["Length"], int): + if key in dimension_lengths.keys(): + val["Length"] = int(dimension_lengths[key]) + else: + length = input(f"Enter length for dimension {key}: ") + val["Length"] = int(length) - # make the files - if return_open: - ncfiles = [] - for product in products: + # make the files + if return_open: ncfiles.append( make_netcdf( instrument, product, date, - instrument_dict, - loc=loc, - dimension_lengths=dimlengths, + instrument_file_info=instrument_file_info, verbose=verbose, options=options, product_version=product_version, @@ -788,19 +968,12 @@ def main( shuffle=shuffle, ) ) - if len(ncfiles) == 1: - return ncfiles[0] else: - return ncfiles - 
else: - for product in products: make_netcdf( instrument, product, date, - instrument_dict, - loc=loc, - dimension_lengths=dimlengths, + instrument_file_info=instrument_file_info, verbose=verbose, options=options, product_version=product_version, @@ -813,6 +986,10 @@ def main( complevel=complevel, shuffle=shuffle, ) + if len(ncfiles) == 1: + return ncfiles[0] + elif len(ncfiles) >= 2: + return ncfiles if __name__ == "__main__": diff --git a/src/ncas_amof_netcdf_template/file_info.py b/src/ncas_amof_netcdf_template/file_info.py new file mode 100644 index 0000000..f7f8069 --- /dev/null +++ b/src/ncas_amof_netcdf_template/file_info.py @@ -0,0 +1,380 @@ +""" +Take tsv files and return a class with all the data needed for creating the netCDF files. +""" + +import requests +import pandas as pd +import re +from typing import Optional, Union + +from .util import check_int + + +class FileInfo: + """ + Class that will gather and hold all the data to create netCDF file with + """ + + def __init__( + self, + instrument_name: str, + data_product: str, + deployment_mode: str = "land", + tag: str = "latest", + ) -> None: + """ + Initialise the class. + + Args: + instrument_name (str): name of the instrument + data_product (str): name of data product to use + deployment_mode (str): value of the 'deployment_mode' global attribute, and + different variables may be required depending on + value. One of "land", "sea", "air", or "trajectory". + tag (str): tagged release version of AMF_CVs, or "latest" to get most + recent version. Default is "latest". + """ + if deployment_mode not in ["land", "sea", "air", "trajectory"]: + msg = f"Invalid deployment mode {deployment_mode}, must be one of 'land', 'sea', 'air', 'trajectory'." 
+ raise ValueError(msg) + + self.instrument_name = instrument_name + self.data_product = data_product + self.deployment_mode = deployment_mode + self.tag = tag + if self.tag == "latest": + self.ncas_gen_version = self._get_github_latest_version( + "https://github.com/ncasuk/AMF_CVs" + ) + elif self._check_github_cvs_version_exists(release_tag=tag): + self.ncas_gen_version = tag + else: + msg = f"Cannot find release version {tag} in https://github.com/ncasuk/AMF_CVs" + raise ValueError(msg) + self.attributes = {} + self.dimensions = {} + self.variables = {} + self.instrument_data = {} + + def __repr__(self) -> str: + class_name = type(self).__name__ + return f"{class_name}(instrument_name='{self.instrument_name}', data_product='{self.data_product}', deployment_mode='{self.deployment_mode}', tag='{self.tag}') - ncas_gen_version = '{self.ncas_gen_version}" + + def __str__(self) -> str: + return f"Class with information for '{self.instrument_name}' instrument and '{self.data_product}' data product" + + def get_common_info(self) -> None: + """ + Get all the common variables, dimensions and attributes, and add to class + properties + """ + self._tsv2dict_attrs(self._attributes_tsv_url(self.deployment_mode)) + + def get_deployment_info(self) -> None: + """ + Get all the variables, dimensions and attributes related to the deployment + mode, and add to class properties + """ + self._tsv2dict_dims(self._dimensions_tsv_url(self.deployment_mode)) + self._tsv2dict_vars(self._variables_tsv_url(self.deployment_mode)) + + def get_product_info(self) -> None: + """ + Get all the variables, dimensions and attributes related to the data product, + and add to class properties + """ + self._tsv2dict_attrs(self._attributes_tsv_url(self.data_product)) + self._tsv2dict_dims(self._dimensions_tsv_url(self.data_product)) + self._tsv2dict_vars(self._variables_tsv_url(self.data_product)) + + def get_instrument_info(self) -> None: + """ + Get all the attribute data related to a defined 
instrument in the + ncas-data-instrument-vocabs repo, and add to class property. + """ + if self.instrument_name.startswith("ncas-"): + self._tsv2dict_instruments(self._get_ncas_instrument_tsv_url()) + else: + self._tsv2dict_instruments(self._get_community_instrument_tsv_url()) + + def _tsv2dict_vars(self, tsv_file: str) -> None: + """ + For a given tsv file from the AMF_CVs GitHub repo, add dictionary of + variables and their attributes to variables property. + + Args: + tsv_file (str): URL to location of tsv file + """ + if self._check_website_exists(tsv_file): + df_vars = pd.read_csv(tsv_file, sep="\t") + df_vars = df_vars.fillna("") + + current_var_dict = {} + first_loop = True + current_var = "" + + for current_line in df_vars.iloc: + if current_line["Variable"] != "": + if not first_loop: + self.variables[current_var] = current_var_dict + else: + first_loop = False + current_var = current_line["Variable"] + current_var_dict = {} + if current_line["Attribute"] != "": + if ( + current_line["Value"] == "" + and "example value" in current_line.keys() + and current_line["example value"] != "" + ): + current_var_dict[current_line["Attribute"]] = ( + f"EXAMPLE: {current_line['example value']}" + ) + else: + current_var_dict[current_line["Attribute"]] = current_line[ + "Value" + ] + + self.variables[current_var] = current_var_dict + + def _tsv2dict_dims(self, tsv_file: str) -> None: + """ + For a given tsv file from the AMF_CVs GitHub repo, add dictionary of dimensions + and additional info to dimensions property. 
+ + Args: + tsv_file (str): URL to location of tsv file + """ + if self._check_website_exists(tsv_file): + df_dims = pd.read_csv(tsv_file, sep="\t") + df_dims = df_dims.fillna("") + + for dim in df_dims.iloc: + dim_dict = dim.to_dict() + dim_name = dim_dict.pop("Name") + if check_int(dim_dict["Length"]): + dim_dict["Length"] = int(dim_dict["Length"]) + self.dimensions[dim_name] = dim_dict + + def _tsv2dict_attrs(self, tsv_file: str) -> None: + """ + For a given tsv file from the AMF_CVs GitHub repo, add dictionary of attributes + and values to attributes property. + + Args: + tsv_file (str): URL to location of tsv file + """ + if self._check_website_exists(tsv_file): + df_attrs = pd.read_csv(tsv_file, sep="\t") + df_attrs = df_attrs.fillna("") + + for attr in df_attrs.iloc: + attr_dict = attr.to_dict() + attr_name = attr_dict.pop("Name") + self.attributes[attr_name] = attr_dict + + def _tsv2dict_instruments(self, tsv_file: str) -> None: + """ + For a given tsv file from the ncas-data-instrument-vocabs repo, add dictionary + of instrument data to instrument_data property. + + Args: + tsv_file (str): URL to location of tsv file + """ + if self._check_website_exists(tsv_file): + df_instruments = pd.read_csv(tsv_file, sep="\t") + df_instrument = df_instruments.where( + df_instruments["New Instrument Name"] == self.instrument_name + ).dropna(subset=["New Instrument Name"]) + if len(df_instrument) == 0: + print( + f"[WARNING] No details found for instrument {self.instrument_name}..." 
+ ) + else: + for inst in df_instrument.iloc: + instrument_dict = inst.to_dict() + data_products = re.split( + r",| |\|", instrument_dict["Data Product(s)"] + ) + data_products = list(filter(None, data_products)) + instrument_dict["Data Product(s)"] = data_products + for i in [ + "Manufacturer", + "Model No.", + "Serial Number", + "Data Product(s)", + "Mobile/Fixed (loc)", + "Descriptor", + ]: + self.instrument_data[i] = instrument_dict[i] + + def _check_instrument_has_product(self) -> bool: + """ + Check instrument has defined data product associated with it + + Returns: + bool: does the instrument have the given data product associated with it + """ + if "Data Product(s)" not in self.instrument_data.keys(): + self.get_instrument_info() + return self.data_product in self.instrument_data["Data Product(s)"] + + def _get_github_latest_version(self, url: str) -> str: + """ + Get the tag of the latest release version + + Args: + url (str): GitHub URL to find latest release version of: https://github.com// + + Returns: + str: tag name of latest version release + """ + return requests.get(f"{url}/releases/latest").url.split("/")[-1] + + def _check_website_exists(self, url: str) -> bool: + """ + Check website exists and is up + + Args: + url (str): URL to check + + Returns: + bool: website is reachable + """ + status = requests.get(url).status_code + return status == 200 + + def _check_github_cvs_version_exists( + self, release_tag: Optional[str] = None + ) -> bool: + """ + Check the requested tagged version of AMF_CVs exists on GitHub + """ + if release_tag is None: + release_tag = self.ncas_gen_version + url = f"https://github.com/ncasuk/AMF_CVs/releases/{release_tag}" + return self._check_website_exists(url) + + def _dimensions_tsv_url(self, obj: str) -> str: + """ + Get the URL for the tsv files for dimensions + + Args: + obj (str): Data product or deployment mode + + Returns: + str: URL location of dimension tsv file + """ + file_loc = 
f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{self.ncas_gen_version}/product-definitions/tsv" + path, option = ( + (obj, "specific") + if obj not in ["land", "sea", "air", "trajectory"] + else ("_common", obj) + ) + return f"{file_loc}/{path}/dimensions-{option}.tsv" + + def _variables_tsv_url(self, obj: str) -> str: + """ + Get the URL for the tsv files for variables + + Args: + obj (str): Data product or deployment mode + + Returns: + str: URL location of variable tsv file + """ + file_loc = f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{self.ncas_gen_version}/product-definitions/tsv" + path, option = ( + (obj, "specific") + if obj not in ["land", "sea", "air", "trajectory"] + else ("_common", obj) + ) + return f"{file_loc}/{path}/variables-{option}.tsv" + + def _attributes_tsv_url(self, obj: str) -> str: + """ + Get the URL for the tsv files for attributes + + Args: + obj (str): Data product or deployment mode + + Returns: + str: URL location of attribute tsv file + """ + file_loc = f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{self.ncas_gen_version}/product-definitions/tsv" + path, option = ( + (obj, "-specific") + if obj not in ["land", "sea", "air", "trajectory"] + else ("_common", "") + ) + return f"{file_loc}/{path}/global-attributes{option}.tsv" + + def _get_ncas_instrument_tsv_url(self) -> str: + """ + Get the URL for the tsv file of NCAS instruments + """ + vocab_version = self._get_github_latest_version( + "https://github.com/ncasuk/ncas-data-instrument-vocabs" + ) + file_loc = ( + "https://raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs" + ) + return f"{file_loc}/{vocab_version}/product-definitions/tsv/_instrument_vocabs/ncas-instrument-name-and-descriptors.tsv" + + def _get_community_instrument_tsv_url(self) -> str: + """ + Get the URL for the tsv file of community instruments + """ + vocab_version = self._get_github_latest_version( + "https://github.com/ncasuk/ncas-data-instrument-vocabs" + ) + file_loc = ( + 
"https://raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs" + ) + return f"{file_loc}/{vocab_version}/product-definitions/tsv/_instrument_vocabs/community-instrument-name-and-descriptors.tsv" + + +def convert_instrument_dict_to_file_info( + instrument_dict: dict[ + str, dict[str, Union[str, list[str], dict[str, dict[str, Union[str, float]]]]] + ], + instrument_name: str, + data_product: str, + deployment_mode: str, + tag: str, +) -> FileInfo: + """ + Convert instrument_dict from tsv2dict.instrument_dict to a FileInfo class variable + + Args: + instrument_dict (dict): Dictionary made by tsv2dict.instrument_dict + instrument_name (str): Name of the instrument + data_product (str): Data product of data for netCDF file + deployment_mode (str): Deployment mode of instrument. One of "land", "sea", + "air", "trajectory" + tag (str): Tag release of AMF_CVs being used + + Returns: + FileInfo object with all instrument data from the dictionary + """ + instrument_file_info = FileInfo(instrument_name, data_product, deployment_mode, tag) + for prod in ["common", data_product]: + if "attributes" in instrument_dict[prod].keys(): + for attr_name, attr_dict in instrument_dict[prod]["attributes"].items(): + instrument_file_info.attributes[attr_name] = attr_dict + if "dimensions" in instrument_dict[prod].keys(): + for dim_name, dim_dict in instrument_dict[prod]["dimensions"].items(): + instrument_file_info.dimensions[dim_name] = dim_dict + if "variables" in instrument_dict[prod].keys(): + for var_name, var_dict in instrument_dict[prod]["variables"].items(): + instrument_file_info.variables[var_name] = var_dict + if "info" in instrument_dict.keys(): + for key, value in instrument_dict["info"].items(): + if ( + key == "Mobile/Fixed (loc)" + and value.split("-")[0].strip().lower() == "fixed" + ): + value = value.split("-")[1].strip() + instrument_file_info.instrument_data[key] = value + + return instrument_file_info diff --git a/src/ncas_amof_netcdf_template/tsv2dict.py 
b/src/ncas_amof_netcdf_template/tsv2dict.py index 2716c16..ddef150 100644 --- a/src/ncas_amof_netcdf_template/tsv2dict.py +++ b/src/ncas_amof_netcdf_template/tsv2dict.py @@ -130,6 +130,7 @@ def tsv2dict_instruments(tsv_file: str) -> dict[str, dict[str, str]]: for current_instrument in df_instruments.iloc: inst_dict = current_instrument.to_dict() inst_name = inst_dict.pop("New Instrument Name") + inst_dict["instrument_name"] = inst_name data_products = re.split(r",| |\|", inst_dict["Data Product(s)"]) data_products = list(filter(None, data_products)) inst_dict["Data Product(s)"] = data_products diff --git a/tests/test_create_netcdf.py b/tests/test_create_netcdf.py index 865e590..18da081 100644 --- a/tests/test_create_netcdf.py +++ b/tests/test_create_netcdf.py @@ -5,6 +5,7 @@ import tempfile import getpass import socket +import datetime as dt import ncas_amof_netcdf_template as nant @@ -35,7 +36,14 @@ def test_main_process(): os.remove("ncas-aws-10_somewhere-else_20221117_surface-met_v1.0.nc") -def test_add_attributes(): +@pytest.mark.parametrize( + "created_time", + [ + "2022-01-01T00:00:00Z", + None, + ], +) +def test_add_attributes(created_time): # Create a temporary file for testing temp_file = tempfile.NamedTemporaryFile(delete=False) temp_file.close() @@ -49,6 +57,8 @@ def test_add_attributes(): "Manufacturer": "Manufacturer", "Model No.": "Model Number", "Serial Number": "Serial Number", + "instrument_name": "instrument-name", + "Mobile/Fixed (loc)": "location1", }, "common": { "attributes": { @@ -74,11 +84,10 @@ def test_add_attributes(): } product = "product1" - created_time = "2022-01-01T00:00:00Z" location = "location1" loc = "land" use_local_files = None - tag = "v1.2.3" + tag = "v2.0.0" user = getpass.getuser() machine = socket.gethostname() @@ -115,10 +124,17 @@ def test_add_attributes(): ) assert ( ncfile.getncattr("amf_vocabularies_release") - == "https://github.com/ncasuk/AMF_CVs/releases/tag/v1.2.3" + == 
"https://github.com/ncasuk/AMF_CVs/releases/tag/v2.0.0" ) - assert ncfile.getncattr("history") == history_text - assert ncfile.getncattr("last_revised_date") == created_time + if created_time is not None: + assert ncfile.getncattr("history") == history_text + assert ncfile.getncattr("last_revised_date") == created_time + else: + # account for possibility of running test more than one second after making + # file, hoping not to be unlucky enough to run just before midnight + assert ncfile.getncattr("last_revised_date").startswith( + dt.datetime.now(tz=dt.timezone.utc).strftime("%Y%m%dT") + ) assert ncfile.getncattr("deployment_mode") == loc assert ( ncfile.getncattr("defined_attribute") @@ -131,6 +147,34 @@ def test_add_attributes(): ncfile.close() os.remove(temp_file.name) + with pytest.raises(ValueError, match=r".+'product' must be given.+"): + # Create a temporary file for testing + temp_file = tempfile.NamedTemporaryFile(delete=False) + temp_file.close() + + # Create a netCDF file for testing + ncfile = Dataset(temp_file.name, "w", format="NETCDF4") + nant.create_netcdf.add_attributes( + ncfile, + instrument_dict=instrument_dict, + created_time=created_time, + location=location, + loc=loc, + use_local_files=use_local_files, + tag=tag, + ) + + with pytest.raises(ValueError, match="No instrument file info given"): + # Create a temporary file for testing + temp_file = tempfile.NamedTemporaryFile(delete=False) + temp_file.close() + + # Create a netCDF file for testing + ncfile = Dataset(temp_file.name, "w", format="NETCDF4") + nant.create_netcdf.add_attributes( + ncfile, + ) + def test_add_dimensions(): # Create a temporary file for testing @@ -223,6 +267,9 @@ def test_add_variables(): }, } }, + "info": { + "instrument_name": "instrument-name", + }, } product = "product1" @@ -296,12 +343,13 @@ def test_make_netcdf(compression, complevel, shuffle): "Manufacturer": "Manufacturer", "Model No.": "Model Number", "Serial Number": "Serial Number", + "instrument_name": 
instrument, }, "common": { "dimensions": { - "time": None, - "latitude": None, - "longitude": None, + "time": {"Length": 5}, + "latitude": {"Length": 1}, + "longitude": {"Length": 1}, }, "variables": { "variable1": { @@ -349,7 +397,7 @@ def test_make_netcdf(compression, complevel, shuffle): product_version = "1.0" file_location = "." use_local_files = None - tag = "v1.2.3" + tag = "v2.0.0" chunk_by_dimension = {"time": 2} # Call the function @@ -397,7 +445,7 @@ def test_make_netcdf(compression, complevel, shuffle): ) assert ( ncfile.getncattr("amf_vocabularies_release") - == "https://github.com/ncasuk/AMF_CVs/releases/tag/v1.2.3" + == "https://github.com/ncasuk/AMF_CVs/releases/tag/v2.0.0" ) assert ncfile.getncattr("deployment_mode") == loc assert ( diff --git a/tests/test_tsv2dict.py b/tests/test_tsv2dict.py index 232ca54..831fc09 100644 --- a/tests/test_tsv2dict.py +++ b/tests/test_tsv2dict.py @@ -142,9 +142,18 @@ def test_tsv2dict_instruments(): # Check the result assert result == { - "instrument1": {"Data Product(s)": ["product1", "product2"]}, - "instrument2": {"Data Product(s)": ["product3", "product4"]}, - "instrument3": {"Data Product(s)": ["product5", "product6"]}, + "instrument1": { + "Data Product(s)": ["product1", "product2"], + "instrument_name": "instrument1", + }, + "instrument2": { + "Data Product(s)": ["product3", "product4"], + "instrument_name": "instrument2", + }, + "instrument3": { + "Data Product(s)": ["product5", "product6"], + "instrument_name": "instrument3", + }, } # Delete the temporary file