From 6e9b8efb7a62bcebbaa18ed7538c3ce5ef617c01 Mon Sep 17 00:00:00 2001 From: garciam Date: Tue, 8 Oct 2024 13:42:36 +0200 Subject: [PATCH] define z coordinate from space columns --- cdsobs/ingestion/api.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/cdsobs/ingestion/api.py b/cdsobs/ingestion/api.py index bd2a4d5..19c28b5 100644 --- a/cdsobs/ingestion/api.py +++ b/cdsobs/ingestion/api.py @@ -92,7 +92,23 @@ def validate_and_homogenise( ) else: data_renamed = data - # Check mandatory columns are present + # Add z coordinate if needed + if ( + "z_coordinate" not in data_renamed + and source_definition.space_columns is not None + and source_definition.space_columns.z is not None + ): + z_column = source_definition.sdefinpace_columns.z + logger.info(f"Using {z_column} to define z_coordinate") + # We copy it so the original can still be melted as a main_variable. + data_renamed["z_coordinate"] = data_renamed.loc[:, z_column].copy() + zcol2zcoordtype = dict(altitude=0, pressure=1) + data_renamed["z_coordinate_type"] = zcol2zcoordtype[z_column] + data_renamed["z_coordinate_type"] = data_renamed["z_coordinate_type"].astype( + "int" + ) + + # Check mandatory columns are present check_mandatory_columns(data_renamed, source_definition) # Cast data types to those specified in Service Definition file. cast_to_descriptions(data_renamed, source_definition) @@ -342,7 +358,7 @@ def _handle_aux_variables( homogenised_data_melted = homogenised_data_melted.drop(qf_col.name, axis=1) # Ensure is int and fill nans with 3 (missing according to the CDM) homogenised_data_melted["quality_flag"] = ( - homogenised_data_melted["quality_flag"].fillna(3).astype("int") + homogenised_data_melted["quality_flag"].fillna(3).astype("uint8") ) # Add processing level if melt_columns.processing_level: @@ -373,7 +389,13 @@ def _add_uncertainty_fields( uncertainty_type_name = f"uncertainty_type{unc_type_code}" uncertainty_units_name = f"uncertainty_units{unc_type_code}" homogenised_data_melted[uncertainty_value_name] = numpy.nan + homogenised_data_melted[uncertainty_value_name] = homogenised_data_melted[ + uncertainty_value_name + ].astype("float32") homogenised_data_melted[uncertainty_type_name] = unc_type_code + homogenised_data_melted[uncertainty_type_name] = homogenised_data_melted[ + uncertainty_type_name + ].astype("uint8") homogenised_data_melted[uncertainty_units_name] = "NA" for unc_col in unc_cols: