From 7d413ec0856acafe03cf7a276e87dd3cbfe9a13e Mon Sep 17 00:00:00 2001 From: Praneeth Ratna <63547155+praneethratna@users.noreply.github.com> Date: Sat, 7 Oct 2023 06:57:48 +0530 Subject: [PATCH] added support for AZFP multiple phase attributes parsing (#1182) * added support for multiple phase attributes parsing * minor changes to code * Update echopype/convert/set_groups_azfp.py Co-authored-by: Emilio Mayorga * Update set_groups_azfp.py * Update test_convert_azfp.py --------- Co-authored-by: Emilio Mayorga --- echopype/convert/parse_azfp.py | 49 +++++++----- echopype/convert/set_groups_azfp.py | 60 +++++++++++---- echopype/tests/convert/test_convert_azfp.py | 82 +++++++++++++-------- 3 files changed, 125 insertions(+), 66 deletions(-) diff --git a/echopype/convert/parse_azfp.py b/echopype/convert/parse_azfp.py index 2c031a53f..476be16bf 100644 --- a/echopype/convert/parse_azfp.py +++ b/echopype/convert/parse_azfp.py @@ -91,26 +91,35 @@ def load_AZFP_xml(self): """ xmlmap = fsspec.get_mapper(self.xml_path, **self.storage_options) - root = ET.parse(xmlmap.fs.open(xmlmap.root)).getroot() - - for child in root.iter(): - if len(child.tag) > 3 and not child.tag.startswith("VTX"): - camel_case_tag = camelcase2snakecase(child.tag) - else: - camel_case_tag = child.tag - if len(child.attrib) > 0: - for key, val in child.attrib.items(): - self.parameters[camel_case_tag + "_" + camelcase2snakecase(key)].append(val) - - if all(char == "\n" for char in child.text): - continue - else: - try: - val = int(child.text) - except ValueError: - val = float(child.text) - - self.parameters[camel_case_tag].append(val) + phase_number = None + for event, child in ET.iterparse(xmlmap.fs.open(xmlmap.root), events=("start", "end")): + if event == "end" and child.tag == "Phases": + phase_number = None + if event == "start": + if len(child.tag) > 3 and not child.tag.startswith("VTX"): + camel_case_tag = camelcase2snakecase(child.tag) + else: + camel_case_tag = child.tag + + if len(child.attrib) > 0: + for key, val in child.attrib.items(): + attrib_tag = camel_case_tag + "_" + camelcase2snakecase(key) + if phase_number is not None and camel_case_tag != "phase": + attrib_tag += f"_phase{phase_number}" + self.parameters[attrib_tag].append(val) + if child.tag == "Phase": + phase_number = val + + if all(char == "\n" for char in child.text): + continue + else: + try: + val = int(child.text) + except ValueError: + val = float(child.text) + if phase_number is not None and camel_case_tag != "phase": + camel_case_tag += f"_phase{phase_number}" + self.parameters[camel_case_tag].append(val) # Handling the case where there is only one value for each parameter for key, val in self.parameters.items(): diff --git a/echopype/convert/set_groups_azfp.py b/echopype/convert/set_groups_azfp.py index 9c37eec1e..6049d58b8 100644 --- a/echopype/convert/set_groups_azfp.py +++ b/echopype/convert/set_groups_azfp.py @@ -81,7 +81,7 @@ def _create_unique_channel_name(self): """ serial_number = self.parser_obj.unpacked_data["serial_number"] - frequency_number = self.parser_obj.parameters["frequency_number"] + frequency_number = self.parser_obj.parameters["frequency_number_phase1"] if serial_number.size == 1: freq_as_str = self.freq_sorted.astype(int).astype(str) @@ -500,7 +500,25 @@ def set_vendor(self) -> xr.Dataset: unpacked_data = self.parser_obj.unpacked_data parameters = self.parser_obj.parameters ping_time = self.parser_obj.ping_time - tdn = parameters["pulse_len"][self.freq_ind_sorted] / 1e6 + phase_params = ["burst_interval", "pings_per_burst", "average_burst_pings"] + phase_freq_params = [ + "dig_rate", + "range_samples", + "range_averaging_samples", + "lock_out_index", + "gain", + "storage_format", + ] + tdn = [] + for num in parameters["phase_number"]: + tdn.append(parameters[f"pulse_len_phase{num}"][self.freq_ind_sorted] / 1e6) + tdn = np.array(tdn) + for param in phase_freq_params: + for num in parameters["phase_number"]: + parameters[param].append(parameters[f"{param}_phase{num}"][self.freq_ind_sorted]) + for param in phase_params: + for num in parameters["phase_number"]: + parameters[param].append(parameters[f"{param}_phase{num}"]) anc = np.array(unpacked_data["ancillary"]) # convert to np array for easy slicing # Build variables in the output xarray Dataset @@ -639,13 +657,13 @@ def set_vendor(self) -> xr.Dataset: ), # parameters with channel dimension from XML file "XML_transmit_duration_nominal": ( - ["channel"], + ["phase_number", "channel"], tdn, {"long_name": "(From XML file) Nominal bandwidth of transmitted pulse"}, ), # tdn comes from parameters "XML_gain_correction": ( - ["channel"], - parameters["gain"][self.freq_ind_sorted], + ["phase_number", "channel"], + parameters["gain"], {"long_name": "(From XML file) Gain correction"}, ), "instrument_type": parameters["instrument_type"][0], @@ -660,8 +678,8 @@ def set_vendor(self) -> xr.Dataset: "parameter_version": parameters["parameter_version"], "configuration_version": parameters["configuration_version"], "XML_digitization_rate": ( - ["channel"], - parameters["dig_rate"][self.freq_ind_sorted], + ["phase_number", "channel"], + parameters["dig_rate"], { "long_name": "(From XML file) Number of samples per second in kHz that is " "processed by the A/D converter when digitizing the returned acoustic " @@ -669,8 +687,8 @@ def set_vendor(self) -> xr.Dataset: }, ), "XML_lockout_index": ( - ["channel"], - parameters["lock_out_index"][self.freq_ind_sorted], + ["phase_number", "channel"], + parameters["lock_out_index"], { "long_name": "(From XML file) The distance, rounded to the nearest " "Bin Size after the pulse is transmitted that over which AZFP will " @@ -713,17 +731,17 @@ def set_vendor(self) -> xr.Dataset: ), "Sv_offset": (["channel"], Sv_offset), "number_of_samples_digitized_per_pings": ( - ["channel"], - parameters["range_samples"][self.freq_ind_sorted], + ["phase_number", "channel"], + parameters["range_samples"], ), "number_of_digitized_samples_averaged_per_pings": ( - ["channel"], - parameters["range_averaging_samples"][self.freq_ind_sorted], + ["phase_number", "channel"], + parameters["range_averaging_samples"], ), # parameters with dim len=0 from XML file "XML_sensors_flag": parameters["sensors_flag"], "XML_burst_interval": ( - [], + ["phase_number"], parameters["burst_interval"], { "long_name": "Time in seconds between bursts or between pings if the burst " @@ -732,8 +750,14 @@ def set_vendor(self) -> xr.Dataset: ), "XML_sonar_serial_number": parameters["serial_number"], "number_of_frequency": parameters["num_freq"], - "number_of_pings_per_burst": parameters["pings_per_burst"], - "average_burst_pings_flag": parameters["average_burst_pings"], + "number_of_pings_per_burst": ( + ["phase_number"], + parameters["pings_per_burst"], + ), + "average_burst_pings_flag": ( + ["phase_number"], + parameters["average_burst_pings"], + ), # temperature coefficients from XML file **{ f"temperature_k{var}": ( @@ -789,6 +813,10 @@ def set_vendor(self) -> xr.Dataset: list(range(len(unpacked_data["ancillary"][0]))), ), "ad_len": (["ad_len"], list(range(len(unpacked_data["ad"][0])))), + "phase_number": ( + ["phase_number"], + sorted([int(num) for num in parameters["phase_number"]]), + ), }, ) return set_time_encodings(ds) diff --git a/echopype/tests/convert/test_convert_azfp.py b/echopype/tests/convert/test_convert_azfp.py index 4b8f411b6..48d4139bb 100644 --- a/echopype/tests/convert/test_convert_azfp.py +++ b/echopype/tests/convert/test_convert_azfp.py @@ -178,13 +178,13 @@ def test_convert_azfp_01a_notemperature_notilt(azfp_path): def test_load_parse_azfp_xml(azfp_path): - azfp_01a_path = azfp_path / '17082117.01A' - azfp_xml_path = azfp_path / '17030815.XML' - parseAZFP = ParseAZFP(str(azfp_01a_path), str(azfp_xml_path)) + azfp_xml_path = azfp_path / '23081211.XML' + parseAZFP = ParseAZFP(None, str(azfp_xml_path)) parseAZFP.load_AZFP_xml() expected_params = ['instrument_type_string', 'instrument_type', 'major', 'minor', 'date', 'program_name', 'program', 'CPU', 'serial_number', 'board_version', - 'file_version', 'parameter_version', 'configuration_version', 'eclock', + 'file_version', 'parameter_version', 'configuration_version', 'backplane', + 'delay_transmission_string', 'delay_transmission', 'eclock', 'digital_board_version', 'sensors_flag_pressure_sensor_installed', 'sensors_flag_paros_installed', 'sensors_flag', 'U0', 'Y1', 'Y2', 'Y3', 'C1', 'C2', 'C3', 'D1', 'D2', 'T1', 'T2', 'T3', 'T4', 'T5', 'X_a', 'X_b', @@ -194,33 +194,55 @@ def test_load_parse_azfp_xml(azfp_path): 'VTX3', 'BP', 'EL', 'DS', 'min_pulse_len', 'sound_speed', 'start_date_svalue', 'start_date', 'num_frequencies', 'num_phases', 'data_output_svalue', 'data_output', 'frequency_units', 'frequency', - 'phase_number', 'phase_type_svalue', 'phase_type', 'duration_svalue', - 'duration', 'ping_period_units', 'ping_period', 'burst_interval_units', - 'burst_interval', 'pings_per_burst_units', 'pings_per_burst', - 'average_burst_pings_units', 'average_burst_pings', 'frequency_number', - 'acquire_frequency_units', 'acquire_frequency', 'pulse_len_units', - 'pulse_len', 'dig_rate_units', 'dig_rate', 'range_samples_units', - 'range_samples', 'range_averaging_samples_units', 'range_averaging_samples', - 'lock_out_index_units', 'lock_out_index', 'gain_units', 'gain', - 'storage_format_units', 'storage_format'] + 'phase_number', 'start_date_svalue_phase1', 'start_date_phase1', + 'phase_type_svalue_phase1', 'phase_type_phase1', 'duration_svalue_phase1', + 'duration_phase1', 'ping_period_units_phase1', 'ping_period_phase1', + 'burst_interval_units_phase1', 'burst_interval_phase1', + 'pings_per_burst_units_phase1', 'pings_per_burst_phase1', + 'average_burst_pings_units_phase1', 'average_burst_pings_phase1', + 'frequency_number_phase1', 'acquire_frequency_units_phase1', + 'acquire_frequency_phase1', 'pulse_len_units_phase1', 'pulse_len_phase1', + 'dig_rate_units_phase1', 'dig_rate_phase1', 'range_samples_units_phase1', + 'range_samples_phase1', 'range_averaging_samples_units_phase1', + 'range_averaging_samples_phase1', 'lock_out_index_units_phase1', + 'lock_out_index_phase1', 'gain_units_phase1', 'gain_phase1', + 'storage_format_units_phase1', 'storage_format_phase1', + 'start_date_svalue_phase2', 'start_date_phase2', 'phase_type_svalue_phase2', + 'phase_type_phase2', 'duration_svalue_phase2', 'duration_phase2', + 'ping_period_units_phase2', 'ping_period_phase2', + 'burst_interval_units_phase2', 'burst_interval_phase2', + 'pings_per_burst_units_phase2', 'pings_per_burst_phase2', + 'average_burst_pings_units_phase2', 'average_burst_pings_phase2', + 'frequency_number_phase2', 'acquire_frequency_units_phase2', + 'acquire_frequency_phase2', 'pulse_len_units_phase2', 'pulse_len_phase2', + 'dig_rate_units_phase2', 'dig_rate_phase2', 'range_samples_units_phase2', + 'range_samples_phase2', 'range_averaging_samples_units_phase2', + 'range_averaging_samples_phase2', 'lock_out_index_units_phase2', + 'lock_out_index_phase2', 'gain_units_phase2', 'gain_phase2', + 'storage_format_units_phase2', 'storage_format_phase2', 'rt_version', + 'rt_frequency', 'enabled', 'direction', 'water_depth_high_tide', + 'instrument_depth_high_tide'] assert set(parseAZFP.parameters.keys()) == set(expected_params) assert list(set(parseAZFP.parameters['instrument_type_string']))[0] == 'AZFP' assert isinstance(parseAZFP.parameters['num_freq'], int) - assert isinstance(parseAZFP.parameters['pulse_len'], list) assert parseAZFP.parameters['num_freq'] == 4 - assert len(parseAZFP.parameters['frequency_number']) == 4 - assert parseAZFP.parameters['frequency_number'] == ['1', '2', '3', '4'] - assert parseAZFP.parameters['kHz'] == [125, 200, 455, 769] - - expected_len_params = ['acquire_frequency', 'pulse_len', 'dig_rate', 'range_samples', - 'range_averaging_samples', 'lock_out_index', 'gain', 'storage_format'] - assert all(len(parseAZFP.parameters[x]) == 4 for x in expected_len_params) - assert parseAZFP.parameters['acquire_frequency'] == [1, 1, 1, 1] - assert parseAZFP.parameters['pulse_len'] == [300, 300, 300, 300] - assert parseAZFP.parameters['dig_rate'] == [20000, 20000, 20000, 20000] - assert parseAZFP.parameters['range_samples'] == [1752, 1752, 1764, 540] - assert parseAZFP.parameters['range_averaging_samples'] == [4, 4, 4, 4] - assert parseAZFP.parameters['lock_out_index'] == [0, 0, 0, 0] - assert parseAZFP.parameters['gain'] == [1, 1, 1, 1] - assert parseAZFP.parameters['storage_format'] == [1, 1, 1, 1] - + assert parseAZFP.parameters['kHz'] == [67, 120, 200, 455] + + expected_len_params = ['acquire_frequency', 'pulse_len', 'dig_rate', + 'range_samples', 'range_averaging_samples', + 'lock_out_index', 'gain', 'storage_format'] + for num in parseAZFP.parameters["phase_number"]: + assert isinstance(parseAZFP.parameters[f"pulse_len_phase{num}"], list) + assert len(parseAZFP.parameters[f"acquire_frequency_phase{num}"]) == 4 + assert all(len(parseAZFP.parameters[f"{x}_phase{num}"]) == 4 for x in expected_len_params) + assert parseAZFP.parameters[f"frequency_number_phase{num}"] == ['1', '2', '3', '4'] + assert parseAZFP.parameters[f"acquire_frequency_phase{num}"] == [1, 1, 1, 1] + assert parseAZFP.parameters[f"dig_rate_phase{num}"] == [20000, 20000, 20000, 20000] + assert parseAZFP.parameters[f"range_averaging_samples_phase{num}"] == [1, 1, 1, 1] + assert parseAZFP.parameters[f"lock_out_index_phase{num}"] == [0, 0, 0, 0] + assert parseAZFP.parameters[f"gain_phase{num}"] == [1, 1, 1, 1] + assert parseAZFP.parameters[f"storage_format_phase{num}"] == [0, 0, 0, 0] + assert parseAZFP.parameters['pulse_len_phase1'] == [1000, 1000, 1000, 1000] + assert parseAZFP.parameters['pulse_len_phase2'] == [0, 0, 0, 0] + assert parseAZFP.parameters['range_samples_phase1'] == [8273, 8273, 8273, 8273] + assert parseAZFP.parameters['range_samples_phase2'] == [2750, 2750, 2750, 2750]