feat: File inputs not able to be a single file #190

Merged · 4 commits · Nov 27, 2023
31 changes: 17 additions & 14 deletions src/gnatss/loaders.py
```diff
@@ -32,14 +32,14 @@ def load_configuration(config_yaml: Optional[str] = None) -> Configuration:
     return config
 
 
-def load_sound_speed(sv_file: str) -> pd.DataFrame:
+def load_sound_speed(sv_files: List[str]) -> pd.DataFrame:
     """
     Loads sound speed file data into pandas dataframe
 
     Parameters
     ----------
-    sv_file : str
-        Path to the sound speed file to be loaded
+    sv_files : List[str]
+        The list of path string to the sound speed files to be loaded
 
     Returns
     -------
@@ -48,13 +48,14 @@ def load_sound_speed(sv_file: str) -> pd.DataFrame:
     """
     columns = [constants.SP_DEPTH, constants.SP_SOUND_SPEED]
 
-    # Read sound speed
-    return pd.read_csv(
-        sv_file,
-        delim_whitespace=True,
-        header=None,
-        names=columns,
-    )
+    sv_dfs = [
+        pd.read_csv(sv_file, delim_whitespace=True, header=None, names=columns)
+        .drop_duplicates(constants.SP_DEPTH)
+        .reset_index(drop=True)
+        for sv_file in sv_files
+    ]
+
+    return pd.concat(sv_dfs).reset_index(drop=True)
 
 
 def load_travel_times(
@@ -260,14 +261,14 @@ def load_gps_solutions(
 
 
 def load_deletions(
-    file_path: str, config: Configuration, time_scale="tt"
+    file_paths: List[str], config: Configuration, time_scale="tt"
 ) -> pd.DataFrame:
     """
     Loads the raw deletion text file into a pandas dataframe
 
     Parameters
     ----------
-    file_path : str
+    file_paths : List[str]
         Path to the deletion file to be loaded
     config : Configuration
         The configuration object
@@ -286,10 +287,12 @@ def load_deletions(
     output_path = Path(config.output.path)
     # TODO: Add additional files to be used for deletions
     default_deletions = output_path / CSVOutput.deletions.value
-    if file_path:
+    if file_paths:
         from .utilities.time import AstroTime
 
-        cut_df = pd.read_fwf(file_path, header=None)
+        cut_dfs = [pd.read_fwf(file_path, header=None) for file_path in file_paths]
+        cut_df = pd.concat(cut_dfs).reset_index(drop=True)
 
         # Date example: 28-JUL-22 12:30:00
         cut_df[constants.DEL_STARTTIME] = pd.to_datetime(
             cut_df[0] + "T" + cut_df[1], format="%d-%b-%yT%H:%M:%S"
```
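For reference, a minimal usage sketch of the reworked `load_sound_speed` (the CTD file names here are hypothetical): callers now always pass a list of paths, and the loader de-duplicates depths per file before concatenating.

```python
# Hypothetical paths; assumes the list-based API introduced in this PR.
from gnatss.loaders import load_sound_speed

# Before this PR: load_sound_speed("ctd_cast_1.txt") -- a single path string.
# After this PR, a list is required, even for a single file:
svdf = load_sound_speed(["ctd_cast_1.txt", "ctd_cast_2.txt"])

# Each file is read whitespace-delimited, duplicate depth rows are dropped
# per file, and the frames are concatenated with a fresh integer index.
print(svdf.head())
```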
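The same call shape applies to `load_deletions`; a hedged sketch, where the config path and deletion file names are placeholders:

```python
# Placeholder paths; load_configuration and load_deletions are the
# loaders touched in this diff.
from gnatss.loaders import load_configuration, load_deletions

config = load_configuration("config.yaml")  # hypothetical config file
cut_df = load_deletions(["deletns_1.dat", "deletns_2.dat"], config, "tt")

# Each fixed-width file is read with pd.read_fwf, then all rows are
# concatenated with a fresh index before start/end times are parsed.
```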
4 changes: 2 additions & 2 deletions src/gnatss/main.py
```diff
@@ -27,7 +27,7 @@
 
 def gather_files(
     config: Configuration, proc: Literal["solver", "posfilter"] = "solver"
-) -> Dict[str, Any]:
+) -> Dict[str, List[str]]:
     """Gather file paths for the various dataset files
 
     Parameters
@@ -56,7 +56,7 @@ def gather_files(
         if "**" in path:
             all_files = fs.glob(path)
         else:
-            all_files = path
+            all_files = [path]
 
         all_files_dict.setdefault(k, all_files)
     return all_files_dict
```
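This `gather_files` change is the crux of the PR: every value in the returned dict is now a `List[str]`, whether the configured path is a glob pattern or a single file. A minimal sketch of that branch in isolation, using `fsspec` as a stand-in for the module's `fs` object (an assumption, not the module's exact wiring):

```python
from typing import List

import fsspec

def expand_path(path: str) -> List[str]:
    """Mirror of the branch in gather_files: globs expand to many files,
    while a plain path is wrapped in a one-element list so downstream
    loaders can always iterate."""
    fs = fsspec.filesystem("file")  # assumption: local filesystem
    if "**" in path:
        return fs.glob(path)
    return [path]

# Both forms now yield lists, which is what lets load_sound_speed and
# load_deletions accept List[str] uniformly:
print(expand_path("./tests/data/2022/**/deletns.dat"))  # many matches
print(expand_path("./config.yaml"))                     # ["./config.yaml"]
```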
19 changes: 13 additions & 6 deletions tests/test_loaders.py
```diff
@@ -331,25 +331,31 @@ def test_load_deletions_outliers_and_deletions_from_config(
     configuration,
     create_and_cleanup_outliers_file,
 ):
-    # Use config.yaml to load deletions file
+    # Use config.yaml to load deletions files
     configuration.solver.input_files.deletions = InputData(
         path="./tests/data/2022/**/deletns.dat"
     )
 
-    config_deletions_file = gather_files(configuration)["deletions"][0]
+    config_deletions_files = gather_files(configuration)["deletions"]
     outliers_file = Path(configuration.output.path) / CSVOutput.outliers.value
     deletions_file = Path(configuration.output.path) / CSVOutput.deletions.value
 
     outliers_rows = pd.read_csv(outliers_file).shape[0]
-    config_deletions_rows = pd.read_fwf(config_deletions_file, header=None).shape[0]
+    config_deletions_rows = sum(
+        [
+            pd.read_fwf(config_deletions_file, header=None).shape[0]
+            for config_deletions_file in config_deletions_files
+        ]
+    )
 
     # Verify outliers_file and config_deletions_file is present and
     # output deletions_file is not present before calling load_deletions()
     assert outliers_file.is_file()
-    assert Path(config_deletions_file).is_file()
+    for config_deletions_file in config_deletions_files:
+        assert Path(config_deletions_file).is_file()
     assert not deletions_file.is_file()
 
-    loaded_deletions_df = load_deletions(config_deletions_file, configuration, "tt")
+    loaded_deletions_df = load_deletions(config_deletions_files, configuration, "tt")
 
     # Assert concatenation of outliers and deletions df
     assert loaded_deletions_df.shape[0] == outliers_rows + config_deletions_rows
@@ -362,4 +368,5 @@ def test_load_deletions_outliers_and_deletions_from_config(
     # deletions_file and config_deletions_file are present after calling load_deletions()
     assert not outliers_file.is_file()
     assert deletions_file.is_file()
-    assert Path(config_deletions_file).is_file()
+    for config_deletions_file in config_deletions_files:
+        assert Path(config_deletions_file).is_file()
```