feat: File inputs not able to be a single file #190

Merged · 4 commits · Nov 27, 2023
31 changes: 17 additions & 14 deletions src/gnatss/loaders.py
```diff
@@ -32,14 +32,14 @@ def load_configuration(config_yaml: Optional[str] = None) -> Configuration:
     return config
 
 
-def load_sound_speed(sv_file: str) -> pd.DataFrame:
+def load_sound_speed(sv_files: List[str]) -> pd.DataFrame:
     """
     Loads sound speed file data into pandas dataframe
 
     Parameters
     ----------
-    sv_file : str
-        Path to the sound speed file to be loaded
+    sv_files : List[str]
+        The list of path string to the sound speed files to be loaded
 
     Returns
     -------
@@ -48,13 +48,14 @@ def load_sound_speed(sv_file: str) -> pd.DataFrame:
     """
     columns = [constants.SP_DEPTH, constants.SP_SOUND_SPEED]
 
-    # Read sound speed
-    return pd.read_csv(
-        sv_file,
-        delim_whitespace=True,
-        header=None,
-        names=columns,
-    )
+    sv_dfs = [
+        pd.read_csv(sv_file, delim_whitespace=True, header=None, names=columns)
+        .drop_duplicates(constants.SP_DEPTH)
+        .reset_index(drop=True)
+        for sv_file in sv_files
+    ]
+
+    return pd.concat(sv_dfs).reset_index(drop=True)
 
 
 def load_travel_times(
@@ -260,14 +261,14 @@ def load_gps_solutions(
 
 
 def load_deletions(
-    file_path: str, config: Configuration, time_scale="tt"
+    file_paths: List[str], config: Configuration, time_scale="tt"
 ) -> pd.DataFrame:
     """
     Loads the raw deletion text file into a pandas dataframe
 
     Parameters
     ----------
-    file_path : str
+    file_paths : List[str]
         Path to the deletion file to be loaded
     config : Configuration
         The configuration object
@@ -286,10 +287,12 @@ def load_deletions(
     output_path = Path(config.output.path)
     # TODO: Add additional files to be used for deletions
     default_deletions = output_path / CSVOutput.deletions.value
-    if file_path:
+    if file_paths:
         from .utilities.time import AstroTime
 
-        cut_df = pd.read_fwf(file_path, header=None)
+        cut_dfs = [pd.read_fwf(file_path, header=None) for file_path in file_paths]
+        cut_df = pd.concat(cut_dfs).reset_index(drop=True)
 
         # Date example: 28-JUL-22 12:30:00
         cut_df[constants.DEL_STARTTIME] = pd.to_datetime(
             cut_df[0] + "T" + cut_df[1], format="%d-%b-%yT%H:%M:%S"
```
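For reference, a minimal usage sketch of the reworked `load_sound_speed` (the CTD file names here are hypothetical): callers now always pass a list of paths, and the loader de-duplicates depths per file before concatenating.

```python
# Hypothetical paths; assumes the list-based API introduced in this PR.
from gnatss.loaders import load_sound_speed

# Before this PR: load_sound_speed("ctd_cast_1.txt") -- a single path string.
# After this PR, a list is required, even for a single file:
svdf = load_sound_speed(["ctd_cast_1.txt", "ctd_cast_2.txt"])

# Each file is read whitespace-delimited, duplicate depth rows are dropped
# per file, and the frames are concatenated with a fresh integer index.
print(svdf.head())
```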
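The same call shape applies to `load_deletions`; a hedged sketch, where the config path and deletion file names are placeholders:

```python
# Placeholder paths; load_configuration and load_deletions are the
# loaders touched in this diff.
from gnatss.loaders import load_configuration, load_deletions

config = load_configuration("config.yaml")  # hypothetical config file
cut_df = load_deletions(["deletns_1.dat", "deletns_2.dat"], config, "tt")

# Each fixed-width file is read with pd.read_fwf, then all rows are
# concatenated with a fresh index before start/end times are parsed.
```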
4 changes: 2 additions & 2 deletions src/gnatss/main.py
```diff
@@ -27,7 +27,7 @@
 
 def gather_files(
     config: Configuration, proc: Literal["solver", "posfilter"] = "solver"
-) -> Dict[str, Any]:
+) -> Dict[str, List[str]]:
     """Gather file paths for the various dataset files
 
     Parameters
@@ -56,7 +56,7 @@ def gather_files(
         if "**" in path:
             all_files = fs.glob(path)
         else:
-            all_files = path
+            all_files = [path]
 
         all_files_dict.setdefault(k, all_files)
     return all_files_dict
```
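This `gather_files` change is the crux of the PR: every value in the returned dict is now a `List[str]`, whether the configured path is a glob pattern or a single file. A minimal sketch of that branch in isolation, using `fsspec` as a stand-in for the module's `fs` object (an assumption, not the module's exact wiring):

```python
from typing import List

import fsspec

def expand_path(path: str) -> List[str]:
    """Mirror of the branch in gather_files: globs expand to many files,
    while a plain path is wrapped in a one-element list so downstream
    loaders can always iterate."""
    fs = fsspec.filesystem("file")  # assumption: local filesystem
    if "**" in path:
        return fs.glob(path)
    return [path]

# Both forms now yield lists, which is what lets load_sound_speed and
# load_deletions accept List[str] uniformly:
print(expand_path("./tests/data/2022/**/deletns.dat"))  # many matches
print(expand_path("./config.yaml"))                     # ["./config.yaml"]
```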
19 changes: 13 additions & 6 deletions tests/test_loaders.py
```diff
@@ -331,25 +331,31 @@ def test_load_deletions_outliers_and_deletions_from_config(
     configuration,
     create_and_cleanup_outliers_file,
 ):
-    # Use config.yaml to load deletions file
+    # Use config.yaml to load deletions files
     configuration.solver.input_files.deletions = InputData(
         path="./tests/data/2022/**/deletns.dat"
     )
 
-    config_deletions_file = gather_files(configuration)["deletions"][0]
+    config_deletions_files = gather_files(configuration)["deletions"]
     outliers_file = Path(configuration.output.path) / CSVOutput.outliers.value
     deletions_file = Path(configuration.output.path) / CSVOutput.deletions.value
 
     outliers_rows = pd.read_csv(outliers_file).shape[0]
-    config_deletions_rows = pd.read_fwf(config_deletions_file, header=None).shape[0]
+    config_deletions_rows = sum(
+        [
+            pd.read_fwf(config_deletions_file, header=None).shape[0]
+            for config_deletions_file in config_deletions_files
+        ]
+    )
 
     # Verify outliers_file and config_deletions_file is present and
     # output deletions_file is not present before calling load_deletions()
     assert outliers_file.is_file()
-    assert Path(config_deletions_file).is_file()
+    for config_deletions_file in config_deletions_files:
+        assert Path(config_deletions_file).is_file()
     assert not deletions_file.is_file()
 
-    loaded_deletions_df = load_deletions(config_deletions_file, configuration, "tt")
+    loaded_deletions_df = load_deletions(config_deletions_files, configuration, "tt")
 
     # Assert concatenation of outliers and deletions df
     assert loaded_deletions_df.shape[0] == outliers_rows + config_deletions_rows
@@ -362,4 +368,5 @@ def test_load_deletions_outliers_and_deletions_from_config(
     # deletions_file and config_deletions_file are present after calling load_deletions()
     assert not outliers_file.is_file()
     assert deletions_file.is_file()
-    assert Path(config_deletions_file).is_file()
+    for config_deletions_file in config_deletions_files:
+        assert Path(config_deletions_file).is_file()
```