Commit d933add

Add codespell and fix code typos

ghiggi committed Oct 25, 2023
1 parent 6c6783a commit d933add

Showing 47 changed files with 197 additions and 187 deletions.
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
@@ -22,3 +22,10 @@ repos:
     hooks:
       - id: blackdoc
         additional_dependencies: ["black[jupyter]"]
+
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.6
+    hooks:
+      - id: codespell
+        types_or: [python, markdown]
+        additional_dependencies: [tomli]
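Note: the new hook runs automatically on git commit. A minimal sketch of running the same check by hand, assuming the codespell CLI is installed (the target paths are illustrative):

import subprocess

# Recursively spell-check the given paths (illustrative targets); codespell
# prints one line per suspected typo, e.g. "io.py:57: unvalid ==> invalid".
result = subprocess.run(["codespell", "disdrodb/", "README.md"], capture_output=True, text=True)
print(result.stdout)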
2 changes: 1 addition & 1 deletion disdrodb/api/checks.py
@@ -30,7 +30,7 @@ def check_url(url: str) -> bool:
     Returns
     -------
     bool
-        True if url well formated, False if not well formated.
+        True if url well formatted, False if not well formatted.
     """
     regex = r"^(https?:\/\/)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$"  # noqa: E501

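For reference, a self-contained sketch of the validator this hunk touches; only the docstring and regex appear in the diff, so the function body below is an assumed reconstruction:

import re

def check_url(url: str) -> bool:
    """Return True if url is well formatted, False if not well formatted."""
    regex = r"^(https?:\/\/)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$"
    return re.match(regex, url) is not None  # assumed: the diff does not show the return statement

assert check_url("https://www.example.com/data")   # well formatted
assert not check_url("not a url")                  # no match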
2 changes: 1 addition & 1 deletion disdrodb/api/info.py
@@ -64,7 +64,7 @@ def get_key_from_filepaths(fpaths, key):

 ####--------------------------------------------------------------------------.
 ####################################
-#### DISDRODB File Informations ####
+#### DISDRODB File Information ####
 ####################################


20 changes: 10 additions & 10 deletions disdrodb/api/io.py
@@ -234,11 +234,11 @@ def check_data_sources(disdrodb_dir, product_level, data_sources):
     dir_path = _get_disdrodb_directory(disdrodb_dir=disdrodb_dir, product_level=product_level)
     # Get data sources directory
     list_dir = os.listdir(dir_path)
-    # Check if there are unvalid data_sources
-    idx_unvalid = np.where(np.isin(data_sources, list_dir, invert=True))[0]
-    if len(idx_unvalid) > 0:
-        unvalid_data_sources = data_sources[idx_unvalid].tolist()
-        raise ValueError(f"These data sources are unvalid: {unvalid_data_sources}.")
+    # Check if there are invalid data_sources
+    idx_invalid = np.where(np.isin(data_sources, list_dir, invert=True))[0]
+    if len(idx_invalid) > 0:
+        invalid_data_sources = data_sources[idx_invalid].tolist()
+        raise ValueError(f"These data sources are invalid: {invalid_data_sources}.")
     # Return data_sources list
     data_sources = data_sources.tolist()
     return data_sources
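The membership test fixed above, as a standalone sketch (the directory and source names are invented for illustration):

import numpy as np

list_dir = ["EPFL", "NASA", "ARM"]        # hypothetical: directories found on disk
data_sources = np.array(["EPFL", "GPM"])  # hypothetical: user-requested data sources

# invert=True flags requested entries that are NOT among the listed directories.
idx_invalid = np.where(np.isin(data_sources, list_dir, invert=True))[0]
if len(idx_invalid) > 0:
    invalid_data_sources = data_sources[idx_invalid].tolist()
    raise ValueError(f"These data sources are invalid: {invalid_data_sources}.")  # raises for "GPM"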
@@ -265,11 +265,11 @@ def _check_campaign_names(disdrodb_dir, product_level, campaign_names):
     list_campaign_names = [os.path.basename(path) for path in list_campaigns_path]
     # Remove duplicates
     list_campaign_names = np.unique(list_campaign_names)
-    # Check if there are unvalid campaign_names
-    idx_unvalid = np.where(np.isin(campaign_names, list_campaign_names, invert=True))[0]
-    if len(idx_unvalid) > 0:
-        unvalid_campaign_names = campaign_names[idx_unvalid].tolist()
-        raise ValueError(f"These campaign names are unvalid: {unvalid_campaign_names}.")
+    # Check if there are invalid campaign_names
+    idx_invalid = np.where(np.isin(campaign_names, list_campaign_names, invert=True))[0]
+    if len(idx_invalid) > 0:
+        invalid_campaign_names = campaign_names[idx_invalid].tolist()
+        raise ValueError(f"These campaign names are invalid: {invalid_campaign_names}.")
     # Return campaign_names list
     campaign_names = campaign_names.tolist()
     return campaign_names
4 changes: 2 additions & 2 deletions disdrodb/l0/check_configs.py
@@ -339,9 +339,9 @@ def check_raw_array(sensor_name: str) -> None:
     l0b_encodings = read_config_yml(sensor_name, "l0b_encodings.yml")

     for key, list_velocity_or_diameter in dict_keys_with_dimension_order.items():
-        expected_lenght = len(list_velocity_or_diameter) + 1
+        expected_length = len(list_velocity_or_diameter) + 1
         current_length = len(l0b_encodings.get(key).get("chunksizes"))
-        if expected_lenght != current_length:
+        if expected_length != current_length:
             raise ValueError(f"Wrong chunksizes for {key} in l0b_encodings.yml for sensor {sensor_name}.")

     # Get chunksizes in l0b_encoding.yml and check that if len > 1, has dimension_order key in raw_data_format
4 changes: 2 additions & 2 deletions disdrodb/l0/check_metadata.py
@@ -71,9 +71,9 @@ def check_metadata_geolocation(metadata) -> None:
     if longitude == -9999 or latitude == -9999:
         raise ValueError("Missing lat lon coordinates (-9999).")
     elif longitude > 180 or longitude < -180:
-        raise ValueError("Unvalid longitude (outside [-180, 180])")
+        raise ValueError("Invalid longitude (outside [-180, 180])")
     elif latitude > 90 or latitude < -90:
-        raise ValueError("Unvalid latitude (outside [-90, 90])")
+        raise ValueError("Invalid latitude (outside [-90, 90])")
     else:
         pass
     return None
8 changes: 4 additions & 4 deletions disdrodb/l0/check_standards.py
@@ -169,9 +169,9 @@ def check_l0a_column_names(df: pd.DataFrame, sensor_name: str) -> None:
     df_columns = set(df_columns)
     # --------------------------------------------
     # Check there aren't valid columns
-    unvalid_columns = list(df_columns.difference(valid_columns))
-    if len(unvalid_columns) > 0:
-        msg = f"The following columns do no met the DISDRODB standards: {unvalid_columns}"
+    invalid_columns = list(df_columns.difference(valid_columns))
+    if len(invalid_columns) > 0:
+        msg = f"The following columns do no met the DISDRODB standards: {invalid_columns}"
         logger.error(msg)
         raise ValueError(msg)
     # --------------------------------------------
@@ -194,7 +194,7 @@ def check_l0a_standards(df: pd.DataFrame, sensor_name: str, verbose: bool = True
     sensor_name : str
         Name of the sensor.
     verbose : bool, optional
-        Wheter to verbose the processing.
+        Whether to verbose the processing.
         The default is True.

     Raises
2 changes: 1 addition & 1 deletion disdrodb/l0/configs/Thies_LPM/raw_data_format.yml
@@ -424,7 +424,7 @@ optical_control_voltage_output:
   n_naturals: 0
   data_range:
     - 0 # 1000
-    - 9999 # 6500 in the doc, 6660 occurence found
+    - 9999 # 6500 in the doc, 6660 occurrence found
   nan_flags: null

 sensor_voltage_supply:
28 changes: 14 additions & 14 deletions disdrodb/l0/io.py
@@ -54,19 +54,19 @@ def get_disdrodb_dir(path: str) -> str:
     p = Path(path)
     list_path_elements = [str(part) for part in p.parts]
     # Retrieve where "DISDRODB" directory occurs
-    idx_occurence = np.where(np.isin(list_path_elements, "DISDRODB"))[0]
+    idx_occurrence = np.where(np.isin(list_path_elements, "DISDRODB"))[0]
     # If DISDRODB directory not present, raise error
-    if len(idx_occurence) == 0:
+    if len(idx_occurrence) == 0:
         raise ValueError(f"The DISDRODB directory is not present in {path}")
-    # Find the rightermost occurence
-    right_most_occurence = max(idx_occurence)
+    # Find the rightermost occurrence
+    right_most_occurrence = max(idx_occurrence)
     # Define the disdrodb_dir path
-    disdrodb_dir = os.path.join(*list_path_elements[: right_most_occurence + 1])
+    disdrodb_dir = os.path.join(*list_path_elements[: right_most_occurrence + 1])
     return disdrodb_dir


 def get_disdrodb_path(path: str) -> str:
-    """Return the path fron the disdrodb_dir directory.
+    """Return the path from the disdrodb_dir directory.

     Current assumption: no data_source, campaign_name, station_name or file contain the word DISDRODB!
@@ -85,14 +85,14 @@ def get_disdrodb_path(path: str) -> str:
     p = Path(path)
     list_path_elements = [str(part) for part in p.parts]
     # Retrieve where "DISDRODB" directory occurs
-    idx_occurence = np.where(np.isin(list_path_elements, "DISDRODB"))[0]
+    idx_occurrence = np.where(np.isin(list_path_elements, "DISDRODB"))[0]
     # If DISDRODB directory not present, raise error
-    if len(idx_occurence) == 0:
+    if len(idx_occurrence) == 0:
         raise ValueError(f"The DISDRODB directory is not present in {path}")
-    # Find the rightermost occurence
-    right_most_occurence = max(idx_occurence)
+    # Find the rightermost occurrence
+    right_most_occurrence = max(idx_occurrence)
     # Define the disdrodb path
-    disdrodb_fpath = os.path.join(*list_path_elements[right_most_occurence:])
+    disdrodb_fpath = os.path.join(*list_path_elements[right_most_occurrence:])
     return disdrodb_fpath
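A worked example of the two helpers above, keyed on the rightmost "DISDRODB" path component (the path is hypothetical):

import os
from pathlib import Path

path = "/home/user/DISDRODB/Raw/EPFL/PARSIVEL_2007/data.txt"  # hypothetical
parts = [str(part) for part in Path(path).parts]
idx = max(i for i, part in enumerate(parts) if part == "DISDRODB")

print(os.path.join(*parts[: idx + 1]))  # /home/user/DISDRODB  (what get_disdrodb_dir returns)
print(os.path.join(*parts[idx:]))       # DISDRODB/Raw/EPFL/PARSIVEL_2007/data.txt  (get_disdrodb_path)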


@@ -425,7 +425,7 @@ def get_raw_file_list(raw_dir, station_name, glob_patterns, verbose=False, debug
     station_name : str
         ID of the station
     verbose : bool, optional
-        Wheter to verbose the processing.
+        Whether to verbose the processing.
         The default is False.
     debugging_mode : bool, optional
         If True, it select maximum 3 files for debugging purposes.
@@ -523,7 +523,7 @@ def _check_directory_exist(dir_path):
 def _create_directory(path: str, exist_ok=True) -> None:
     """Create a directory."""
     if not isinstance(path, str):
-        raise TypeError("'path' must be a strig.")
+        raise TypeError("'path' must be a string.")
     try:
         os.makedirs(path, exist_ok=exist_ok)
         logger.debug(f"Created directory {path}.")
@@ -768,7 +768,7 @@ def check_raw_dir(raw_dir: str, verbose: bool = False) -> None:
     raw_dir : str
         Input raw directory
     verbose : bool, optional
-        Wheter to verbose the processing.
+        Whether to verbose the processing.
         The default is False.
     """
10 changes: 5 additions & 5 deletions disdrodb/l0/issue.py
@@ -147,7 +147,7 @@ def check_timesteps(timesteps):
     elif is_numpy_array_datetime(timesteps):
         timesteps = _check_timestep_datetime_accuracy(timesteps, unit="s")
     else:
-        raise TypeError("Unvalid timesteps input.")
+        raise TypeError("Invalid timesteps input.")
     return timesteps


@@ -182,7 +182,7 @@ def check_time_periods(time_periods):
     # Check time period start occur before end
     for time_period in new_time_periods:
         if time_period[0] > time_period[1]:
-            msg = f"The {time_period} time_period is unvalid. Start time occurs after end time."
+            msg = f"The {time_period} time_period is invalid. Start time occurs after end time."
             log_error(logger, msg=msg, verbose=False)
             raise ValueError(msg)
     return new_time_periods
@@ -213,9 +213,9 @@ def check_issue_dict(issue_dict):
     # Check there are only timesteps and time_periods keys
     valid_keys = ["timesteps", "time_periods"]
     keys = list(issue_dict.keys())
-    unvalid_keys = [k for k in keys if k not in valid_keys]
-    if len(unvalid_keys) > 0:
-        msg = f"Unvalid {unvalid_keys} keys. The issue YAML file accept only {valid_keys}"
+    invalid_keys = [k for k in keys if k not in valid_keys]
+    if len(invalid_keys) > 0:
+        msg = f"Invalid {invalid_keys} keys. The issue YAML file accept only {valid_keys}"
         log_error(logger, msg=msg, verbose=False)
         raise ValueError(msg)

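For context, a sketch of the dictionary shape check_issue_dict() accepts; the two keys come from the source, the values are illustrative:

valid_keys = ["timesteps", "time_periods"]

issue_dict = {
    "timesteps": ["2019-07-01 05:00:00"],                              # illustrative value
    "time_periods": [["2019-07-01 05:00:00", "2019-07-01 06:00:00"]],  # illustrative value
}

invalid_keys = [k for k in issue_dict if k not in valid_keys]
assert len(invalid_keys) == 0  # check_issue_dict would raise ValueError otherwise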
10 changes: 5 additions & 5 deletions disdrodb/l0/l0_processing.py
@@ -381,7 +381,7 @@ def run_l0a(
     df_sanitizer_fun : object, optional
         Sanitizer function to format the datafame into DISDRODB L0A standard.
     parallel : bool
-        If True, the files are processed simultanously in multiple processes.
+        If True, the files are processed simultaneously in multiple processes.
         The number of simultaneous processes can be customized using the dask.distributed LocalCluster.
         If False, the files are processed sequentially in a single process.
         If False, multi-threading is automatically exploited to speed up I/0 tasks.
@@ -515,7 +515,7 @@ def run_l0b(
         Whether to print detailed processing information into terminal.
         The default is True.
     parallel : bool
-        If True, the files are processed simultanously in multiple processes.
+        If True, the files are processed simultaneously in multiple processes.
         The number of simultaneous processes can be customized using the dask.distributed LocalCluster.
         Ensure that the threads_per_worker (number of thread per process) is set to 1 to avoid HDF errors.
         Also ensure to set the HDF5_USE_FILE_LOCKING environment variable to False.
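A sketch of the dask setup this docstring describes (the worker count is illustrative; LocalCluster and Client are the standard dask.distributed entry points):

import os

# Disable HDF5 file locking, as the docstring advises.
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"

from dask.distributed import Client, LocalCluster

# One thread per worker process, to avoid HDF errors as noted above.
cluster = LocalCluster(n_workers=4, threads_per_worker=1, processes=True)
client = Client(cluster)
# run_l0b(..., parallel=True) would then process files across these workers.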
@@ -662,7 +662,7 @@ def run_l0b_from_nc(
         Whether to print detailed processing information into terminal.
         The default is False.
     parallel : bool
-        If True, the files are processed simultanously in multiple processes.
+        If True, the files are processed simultaneously in multiple processes.
         The number of simultaneous processes can be customized using the dask.distributed LocalCluster.
         If False, the files are processed sequentially in a single process.
         If False, multi-threading is automatically exploited to speed up I/0 tasks.
@@ -895,7 +895,7 @@ def run_disdrodb_l0_station(
         Whether to print detailed processing information into terminal.
         The default is True.
     parallel : bool
-        If True, the files are processed simultanously in multiple processes.
+        If True, the files are processed simultaneously in multiple processes.
         Each process will use a single thread to avoid issues with the HDF/netCDF library.
         By default, the number of process is defined with os.cpu_count().
         If False, the files are processed sequentially in a single process.
@@ -1048,7 +1048,7 @@ def run_disdrodb_l0(
         Whether to print detailed processing information into terminal.
         The default is True.
     parallel : bool
-        If True, the files are processed simultanously in multiple processes.
+        If True, the files are processed simultaneously in multiple processes.
         Each process will use a single thread to avoid issues with the HDF/netCDF library.
         By default, the number of process is defined with os.cpu_count().
         If False, the files are processed sequentially in a single process.
6 changes: 3 additions & 3 deletions disdrodb/l0/l0_reader.py
@@ -227,7 +227,7 @@ def get_reader(reader_data_source: str, reader_name: str) -> object:
     # Check data source and reader_name validity
     reader_data_source = _check_reader_data_source(reader_data_source)
     reader_name = check_reader_exists(reader_data_source=reader_data_source, reader_name=reader_name)
-    # Retrive reader function
+    # Retrieve reader function
     if reader_name:
         full_name = f"disdrodb.l0.readers.{reader_data_source}.{reader_name}.reader"
         module_name, unit_name = full_name.rsplit(".", 1)
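The dotted path built above is presumably resolved with importlib; a sketch with a hypothetical reader name:

import importlib

full_name = "disdrodb.l0.readers.EPFL.LOCARNO_2018.reader"  # hypothetical reader module
module_name, unit_name = full_name.rsplit(".", 1)
module = importlib.import_module(module_name)  # imports disdrodb.l0.readers.EPFL.LOCARNO_2018
reader = getattr(module, unit_name)            # fetches its reader() function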
@@ -411,7 +411,7 @@ def reader_generic_docstring():
         Whether to print detailed processing information into terminal.
         The default is True.
     parallel : bool
-        If True, the files are processed simultanously in multiple processes.
+        If True, the files are processed simultaneously in multiple processes.
         The number of simultaneous processes can be customized using the dask.distributed LocalCluster.
         If False, the files are processed sequentially in a single process.
         If False, multi-threading is automatically exploited to speed up I/0 tasks.
@@ -435,7 +435,7 @@ def check_available_readers():
             reader = get_reader(reader_data_source=reader_data_source, reader_name=reader_name)
             check_reader_arguments(reader)
         except Exception as e:
-            raise ValueError(f"Unvalid reader for {reader_data_source}/{reader_name}.py. The error is {e}")
+            raise ValueError(f"Invalid reader for {reader_data_source}/{reader_name}.py. The error is {e}")
     return None


