Commit d933add

Add codespell and fix code typos

ghiggi committed Oct 25, 2023
1 parent 6c6783a commit d933add

Showing 47 changed files with 197 additions and 187 deletions.
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
@@ -22,3 +22,10 @@ repos:
     hooks:
       - id: blackdoc
         additional_dependencies: ["black[jupyter]"]
+
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.6
+    hooks:
+      - id: codespell
+        types_or: [python, markdown]
+        additional_dependencies: [tomli]
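Note: the new hook runs automatically on git commit. A minimal sketch of running the same check by hand, assuming the codespell CLI is installed (the target paths are illustrative):

import subprocess

# Recursively spell-check the given paths (illustrative targets); codespell
# prints one line per suspected typo, e.g. "io.py:57: unvalid ==> invalid".
result = subprocess.run(["codespell", "disdrodb/", "README.md"], capture_output=True, text=True)
print(result.stdout)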
2 changes: 1 addition & 1 deletion disdrodb/api/checks.py
@@ -30,7 +30,7 @@ def check_url(url: str) -> bool:
     Returns
     -------
     bool
-        True if url well formated, False if not well formated.
+        True if url well formatted, False if not well formatted.
     """
     regex = r"^(https?:\/\/)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$"  # noqa: E501

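For reference, a self-contained sketch of the validator this hunk touches; only the docstring and regex appear in the diff, so the function body below is an assumed reconstruction:

import re

def check_url(url: str) -> bool:
    """Return True if url is well formatted, False if not well formatted."""
    regex = r"^(https?:\/\/)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$"
    return re.match(regex, url) is not None  # assumed: the diff does not show the return statement

assert check_url("https://www.example.com/data")   # well formatted
assert not check_url("not a url")                  # no match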
2 changes: 1 addition & 1 deletion disdrodb/api/info.py
@@ -64,7 +64,7 @@ def get_key_from_filepaths(fpaths, key):

 ####--------------------------------------------------------------------------.
 ####################################
-#### DISDRODB File Informations ####
+#### DISDRODB File Information ####
 ####################################


20 changes: 10 additions & 10 deletions disdrodb/api/io.py
@@ -234,11 +234,11 @@ def check_data_sources(disdrodb_dir, product_level, data_sources):
     dir_path = _get_disdrodb_directory(disdrodb_dir=disdrodb_dir, product_level=product_level)
     # Get data sources directory
     list_dir = os.listdir(dir_path)
-    # Check if there are unvalid data_sources
-    idx_unvalid = np.where(np.isin(data_sources, list_dir, invert=True))[0]
-    if len(idx_unvalid) > 0:
-        unvalid_data_sources = data_sources[idx_unvalid].tolist()
-        raise ValueError(f"These data sources are unvalid: {unvalid_data_sources}.")
+    # Check if there are invalid data_sources
+    idx_invalid = np.where(np.isin(data_sources, list_dir, invert=True))[0]
+    if len(idx_invalid) > 0:
+        invalid_data_sources = data_sources[idx_invalid].tolist()
+        raise ValueError(f"These data sources are invalid: {invalid_data_sources}.")
     # Return data_sources list
     data_sources = data_sources.tolist()
     return data_sources
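The membership test fixed above, as a standalone sketch (the directory and source names are invented for illustration):

import numpy as np

list_dir = ["EPFL", "NASA", "ARM"]        # hypothetical: directories found on disk
data_sources = np.array(["EPFL", "GPM"])  # hypothetical: user-requested data sources

# invert=True flags requested entries that are NOT among the listed directories.
idx_invalid = np.where(np.isin(data_sources, list_dir, invert=True))[0]
if len(idx_invalid) > 0:
    invalid_data_sources = data_sources[idx_invalid].tolist()
    raise ValueError(f"These data sources are invalid: {invalid_data_sources}.")  # raises for "GPM"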
@@ -265,11 +265,11 @@ def _check_campaign_names(disdrodb_dir, product_level, campaign_names):
     list_campaign_names = [os.path.basename(path) for path in list_campaigns_path]
     # Remove duplicates
     list_campaign_names = np.unique(list_campaign_names)
-    # Check if there are unvalid campaign_names
-    idx_unvalid = np.where(np.isin(campaign_names, list_campaign_names, invert=True))[0]
-    if len(idx_unvalid) > 0:
-        unvalid_campaign_names = campaign_names[idx_unvalid].tolist()
-        raise ValueError(f"These campaign names are unvalid: {unvalid_campaign_names}.")
+    # Check if there are invalid campaign_names
+    idx_invalid = np.where(np.isin(campaign_names, list_campaign_names, invert=True))[0]
+    if len(idx_invalid) > 0:
+        invalid_campaign_names = campaign_names[idx_invalid].tolist()
+        raise ValueError(f"These campaign names are invalid: {invalid_campaign_names}.")
     # Return campaign_names list
     campaign_names = campaign_names.tolist()
     return campaign_names
4 changes: 2 additions & 2 deletions disdrodb/l0/check_configs.py
@@ -339,9 +339,9 @@ def check_raw_array(sensor_name: str) -> None:
     l0b_encodings = read_config_yml(sensor_name, "l0b_encodings.yml")

     for key, list_velocity_or_diameter in dict_keys_with_dimension_order.items():
-        expected_lenght = len(list_velocity_or_diameter) + 1
+        expected_length = len(list_velocity_or_diameter) + 1
         current_length = len(l0b_encodings.get(key).get("chunksizes"))
-        if expected_lenght != current_length:
+        if expected_length != current_length:
             raise ValueError(f"Wrong chunksizes for {key} in l0b_encodings.yml for sensor {sensor_name}.")

     # Get chunksizes in l0b_encoding.yml and check that if len > 1, has dimension_order key in raw_data_format
4 changes: 2 additions & 2 deletions disdrodb/l0/check_metadata.py
@@ -71,9 +71,9 @@ def check_metadata_geolocation(metadata) -> None:
     if longitude == -9999 or latitude == -9999:
         raise ValueError("Missing lat lon coordinates (-9999).")
     elif longitude > 180 or longitude < -180:
-        raise ValueError("Unvalid longitude (outside [-180, 180])")
+        raise ValueError("Invalid longitude (outside [-180, 180])")
     elif latitude > 90 or latitude < -90:
-        raise ValueError("Unvalid latitude (outside [-90, 90])")
+        raise ValueError("Invalid latitude (outside [-90, 90])")
     else:
         pass
     return None
8 changes: 4 additions & 4 deletions disdrodb/l0/check_standards.py
@@ -169,9 +169,9 @@ def check_l0a_column_names(df: pd.DataFrame, sensor_name: str) -> None:
     df_columns = set(df_columns)
     # --------------------------------------------
     # Check there aren't valid columns
-    unvalid_columns = list(df_columns.difference(valid_columns))
-    if len(unvalid_columns) > 0:
-        msg = f"The following columns do no met the DISDRODB standards: {unvalid_columns}"
+    invalid_columns = list(df_columns.difference(valid_columns))
+    if len(invalid_columns) > 0:
+        msg = f"The following columns do no met the DISDRODB standards: {invalid_columns}"
         logger.error(msg)
         raise ValueError(msg)
     # --------------------------------------------
@@ -194,7 +194,7 @@ def check_l0a_standards(df: pd.DataFrame, sensor_name: str, verbose: bool = True
     sensor_name : str
         Name of the sensor.
     verbose : bool, optional
-        Wheter to verbose the processing.
+        Whether to verbose the processing.
         The default is True.

     Raises
2 changes: 1 addition & 1 deletion disdrodb/l0/configs/Thies_LPM/raw_data_format.yml
@@ -424,7 +424,7 @@ optical_control_voltage_output:
   n_naturals: 0
   data_range:
     - 0 # 1000
-    - 9999 # 6500 in the doc, 6660 occurence found
+    - 9999 # 6500 in the doc, 6660 occurrence found
   nan_flags: null

 sensor_voltage_supply:
28 changes: 14 additions & 14 deletions disdrodb/l0/io.py
@@ -54,19 +54,19 @@ def get_disdrodb_dir(path: str) -> str:
     p = Path(path)
     list_path_elements = [str(part) for part in p.parts]
     # Retrieve where "DISDRODB" directory occurs
-    idx_occurence = np.where(np.isin(list_path_elements, "DISDRODB"))[0]
+    idx_occurrence = np.where(np.isin(list_path_elements, "DISDRODB"))[0]
     # If DISDRODB directory not present, raise error
-    if len(idx_occurence) == 0:
+    if len(idx_occurrence) == 0:
         raise ValueError(f"The DISDRODB directory is not present in {path}")
-    # Find the rightermost occurence
-    right_most_occurence = max(idx_occurence)
+    # Find the rightermost occurrence
+    right_most_occurrence = max(idx_occurrence)
     # Define the disdrodb_dir path
-    disdrodb_dir = os.path.join(*list_path_elements[: right_most_occurence + 1])
+    disdrodb_dir = os.path.join(*list_path_elements[: right_most_occurrence + 1])
     return disdrodb_dir


 def get_disdrodb_path(path: str) -> str:
-    """Return the path fron the disdrodb_dir directory.
+    """Return the path from the disdrodb_dir directory.

     Current assumption: no data_source, campaign_name, station_name or file contain the word DISDRODB!
@@ -85,14 +85,14 @@ def get_disdrodb_path(path: str) -> str:
     p = Path(path)
     list_path_elements = [str(part) for part in p.parts]
     # Retrieve where "DISDRODB" directory occurs
-    idx_occurence = np.where(np.isin(list_path_elements, "DISDRODB"))[0]
+    idx_occurrence = np.where(np.isin(list_path_elements, "DISDRODB"))[0]
     # If DISDRODB directory not present, raise error
-    if len(idx_occurence) == 0:
+    if len(idx_occurrence) == 0:
         raise ValueError(f"The DISDRODB directory is not present in {path}")
-    # Find the rightermost occurence
-    right_most_occurence = max(idx_occurence)
+    # Find the rightermost occurrence
+    right_most_occurrence = max(idx_occurrence)
     # Define the disdrodb path
-    disdrodb_fpath = os.path.join(*list_path_elements[right_most_occurence:])
+    disdrodb_fpath = os.path.join(*list_path_elements[right_most_occurrence:])
     return disdrodb_fpath
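A worked example of the two helpers above, keyed on the rightmost "DISDRODB" path component (the path is hypothetical):

import os
from pathlib import Path

path = "/home/user/DISDRODB/Raw/EPFL/PARSIVEL_2007/data.txt"  # hypothetical
parts = [str(part) for part in Path(path).parts]
idx = max(i for i, part in enumerate(parts) if part == "DISDRODB")

print(os.path.join(*parts[: idx + 1]))  # /home/user/DISDRODB  (what get_disdrodb_dir returns)
print(os.path.join(*parts[idx:]))       # DISDRODB/Raw/EPFL/PARSIVEL_2007/data.txt  (get_disdrodb_path)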


@@ -425,7 +425,7 @@ def get_raw_file_list(raw_dir, station_name, glob_patterns, verbose=False, debug
     station_name : str
         ID of the station
     verbose : bool, optional
-        Wheter to verbose the processing.
+        Whether to verbose the processing.
         The default is False.
     debugging_mode : bool, optional
         If True, it select maximum 3 files for debugging purposes.
@@ -523,7 +523,7 @@ def _check_directory_exist(dir_path):
 def _create_directory(path: str, exist_ok=True) -> None:
     """Create a directory."""
     if not isinstance(path, str):
-        raise TypeError("'path' must be a strig.")
+        raise TypeError("'path' must be a string.")
     try:
         os.makedirs(path, exist_ok=exist_ok)
         logger.debug(f"Created directory {path}.")
@@ -768,7 +768,7 @@ def check_raw_dir(raw_dir: str, verbose: bool = False) -> None:
     raw_dir : str
         Input raw directory
     verbose : bool, optional
-        Wheter to verbose the processing.
+        Whether to verbose the processing.
         The default is False.
     """
10 changes: 5 additions & 5 deletions disdrodb/l0/issue.py
@@ -147,7 +147,7 @@ def check_timesteps(timesteps):
     elif is_numpy_array_datetime(timesteps):
         timesteps = _check_timestep_datetime_accuracy(timesteps, unit="s")
     else:
-        raise TypeError("Unvalid timesteps input.")
+        raise TypeError("Invalid timesteps input.")
     return timesteps


@@ -182,7 +182,7 @@ def check_time_periods(time_periods):
     # Check time period start occur before end
     for time_period in new_time_periods:
         if time_period[0] > time_period[1]:
-            msg = f"The {time_period} time_period is unvalid. Start time occurs after end time."
+            msg = f"The {time_period} time_period is invalid. Start time occurs after end time."
             log_error(logger, msg=msg, verbose=False)
             raise ValueError(msg)
     return new_time_periods
@@ -213,9 +213,9 @@ def check_issue_dict(issue_dict):
     # Check there are only timesteps and time_periods keys
     valid_keys = ["timesteps", "time_periods"]
     keys = list(issue_dict.keys())
-    unvalid_keys = [k for k in keys if k not in valid_keys]
-    if len(unvalid_keys) > 0:
-        msg = f"Unvalid {unvalid_keys} keys. The issue YAML file accept only {valid_keys}"
+    invalid_keys = [k for k in keys if k not in valid_keys]
+    if len(invalid_keys) > 0:
+        msg = f"Invalid {invalid_keys} keys. The issue YAML file accept only {valid_keys}"
         log_error(logger, msg=msg, verbose=False)
         raise ValueError(msg)

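For context, a sketch of the dictionary shape check_issue_dict() accepts; the two keys come from the source, the values are illustrative:

valid_keys = ["timesteps", "time_periods"]

issue_dict = {
    "timesteps": ["2019-07-01 05:00:00"],                              # illustrative value
    "time_periods": [["2019-07-01 05:00:00", "2019-07-01 06:00:00"]],  # illustrative value
}

invalid_keys = [k for k in issue_dict if k not in valid_keys]
assert len(invalid_keys) == 0  # check_issue_dict would raise ValueError otherwise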
10 changes: 5 additions & 5 deletions disdrodb/l0/l0_processing.py
@@ -381,7 +381,7 @@ def run_l0a(
     df_sanitizer_fun : object, optional
         Sanitizer function to format the datafame into DISDRODB L0A standard.
     parallel : bool
-        If True, the files are processed simultanously in multiple processes.
+        If True, the files are processed simultaneously in multiple processes.
         The number of simultaneous processes can be customized using the dask.distributed LocalCluster.
         If False, the files are processed sequentially in a single process.
         If False, multi-threading is automatically exploited to speed up I/0 tasks.
@@ -515,7 +515,7 @@ def run_l0b(
         Whether to print detailed processing information into terminal.
         The default is True.
     parallel : bool
-        If True, the files are processed simultanously in multiple processes.
+        If True, the files are processed simultaneously in multiple processes.
         The number of simultaneous processes can be customized using the dask.distributed LocalCluster.
         Ensure that the threads_per_worker (number of thread per process) is set to 1 to avoid HDF errors.
         Also ensure to set the HDF5_USE_FILE_LOCKING environment variable to False.
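A sketch of the dask setup this docstring describes (the worker count is illustrative; LocalCluster and Client are the standard dask.distributed entry points):

import os

# Disable HDF5 file locking, as the docstring advises.
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"

from dask.distributed import Client, LocalCluster

# One thread per worker process, to avoid HDF errors as noted above.
cluster = LocalCluster(n_workers=4, threads_per_worker=1, processes=True)
client = Client(cluster)
# run_l0b(..., parallel=True) would then process files across these workers.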
@@ -662,7 +662,7 @@ def run_l0b_from_nc(
         Whether to print detailed processing information into terminal.
         The default is False.
     parallel : bool
-        If True, the files are processed simultanously in multiple processes.
+        If True, the files are processed simultaneously in multiple processes.
         The number of simultaneous processes can be customized using the dask.distributed LocalCluster.
         If False, the files are processed sequentially in a single process.
         If False, multi-threading is automatically exploited to speed up I/0 tasks.
@@ -895,7 +895,7 @@ def run_disdrodb_l0_station(
         Whether to print detailed processing information into terminal.
         The default is True.
     parallel : bool
-        If True, the files are processed simultanously in multiple processes.
+        If True, the files are processed simultaneously in multiple processes.
         Each process will use a single thread to avoid issues with the HDF/netCDF library.
         By default, the number of process is defined with os.cpu_count().
         If False, the files are processed sequentially in a single process.
@@ -1048,7 +1048,7 @@ def run_disdrodb_l0(
         Whether to print detailed processing information into terminal.
         The default is True.
     parallel : bool
-        If True, the files are processed simultanously in multiple processes.
+        If True, the files are processed simultaneously in multiple processes.
         Each process will use a single thread to avoid issues with the HDF/netCDF library.
         By default, the number of process is defined with os.cpu_count().
         If False, the files are processed sequentially in a single process.
6 changes: 3 additions & 3 deletions disdrodb/l0/l0_reader.py
@@ -227,7 +227,7 @@ def get_reader(reader_data_source: str, reader_name: str) -> object:
     # Check data source and reader_name validity
     reader_data_source = _check_reader_data_source(reader_data_source)
     reader_name = check_reader_exists(reader_data_source=reader_data_source, reader_name=reader_name)
-    # Retrive reader function
+    # Retrieve reader function
     if reader_name:
         full_name = f"disdrodb.l0.readers.{reader_data_source}.{reader_name}.reader"
         module_name, unit_name = full_name.rsplit(".", 1)
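The dotted path built above is presumably resolved with importlib; a sketch with a hypothetical reader name:

import importlib

full_name = "disdrodb.l0.readers.EPFL.LOCARNO_2018.reader"  # hypothetical reader module
module_name, unit_name = full_name.rsplit(".", 1)
module = importlib.import_module(module_name)  # imports disdrodb.l0.readers.EPFL.LOCARNO_2018
reader = getattr(module, unit_name)            # fetches its reader() function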
@@ -411,7 +411,7 @@ def reader_generic_docstring():
         Whether to print detailed processing information into terminal.
         The default is True.
     parallel : bool
-        If True, the files are processed simultanously in multiple processes.
+        If True, the files are processed simultaneously in multiple processes.
         The number of simultaneous processes can be customized using the dask.distributed LocalCluster.
         If False, the files are processed sequentially in a single process.
         If False, multi-threading is automatically exploited to speed up I/0 tasks.
@@ -435,7 +435,7 @@ def check_available_readers():
             reader = get_reader(reader_data_source=reader_data_source, reader_name=reader_name)
             check_reader_arguments(reader)
         except Exception as e:
-            raise ValueError(f"Unvalid reader for {reader_data_source}/{reader_name}.py. The error is {e}")
+            raise ValueError(f"Invalid reader for {reader_data_source}/{reader_name}.py. The error is {e}")
     return None


