Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dependency version cleanups etc. for release of version 2.0.1 #560

Merged
merged 4 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ exclude = '''

[tool.poetry]
name = 'climate_indices'
version = '2.0.0'
version = '2.0.1'
description = 'Reference implementations of various climate indices typically used for drought monitoring'
authors = ['James Adams <[email protected]>']
readme = 'README.md'
Expand All @@ -44,19 +44,21 @@ classifiers = [
packages = [{include = 'climate_indices', from = 'src'}]

[tool.poetry.dependencies]
cftime = '>=1.6.2'
dask = '>=2022.2.0'
h5netcdf = '>=1.1.0'
# only Python and scipy are required for the base library code
python = '>=3.8,<3.12'
scipy = '>=1.10.0'
scipy = '1.10.1'
# remaining dependencies are required for the CLI (console) scripts
cftime = '>=1.6.4'
dask = '>=2023.5.0'
h5netcdf = '>=1.1.0'
xarray = '>=2023.1.0'

[tool.poetry.dev-dependencies]
pytest = '*'
pytest = '8.3.3'
toml = '>=0.10.2'

[tool.poetry.group.dev.dependencies]
sphinx-autodoc-typehints = "^1.23.3"
sphinx-autodoc-typehints = '2.0.1'

[tool.poetry.scripts]
process_climate_indices = 'climate_indices.__main__:main'
Expand All @@ -66,4 +68,3 @@ spi = 'climate_indices.__spi__:main'
filterwarnings = [
'ignore::FutureWarning',
]

107 changes: 96 additions & 11 deletions src/climate_indices/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import logging
import multiprocessing
import os
from typing import Any, Dict, List
from typing import Any, Dict, List, Tuple

import numpy as np
import scipy.constants
Expand Down Expand Up @@ -563,7 +563,7 @@ def _drop_data_into_shared_arrays_grid(
var_names: list,
periodicity: compute.Periodicity,
data_start_year: int,
):
) -> Tuple[int, ...]:
output_shape = None

# get the data arrays we'll use later in the index computations
Expand Down Expand Up @@ -626,9 +626,16 @@ def _drop_data_into_shared_arrays_grid(


def _drop_data_into_shared_arrays_divisions(
dataset,
dataset: xr.Dataset,
var_names: List[str],
):
) -> Tuple[int, ...]:
"""
Drop data into shared arrays for use in the index computations.

:param dataset:
:param var_names:
:return:
"""
output_shape = None

# get the data arrays we'll use later in the index computations
Expand Down Expand Up @@ -874,7 +881,7 @@ def _compute_write_index(keyword_arguments):
# TODO once we support daily Palmers then we'll need to convert values
# from a 366-day calendar back into a normal/Gregorian calendar

# get the computedPDSI data as an array of float32 values
# get the computed PDSI data as an array of float32 values
array = _global_shared_arrays[_KEY_RESULT_PDSI][_KEY_ARRAY]
shape = _global_shared_arrays[_KEY_RESULT_PDSI][_KEY_SHAPE]
pdsi = np.frombuffer(array.get_obj()).reshape(shape).astype(float)
Expand Down Expand Up @@ -1579,6 +1586,42 @@ def main(): # type: () -> None

arguments = parser.parse_args()

process_climate_indices(arguments=arguments)

# report the elapsed time
end_datetime = datetime.now()
_logger.info("End time: %s", end_datetime)
elapsed = end_datetime - start_datetime
_logger.info("Elapsed time: %s", elapsed)

except Exception:
_logger.exception("Failed to complete", exc_info=True)
raise


def process_climate_indices(
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (complexity): Consider refactoring the main function to improve code structure and reduce complexity.

The introduction of process_climate_indices() increases complexity without clear benefits. To improve code structure and readability, consider fully refactoring the processing logic into this function:

  1. Move all climate indices processing into process_climate_indices().
  2. Remove global state dependencies.
  3. Return the processing results instead of relying on side effects.

Here's a simplified example of how to restructure the code:

def main():
    start_datetime = datetime.now()
    _logger.info("Start time:    %s", start_datetime)

    try:
        arguments = parse_arguments()
        results = process_climate_indices(arguments)
        write_results(results, arguments)
    except Exception:
        _logger.exception("Failed to complete", exc_info=True)
        raise
    finally:
        end_datetime = datetime.now()
        _logger.info("End time:      %s", end_datetime)
        _logger.info("Elapsed time:  %s", end_datetime - start_datetime)

def process_climate_indices(args: argparse.Namespace) -> Dict[str, Any]:
    input_type = _validate_args(args)
    _setup_multiprocessing(args)

    data = _load_data(args)
    indices = _compute_indices(data, args)

    return indices

def _setup_multiprocessing(args: argparse.Namespace) -> None:
    global _NUMBER_OF_WORKER_PROCESSES
    # ... (existing multiprocessing setup logic)

def _load_data(args: argparse.Namespace) -> Dict[str, np.ndarray]:
    # ... (existing data loading logic)

def _compute_indices(data: Dict[str, np.ndarray], args: argparse.Namespace) -> Dict[str, np.ndarray]:
    # ... (existing index computation logic)

def write_results(results: Dict[str, np.ndarray], args: argparse.Namespace) -> None:
    # ... (existing result writing logic)

This structure separates concerns more clearly, reduces global state usage, and makes the flow of data and control more explicit. It's easier to understand, test, and maintain.

arguments: argparse.Namespace,
) -> None:
"""
Process climate indices based on the provided arguments.

:param arguments: A dictionary or argparse.Namespace containing the arguments
:return: The results of the climate indices processing
"""
# Extract arguments
# index = args['index']
# periodicity = args['periodicity']
# scales = args['scales']
# calibration_start_year = args['calibration_start_year']
# calibration_end_year = args['calibration_end_year']
# netcdf_precip = args['netcdf_precip']
# var_name_precip = args['var_name_precip']
Comment on lines +1611 to +1618
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Consider removing or updating commented-out code

Commented-out code can become outdated and confusing. If this is meant as a guide for future implementation, consider adding a TODO comment explaining its purpose. Otherwise, it might be better to remove it.

Suggested change
# Extract arguments
# index = args['index']
# periodicity = args['periodicity']
# scales = args['scales']
# calibration_start_year = args['calibration_start_year']
# calibration_end_year = args['calibration_end_year']
# netcdf_precip = args['netcdf_precip']
# var_name_precip = args['var_name_precip']
# TODO: Implement argument extraction
# Expected arguments: index, periodicity, scales, calibration_start_year,
# calibration_end_year, netcdf_precip, var_name_precip

# output_file_base = args['output_file_base']

# Add your existing processing logic here
# ...

try:
# validate the arguments and determine the input type
input_type = _validate_args(arguments)

Expand Down Expand Up @@ -1740,16 +1783,12 @@ def main(): # type: () -> None
if netcdf_awc != arguments.netcdf_awc:
os.remove(netcdf_awc)

# report on the elapsed time
end_datetime = datetime.now()
_logger.info("End time: %s", end_datetime)
elapsed = end_datetime - start_datetime
_logger.info("Elapsed time: %s", elapsed)

except Exception:
_logger.exception("Failed to complete", exc_info=True)
raise

return None


if __name__ == "__main__":
# (please do not remove -- useful for running as a script when debugging)
Expand Down Expand Up @@ -1777,4 +1816,50 @@ def main(): # type: () -> None
# --calibration_start_year 1951 --calibration_end_year 2010
# --multiprocessing all --periodicity monthly

"""
SYNOPSIS:

The main program in the provided code excerpt is designed to process climate indices on NetCDF
gridded datasets in parallel, leveraging Python's multiprocessing module. The process can be
broken down into several key steps, which together implement a quasi "map-reduce" model for parallel
computation. Here's an overview of how it works:

Step 1: Initialization and Argument Parsing
The program starts by parsing command-line arguments that specify the details of the computation,
such as the index to compute (e.g., SPI, SPEI), the input NetCDF files, and various parameters
relevant to the computation. It then validates these arguments to ensure they form a coherent set
of instructions for the computation.

Step 2: Setting Up Multiprocessing
Based on the command-line arguments, the program determines the number of worker processes to use.
It can use all available CPUs minus one, a single process, or all CPUs, depending on the user's choice.
Global shared arrays are prepared for use by worker processes. These arrays hold the input data
(e.g., precipitation, temperature) and the results of the computations.

Step 3: Data Preparation
The input data from NetCDF files is loaded into shared memory arrays. This step involves reading the data,
possibly converting units, and then distributing it across shared arrays that worker processes can access.
The program checks the dimensions and shapes of the input data to ensure they match expected patterns,
adjusting as necessary to fit the computation requirements.

Step 4: Parallel Computation ("Map")
The program splits the computation into chunks that can be processed independently.
This is the "map" part of the "map-reduce" model.
Worker processes are spawned, each taking a portion of the data from the shared arrays
to compute the climate index (e.g., SPI, SPEI) over that subset.
Each worker applies the computation function along the specified axis of the data chunk it has been given.
This could involve complex calculations like the Thornthwaite method for PET or statistical analysis for SPI.

Step 5: Aggregating Results ("Reduce")
Once all worker processes complete their computations, the results are aggregated back into a single dataset. Summary
This is the "reduce" part of the "map-reduce" model.
The program collects the computed indices from the shared arrays and assembles them into a coherent
output dataset, maintaining the correct dimensions and metadata.

Step 6: Writing Output
The final step involves writing the computed indices back to NetCDF files.
Each index computed (e.g., SPI, SPEI, PET) is saved in its own file.
The program ensures that the output files contain all necessary metadata and are structured
correctly to be used in further analysis or visualization.
"""
main()
14 changes: 7 additions & 7 deletions src/climate_indices/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,23 +118,23 @@ def sum_to_scale(
) -> np.ndarray:
"""
Compute a sliding sums array using 1-D convolution. The initial
(scale - 1) elements of the result array will be padded with np.NaN values.
Missing values are not ignored, i.e. if a np.NaN
(scale - 1) elements of the result array will be padded with np.nan values.
Missing values are not ignored, i.e. if a np.nan
(missing) value is part of the group of values to be summed then the sum
will be np.NaN
will be np.nan

For example if the first array is [3, 4, 6, 2, 1, 3, 5, 8, 5] and
the number of values to sum is 3 then the resulting array
will be [np.NaN, np.NaN, 13, 12, 9, 6, 9, 16, 18].
will be [np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18].

More generally:

Y = f(X, n)

Y[i] == np.NaN, where i < n
Y[i] == np.nan, where i < n
Y[i] == sum(X[i - n + 1:i + 1]), where i >= n - 1 and X[i - n + 1:i + 1]
contains no NaN values
Y[i] == np.NaN, where i >= n - 1 and X[i - n + 1:i + 1] contains
Y[i] == np.nan, where i >= n - 1 and X[i - n + 1:i + 1] contains
one or more NaN values

:param values: the array of values over which we'll compute sliding sums
Expand All @@ -158,7 +158,7 @@ def sum_to_scale(

# BELOW FOR dask/xarray DataArray integration
# # pad the values array with (scale - 1) NaNs
# values = pad(values, pad_width=(scale - 1, 0), mode='constant', constant_values=np.NaN)
# values = pad(values, pad_width=(scale - 1, 0), mode='constant', constant_values=np.nan)
#
# start = 1
# end = -(scale - 2)
Expand Down
2 changes: 1 addition & 1 deletion src/climate_indices/indices.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def percentage_of_normal(

# get an array containing a sliding sum on the specified time step
# scale -- i.e. if the scale is 3 then the first two elements will be
# np.NaN, since we need 3 elements to get a sum, and then from the third
# np.nan, since we need 3 elements to get a sum, and then from the third
# element to the end the values will equal the sum of the corresponding
# time step plus the values of the two previous time steps
scale_sums = compute.sum_to_scale(values, scale)
Expand Down
30 changes: 15 additions & 15 deletions tests/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_transform_fitted_gamma(
"""

# confirm that an input array of all NaNs results in the same array returned
all_nans = np.full(precips_mm_monthly.shape, np.NaN)
all_nans = np.full(precips_mm_monthly.shape, np.nan)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Consistent use of np.nan instead of np.NaN

This change improves consistency with NumPy's preferred lowercase 'nan'. Consider updating all occurrences throughout the test suite for uniformity.

Suggested change
all_nans = np.full(precips_mm_monthly.shape, np.nan)
"""
# confirm that an input array of all NaNs results in the same array returned
all_nans = np.full(precips_mm_monthly.shape, np.nan)

computed_values = compute.transform_fitted_gamma(
all_nans,
data_year_start_monthly,
Expand Down Expand Up @@ -172,9 +172,9 @@ def test_gamma_parameters(
"""

# confirm that an input array of all NaNs results in the same array returned
all_nans = np.full(precips_mm_monthly.shape, np.NaN)
nan_alphas = np.full(shape=(12,), fill_value=np.NaN)
nan_betas = np.full(shape=(12,), fill_value=np.NaN)
all_nans = np.full(precips_mm_monthly.shape, np.nan)
nan_alphas = np.full(shape=(12,), fill_value=np.nan)
nan_betas = np.full(shape=(12,), fill_value=np.nan)
alphas, betas = compute.gamma_parameters(
all_nans,
data_year_start_monthly,
Expand Down Expand Up @@ -247,7 +247,7 @@ def test_transform_fitted_pearson(
"""

# confirm that an input array of all NaNs results in the same array returned
all_nans = np.full(precips_mm_monthly.shape, np.NaN)
all_nans = np.full(precips_mm_monthly.shape, np.nan)
computed_values = compute.transform_fitted_pearson(
all_nans,
data_year_start_monthly,
Expand Down Expand Up @@ -280,7 +280,7 @@ def test_transform_fitted_pearson(
)

# confirm that an input array of all NaNs will return the same array
all_nans = np.full(precips_mm_monthly.shape, np.NaN)
all_nans = np.full(precips_mm_monthly.shape, np.nan)
computed_values = compute.transform_fitted_pearson(
all_nans,
data_year_start_monthly,
Expand Down Expand Up @@ -524,44 +524,44 @@ def test_sum_to_scale():
# test an input array with no missing values
values = np.array([3.0, 4, 6, 2, 1, 3, 5, 8, 5])
computed_values = compute.sum_to_scale(values, 3)
expected_values = np.array([np.NaN, np.NaN, 13, 12, 9, 6, 9, 16, 18])
expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18])
np.testing.assert_allclose(
computed_values,
expected_values,
err_msg=UNEXPECTED_SLIDING_SUMS_MESSAGE,
)
computed_values = compute.sum_to_scale(values, 4)
expected_values = np.array([np.NaN, np.NaN, np.NaN, 15, 13, 12, 11, 17, 21])
expected_values = np.array([np.nan, np.nan, np.nan, 15, 13, 12, 11, 17, 21])
np.testing.assert_allclose(
computed_values,
expected_values,
err_msg=UNEXPECTED_SLIDING_SUMS_MESSAGE,
)

# test an input array with missing values on the end
values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5, np.NaN, np.NaN, np.NaN])
values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5, np.nan, np.nan, np.nan])
computed_values = compute.sum_to_scale(values, 3)
expected_values = np.array([np.NaN, np.NaN, 13, 12, 9, 6, 9, 16, 18, np.NaN, np.NaN, np.NaN])
expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18, np.nan, np.nan, np.nan])
np.testing.assert_allclose(
computed_values,
expected_values,
err_msg="Sliding sums not computed as expected when missing values appended to end of input array",
)

# test an input array with missing values within the array
values = np.array([3, 4, 6, 2, 1, 3, 5, np.NaN, 8, 5, 6])
values = np.array([3, 4, 6, 2, 1, 3, 5, np.nan, 8, 5, 6])
computed_values = compute.sum_to_scale(values, 3)
expected_values = np.array([np.NaN, np.NaN, 13, 12, 9, 6, 9, np.NaN, np.NaN, np.NaN, 19])
expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, np.nan, np.nan, np.nan, 19])
np.testing.assert_allclose(
computed_values,
expected_values,
err_msg="Sliding sums not computed as expected when missing values appended to end of input array",
)

test_values = np.array([1.0, 5, 7, 2, 3, 4, 9, 6, 3, 8])
sum_by2 = np.array([np.NaN, 6, 12, 9, 5, 7, 13, 15, 9, 11])
sum_by4 = np.array([np.NaN, np.NaN, np.NaN, 15, 17, 16, 18, 22, 22, 26])
sum_by6 = np.array([np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22, 30, 31, 27, 33])
sum_by2 = np.array([np.nan, 6, 12, 9, 5, 7, 13, 15, 9, 11])
sum_by4 = np.array([np.nan, np.nan, np.nan, 15, 17, 16, 18, 22, 22, 26])
sum_by6 = np.array([np.nan, np.nan, np.nan, np.nan, np.nan, 22, 30, 31, 27, 33])
np.testing.assert_equal(
compute.sum_to_scale(test_values, 2),
sum_by2,
Expand Down
10 changes: 5 additions & 5 deletions tests/test_eto.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,19 +73,19 @@ def test_eto_thornthwaite(temps_celsius, latitude_degrees, data_year_start_month
pytest.raises(TypeError, eto.eto_thornthwaite, temps_celsius, None, data_year_start_monthly)

# make sure that an invalid latitude value (NaN) raises an error
pytest.raises(ValueError, eto.eto_thornthwaite, temps_celsius, np.NaN, data_year_start_monthly)
pytest.raises(ValueError, eto.eto_thornthwaite, temps_celsius, np.nan, data_year_start_monthly)


# ------------------------------------------------------------------------------
def test_sunset_hour_angle():
# make sure that an invalid latitude value raises an error
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(-100.0), np.deg2rad(0.0))
pytest.raises(ValueError, eto._sunset_hour_angle, np.NaN, np.deg2rad(0.0))
pytest.raises(ValueError, eto._sunset_hour_angle, np.nan, np.deg2rad(0.0))
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Consistent use of np.nan in eto tests

This change improves consistency. Consider adding a test case to ensure the function handles np.nan inputs correctly, if not already present.

    pytest.raises(TypeError, eto.eto_thornthwaite, temps_celsius, None, data_year_start_monthly)

    # make sure that an invalid latitude value (NaN) raises an error
    pytest.raises(ValueError, eto.eto_thornthwaite, temps_celsius, np.nan, data_year_start_monthly)


def test_sunset_hour_angle():
    # make sure that an invalid latitude value raises an error
    pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(-100.0), np.deg2rad(0.0))
    pytest.raises(ValueError, eto._sunset_hour_angle, np.nan, np.deg2rad(0.0))
    pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.nan)


# make sure that an invalid solar declination angle raises an error
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.deg2rad(-75.0))
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.deg2rad(85.0))
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.NaN)
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.nan)

expected_value = math.pi / 2
computed_value = eto._sunset_hour_angle(0.0, np.deg2rad(0.0))
Expand Down Expand Up @@ -113,7 +113,7 @@ def test_solar_declination():
pytest.raises(ValueError, eto._solar_declination, -1)
pytest.raises(ValueError, eto._solar_declination, 367)
pytest.raises(ValueError, eto._solar_declination, 5000)
pytest.raises(ValueError, eto._solar_declination, np.NaN)
pytest.raises(ValueError, eto._solar_declination, np.nan)

expected_value = -0.313551072399921
computed_value = eto._solar_declination(30)
Expand All @@ -130,7 +130,7 @@ def test_daylight_hours():
# make sure invalid arguments raise an error
pytest.raises(ValueError, eto._daylight_hours, math.pi + 1)
pytest.raises(ValueError, eto._daylight_hours, -1.0)
pytest.raises(ValueError, eto._daylight_hours, np.NaN)
pytest.raises(ValueError, eto._daylight_hours, np.nan)

expected_value = 7.999999999999999
computed_value = eto._daylight_hours(math.pi / 3)
Expand Down
Loading
Loading