-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #199 from brandynlucca/test-branch
Merging current refactored EchoPro code for software renaming
- Loading branch information
Showing
28 changed files
with
46,822 additions
and
1,749 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
C:/Users/Brandyn/Documents/GitHub/EchoPro_data/2019_consolidated_files |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
import numpy as np | ||
|
||
def ts_length_regression( x, slope, intercept ): | ||
return slope * np.log10(x) + intercept | ||
|
||
def to_linear( x ): | ||
return 10.0 ** ( x / 10.0 ) | ||
|
||
def to_dB( x ): | ||
return 10 * np.log10(x) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import numpy as np | ||
import pandas as pd | ||
from ..computation.spatial import correct_transect_intervals | ||
from ..computation.operations import group_merge | ||
|
||
def index_sex_weight_proportions( biology_dict: dict ): | ||
""" | ||
Generate dataframe containing sex-stratified weight proportions | ||
Parameters | ||
---------- | ||
biology_dict: dict | ||
Biology data attribute dictionary | ||
""" | ||
|
||
# Age-stratified weight proportions | ||
age_stratified_proportions = biology_dict[ 'weight' ][ 'proportions' ][ 'age_weight_proportions_df' ] | ||
|
||
# Age-stratified & sex-indexed weight proportions | ||
age_sex_stratified_proportions = biology_dict[ 'weight' ][ 'proportions' ][ 'sex_age_weight_proportions_df' ] | ||
|
||
# Concatenate the two to add a 'total' category | ||
return ( | ||
pd.concat( | ||
[ ( age_sex_stratified_proportions | ||
.rename( columns = { 'weight_sex_stratum_proportion': 'weight_proportion' } ) ) , | ||
( age_stratified_proportions.assign( sex = 'total' ) | ||
.rename( columns = { 'weight_stratum_proportion': 'weight_proportion' } ) ) ] | ||
) | ||
) | ||
|
||
def index_transect_age_sex_proportions( acoustics_dict: dict , | ||
biology_dict: dict , | ||
info_strata: pd.DataFrame ): | ||
""" | ||
Prepares the age- and sex-stratified dataframe for biomass calculation | ||
Parameters | ||
---------- | ||
acoustics_dict: dict | ||
Acoustic data attribute dictionary | ||
biology_dict: dict | ||
Biology data attribute dictionary | ||
infra_strata: pd.DataFrame | ||
Dataframe containing strata definitions | ||
""" | ||
|
||
### Prepare initial dataframes used for calculation population statistics | ||
# Construct georeferenced dataframe containing NASC data | ||
nasc_interval_df = correct_transect_intervals( acoustics_dict[ 'nasc' ][ 'nasc_df' ] ) | ||
|
||
### Call additional dataframes needed to merge with the NASC data and subsequently calculate | ||
### population-level metrics (and later statistics) | ||
# Sex-stratum-indexed proportions and average weight | ||
weight_sex_strata = biology_dict[ 'weight' ][ 'weight_strata_df' ] | ||
|
||
# Stratum-averaged sigma_bs | ||
sigma_bs_strata = acoustics_dict[ 'sigma_bs' ][ 'strata_mean' ] | ||
|
||
# Adult NASC proportions for each stratum (number) | ||
# !!! TODO: Currently only uses 'age_1_excluded' -- this should become an argument that toggles | ||
## This is not a major issue since both 'NASC_*' columns can be pivoted to create a single | ||
## NASC column so the column name does not have to be hard-coded. This could then correspond to | ||
## the configuration settings in some way, or this may be where the argument comes into play where | ||
## the dataframe can be simply filtered based on the input/selection. | ||
# between excluding and including age-1 fish | ||
nasc_number_proportions = ( | ||
biology_dict[ 'weight' ][ 'proportions' ][ 'age_proportions_df' ] | ||
) | ||
|
||
# Adult NASC proportions for each stratum (weight) | ||
nasc_weight_proportions = ( | ||
biology_dict[ 'weight' ][ 'proportions' ][ 'age_weight_proportions_df' ] | ||
) | ||
|
||
### Consolidate dataframes that will be added into a list | ||
dataframes_to_add = [ nasc_interval_df , sigma_bs_strata , weight_sex_strata , nasc_number_proportions , | ||
nasc_weight_proportions ] | ||
|
||
|
||
## Merge the relevant dataframes | ||
return ( | ||
nasc_interval_df | ||
# Merge stratum information ( join = 'outer' since missing values will be filled later on) | ||
.merge( info_strata , on = [ 'stratum_num' , 'haul_num' ] , how = 'outer' ) | ||
# Drop unused hauls | ||
.dropna( subset = 'transect_num' ) | ||
# Fill NaN w/ 0's for 'fraction_hake' | ||
.assign( fraction_hake = lambda x: x[ 'fraction_hake' ].fillna( 0 ) ) | ||
# Group merge | ||
.group_merge( dataframes_to_add = dataframes_to_add , inner_on = 'age' , outer_on = 'stratum_num' ) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,224 @@ | ||
import numpy as np | ||
import pandas as pd | ||
from ..computation.spatial import lag_distance_griddify , local_search_index | ||
from ..computation.variogram_models import variogram | ||
|
||
def compute_kriging_weights( ratio , M2 , K ): | ||
|
||
### Singular value decomposition (SVD) | ||
# U: left singular vectors (directions of maximum variance) | ||
# Sigma: singular values (amount of variance captured by each singular vector, U) | ||
# VH: conjugate transpose of the right singular vectors | ||
U , Sigma , VH = np.linalg.svd( K , full_matrices = True ) | ||
|
||
### Create Sigma mask informed by the ratio-threshold | ||
# The ratio between all singular values and their respective | ||
# maximum is used to apply a mask that informs which values | ||
# are used to tabulate the kriging weights (aka lambda) | ||
Sigma_mask = np.abs( Sigma / Sigma[ 0 ] ) > ratio | ||
|
||
### Inverse masked semivariogram (K) | ||
K_inv = np.matmul( | ||
np.matmul( VH.T[:, Sigma_mask] , np.diag( 1.0 / Sigma[ Sigma_mask ] ) ) , | ||
U[ : , Sigma_mask ].T | ||
) | ||
|
||
### Calculate kriging weights (lambda) | ||
return np.dot( K_inv , M2 ) | ||
|
||
def compute_kriging_matrix( x_coordinates , | ||
y_coordinates , | ||
variogram_parameters ): | ||
|
||
### Calculate local distance matrix of within-range samples | ||
local_distance_matrix = lag_distance_griddify( x_coordinates , y_coordinates ) | ||
|
||
### Calculate the covariance/kriging matrix (without the constant term) | ||
kriging_matrix_initial = variogram( distance_lags = local_distance_matrix , | ||
variogram_parameters = variogram_parameters ) | ||
|
||
### Expand the covariance/kriging matrix with a constant | ||
# In Ordinary Kriging, this should be '1' | ||
# Columns | ||
kriging_matrix = np.concatenate( [ kriging_matrix_initial , | ||
np.ones( ( x_coordinates.size , 1 ) ) ] , | ||
axis = 1 ) | ||
|
||
# Rows | ||
kriging_matrix = np.concatenate( [ kriging_matrix , | ||
np.ones( ( 1 , x_coordinates.size + 1 ) ) ] , | ||
axis = 0 ) | ||
|
||
### Diagonal fill (0.0) | ||
# TODO: Should we put in statements for Objective mapping and Universal Kriging w/ Linear drift? # noqa | ||
# Add column and row of ones for Ordinary Kriging | ||
np.fill_diagonal( kriging_matrix , 0.0 ) | ||
|
||
return kriging_matrix | ||
|
||
def range_index_threshold( local_point_grid , | ||
distance_matrix , | ||
R , | ||
k_min ): | ||
|
||
### Calculate the within-sample grid indices | ||
inside_indices = np.where( distance_matrix[ local_point_grid ] <= R )[ 0 ] | ||
|
||
### Expand search radius if the number of local points are insufficient | ||
if len( inside_indices ) < k_min: | ||
|
||
### Sort the closest within-sample points | ||
inside_indices = np.argsort( distance_matrix[ local_point_grid ] )[ : k_min ] | ||
|
||
### Look beyond the sample to collect indices beyond the range threshold | ||
outside_indices = np.where( distance_matrix[ local_point_grid[ inside_indices ] ] > R )[ 0 ] | ||
|
||
### Calculate the appropraite weights for these values beyond the threshold | ||
# TODO: should we change this to how Chu does it? | ||
# tapered function to handle extrapolation | ||
out_of_sample_weights = np.exp( - np.nanmean( distance_matrix[ local_point_grid[ inside_indices ] ] ) / R ) | ||
|
||
else: | ||
### If the sample size is sufficient | ||
outside_indices = [ ] | ||
out_of_sample_weights = 1.0 | ||
|
||
### Carriage return | ||
return inside_indices , outside_indices , out_of_sample_weights | ||
|
||
|
||
def compute_kriging_statistics( point_values , | ||
lagged_semivariogram , | ||
kriging_weights , | ||
inside_indices , | ||
outside_indices , | ||
out_of_sample_weights ): | ||
|
||
### Remove any extrapolation | ||
if len( outside_indices ) > 0: | ||
point_values[ outside_indices ] = 0.0 | ||
|
||
### Calculate locally weighted kriging mean | ||
local_mean = np.nansum( kriging_weights[: len( inside_indices ) ] * point_values ) * out_of_sample_weights | ||
|
||
### Calculate locally weighted kriging prediction variance | ||
local_prediction_variance = np.nansum( kriging_weights * lagged_semivariogram ) | ||
|
||
### Calculate locally weighted point sample variance | ||
if abs( local_mean ) < np.finfo( float ).eps: | ||
local_sample_variance = np.nan | ||
else: | ||
local_arithmetic_variance = np.nanvar( point_values , ddof = 1 ) # Non-spatial, arithmetic sample variance | ||
local_sample_variance = np.sqrt( local_prediction_variance * local_arithmetic_variance ) / abs( local_mean ) | ||
|
||
### Carriage return | ||
return local_mean , local_prediction_variance , local_sample_variance | ||
|
||
def ordinary_kriging( dataframe , | ||
transformed_mesh , | ||
variogram_parameters , | ||
kriging_parameters , | ||
variable = 'B_a_adult' ): | ||
|
||
### Calculate the kriging distance matrix and corresponding indices | ||
distance_matrix , local_point_grid = local_search_index( transformed_mesh , | ||
dataframe , | ||
kriging_parameters[ 'kmax' ] ) | ||
|
||
### Initialize kriging grid, weights, and results prior to loop | ||
kriging_prediction_variance = np.empty( local_point_grid.shape[ 0 ] ) # Prediction variance | ||
kriging_sample_variance = np.empty( local_point_grid.shape[ 0 ] ) # Sample variance | ||
kriging_mean = np.empty( local_point_grid.shape[ 0 ] ) # Mean | ||
|
||
### Pull target data variable | ||
variable_data = dataframe.loc[ : , variable ].values | ||
|
||
### Iterate through the local point grid to evaluate the kriged/interpolated results | ||
for row in range( local_point_grid.shape[ 0 ] ): | ||
|
||
### Parse the k-closest points based on the defined range-threshold | ||
inside_indices , outside_indices , out_of_sample_weights = ( | ||
range_index_threshold( local_point_grid[ row , : ] , | ||
distance_matrix[ row , : ] , | ||
variogram_parameters[ 'range' ] , | ||
kriging_parameters[ 'kmin' ] ) | ||
) | ||
|
||
### Index the within-sample distance indices | ||
distance_within_indices = local_point_grid[ row , : ][ inside_indices ] | ||
|
||
### Calculate the theoretical (semi)variogram at defined lag distances | ||
modeled_semivariogram = variogram( distance_lags = distance_matrix[ row , distance_within_indices ] , | ||
variogram_parameters = variogram_parameters ) | ||
|
||
### For Ordinary Kriging, we shift the lags by 1 to include a constant term (1.0) | ||
# TODO: Should we put in statements for Objective mapping and Universal Kriging w/ Linear drift? # noqa | ||
lagged_semivariogram = np.concatenate( [ modeled_semivariogram , [ 1.0 ] ] ) | ||
|
||
### Calculate the covariance/kriging matrix | ||
kriging_matrix = compute_kriging_matrix( dataframe.x_transformed[ distance_within_indices ].values , | ||
dataframe.y_transformed[ distance_within_indices ].values , | ||
variogram_parameters = variogram_parameters ) | ||
|
||
### Use singular value decomposition (SVD) to solve for the | ||
### kriging weights (lambda) | ||
kriging_weights = compute_kriging_weights( kriging_parameters[ 'anisotropy' ] , lagged_semivariogram , kriging_matrix ) | ||
|
||
### Compute Kriging value and variance | ||
# Index the appropriate data values | ||
point_values = variable_data[ distance_within_indices ] | ||
|
||
### Calculate the kriged mean, predication variance, and sample variance | ||
kriged_stats = compute_kriging_statistics( point_values , | ||
lagged_semivariogram , | ||
kriging_weights , | ||
inside_indices , | ||
outside_indices , | ||
out_of_sample_weights ) | ||
|
||
### Fill in the iterable values from the output (tuple) of | ||
### `compute_kriging_statistics()` | ||
kriging_mean[ row ] = kriged_stats[ 0 ] | ||
kriging_prediction_variance[ row ] = kriged_stats[ 1 ] | ||
kriging_sample_variance[ row ] = kriged_stats[ 2 ] | ||
|
||
### Remove nonsense values (NaN, negative) | ||
kriging_mean = np.where( ( kriging_mean < 0 ) | np.isnan( kriging_mean ) , 0.0 , kriging_mean ) | ||
|
||
### Carriage return | ||
return kriging_mean , kriging_prediction_variance , kriging_sample_variance | ||
|
||
def kriging_interpolation( dataframe , | ||
transformed_mesh , | ||
dataframe_mesh , | ||
dataframe_geostrata , | ||
variogram_parameters , | ||
kriging_parameters , | ||
variable = 'B_a_adult' ): | ||
|
||
### Discretize latitudinal bins | ||
latitude_bins = np.concatenate( [ [ -90.0 ] , dataframe_geostrata.northlimit_latitude , [ 90.0 ] ] ) | ||
|
||
### Discretize mesh data into the same strata | ||
dataframe_mesh[ 'stratum_num' ] = pd.cut( dataframe_mesh.centroid_latitude , | ||
latitude_bins , | ||
labels = list( dataframe_geostrata.stratum_num ) + [ 1 ] , | ||
ordered = False ) | ||
|
||
### Run kriging | ||
kriged_results = ordinary_kriging( dataframe , transformed_mesh , variogram_parameters , kriging_parameters ) | ||
|
||
### Append results to the mesh dataframe | ||
dataframe_mesh[ 'B_a_adult_mean' ] = kriged_results[ 0 ] | ||
dataframe_mesh[ 'B_a_adult_prediction_variance' ] = kriged_results[ 1 ] | ||
dataframe_mesh[ 'B_a_adult_sample_variance' ] = kriged_results[ 2 ] | ||
|
||
### Calculate cell area | ||
kriged_result_df = ( | ||
dataframe_mesh | ||
.assign( cell_area_nmi2 = lambda x: kriging_parameters[ 'A0' ] * x.fraction_cell_in_polygon , | ||
B_adult_kriged = lambda x: x.B_a_adult_mean * x.cell_area_nmi2 ) | ||
) | ||
|
||
### Carriage return | ||
return kriged_result_df |
Oops, something went wrong.