Skip to content

Commit

Permalink
Merge pull request #51 from bsmurphy/refactor-statistics
Browse files Browse the repository at this point in the history
Refactor statistics calculations
  • Loading branch information
bsmurphy authored Mar 9, 2017
2 parents 8e0e37d + e8ffdc2 commit 22c8a69
Show file tree
Hide file tree
Showing 7 changed files with 283 additions and 209 deletions.
266 changes: 143 additions & 123 deletions pykrige/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,15 @@
variogram_function, weight):
Returns variogram model parameters that minimize the RMSE between the
specified variogram function and the actual calculated variogram points.
krige(x, y, z, coords, variogram_function, variogram_model_parameters):
_krige(X, y, coords, variogram_function, variogram_model_parameters,
coordinates_type):
Function that solves the ordinary kriging system for a single specified
point. Returns Z value and sigma squared for the specified coordinates.
krige_3d(x, y, z, vals, coords, variogram_function,
variogram_model_parameters):
Function that solves the ordinary kriging system for a single specified
point. Returns the interpolated value and sigma squared for the
specified coordinates.
find_statistics(x, y, z, variogram_funtion, variogram_model_parameters):
Used in statistics calculation.
_find_statistics(X, y, variogram_funtion, variogram_model_parameters,
coordinates_type):
Returns the delta, sigma, and epsilon values for the variogram fit.
These arrays are used for statistics calculations.
calcQ1(epsilon):
Returns the Q1 statistic for the variogram fit (see Kitanidis).
calcQ2(epsilon):
Expand All @@ -63,9 +62,11 @@
"""

import numpy as np
from scipy.spatial.distance import pdist
from scipy.spatial.distance import pdist, squareform, cdist
from scipy.optimize import least_squares

eps = 1.e-10 # Cutoff for comparison to zero


def great_circle_distance(lon1, lat1, lon2, lat2):
"""
Expand Down Expand Up @@ -607,127 +608,146 @@ def _calculate_variogram_model(lags, semivariance, variogram_model,
return res.x


def krige(x, y, z, coords, variogram_function, variogram_model_parameters, coordinates_type):
"""Sets up and solves the kriging matrix for the given coordinate pair.
This function is now only used for the statistics calculations."""

zero_index = None
zero_value = False

x1, x2 = np.meshgrid(x, x, sparse=True)
y1, y2 = np.meshgrid(y, y, sparse=True)

if coordinates_type == 'euclidean':
d = np.sqrt((x1 - x2)**2 + (y1 - y2)**2)
bd = np.sqrt((x - coords[0])**2 + (y - coords[1])**2)
elif coordinates_type == 'geographic':
d = great_circle_distance(x1, y1, x2, y2)
bd = great_circle_distance(x, y, coords[0]*np.ones(x.shape),
coords[1]*np.ones(y.shape))
if np.any(np.absolute(bd) <= 1e-10):
zero_value = True
zero_index = np.where(bd <= 1e-10)[0][0]

n = x.shape[0]
a = np.zeros((n+1, n+1))
a[:n, :n] = - variogram_function(variogram_model_parameters, d)
np.fill_diagonal(a, 0.0)
a[n, :] = 1.0
a[:, n] = 1.0
a[n, n] = 0.0

b = np.zeros((n+1, 1))
b[:n, 0] = - variogram_function(variogram_model_parameters, bd)
if zero_value:
b[zero_index, 0] = 0.0
b[n, 0] = 1.0

x_ = np.linalg.solve(a, b)
zinterp = np.sum(x_[:n, 0] * z)
sigmasq = np.sum(x_[:, 0] * -b[:, 0])

return zinterp, sigmasq


def krige_3d(x, y, z, vals, coords, variogram_function, variogram_model_parameters):
"""Sets up and solves the kriging matrix for the given coordinate pair.
This function is now only used for the statistics calculations."""

zero_index = None
zero_value = False

x1, x2 = np.meshgrid(x, x, sparse=True)
y1, y2 = np.meshgrid(y, y, sparse=True)
z1, z2 = np.meshgrid(z, z, sparse=True)
d = np.sqrt((x1 - x2)**2 + (y1 - y2)**2 + (z1 - z2)**2)
bd = np.sqrt((x - coords[0])**2 + (y - coords[1])**2 + (z - coords[2])**2)
if np.any(np.absolute(bd) <= 1e-10):
zero_value = True
zero_index = np.where(bd <= 1e-10)[0][0]

n = x.shape[0]
a = np.zeros((n+1, n+1))
a[:n, :n] = - variogram_function(variogram_model_parameters, d)
np.fill_diagonal(a, 0.0)
a[n, :] = 1.0
a[:, n] = 1.0
a[n, n] = 0.0

b = np.zeros((n+1, 1))
b[:n, 0] = - variogram_function(variogram_model_parameters, bd)
if zero_value:
b[zero_index, 0] = 0.0
b[n, 0] = 1.0

x_ = np.linalg.solve(a, b)
zinterp = np.sum(x_[:n, 0] * vals)
sigmasq = np.sum(x_[:, 0] * -b[:, 0])

return zinterp, sigmasq


def find_statistics(x, y, z, variogram_function, variogram_model_parameters, coordinates_type):
"""Calculates variogram fit statistics."""

delta = np.zeros(z.shape)
sigma = np.zeros(z.shape)

for n in range(z.shape[0]):
if n == 0:
delta[n] = 0.0
sigma[n] = 0.0
else:
z_, ss_ = krige(x[:n], y[:n], z[:n], (x[n], y[n]), variogram_function,
variogram_model_parameters, coordinates_type)
d = z[n] - z_
delta[n] = d
sigma[n] = np.sqrt(ss_)

delta = delta[1:]
sigma = sigma[1:]
epsilon = delta/sigma
def _krige(X, y, coords, variogram_function,
variogram_model_parameters, coordinates_type):
"""Sets up and solves the kriging matrix for the given coordinate pair.
This function is only used for the statistics calculations.
return delta, sigma, epsilon
Parameters
----------
X: ndarray
float array [n_samples, n_dim], the input array of coordinates
y: ndarray
float array [n_samples], the input array of measurement values
coords: ndarray
float array [1, n_dim], point at which to evaluate the kriging system
variogram_function: callable
function that will be called to evaluate variogram model
variogram_model_parameters: list
user-specified parameters for variogram model
coordinates_type: str
type of coordinates in X array, can be 'euclidean' for standard
rectangular coordinates or 'geographic' if the coordinates are lat/lon
Returns
-------
zinterp: float
kriging estimate at the specified point
sigmasq: float
mean square error of the kriging estimate
"""

def find_statistics_3d(x, y, z, vals, variogram_function, variogram_model_parameters):
"""Calculates variogram fit statistics for 3D problems."""
zero_index = None
zero_value = False

delta = np.zeros(vals.shape)
sigma = np.zeros(vals.shape)
# calculate distance between points... need a square distance matrix
# of inter-measurement-point distances and a vector of distances between
# measurement points (X) and the kriging point (coords)
if coordinates_type == 'euclidean':
d = squareform(pdist(X, metric='euclidean'))
bd = np.squeeze(cdist(X, coords[None, :], metric='euclidean'))

for n in range(z.shape[0]):
if n == 0:
delta[n] = 0.0
sigma[n] = 0.0
else:
val_, ss_ = krige_3d(x[:n], y[:n], z[:n], vals[:n], (x[n], y[n], z[n]),
variogram_function, variogram_model_parameters)
delta[n] = vals[n] - val_
sigma[n] = np.sqrt(ss_)
# geographic coordinate distances still calculated in the old way...
# assume X[:, 0] ('x') => lon, X[:, 1] ('y') => lat
# also assume problem is 2D; check done earlier in initializing variogram
elif coordinates_type == 'geographic':
x1, x2 = np.meshgrid(X[:, 0], X[:, 0], sparse=True)
y1, y2 = np.meshgrid(X[:, 1], X[:, 1], sparse=True)
d = great_circle_distance(x1, y1, x2, y2)
bd = great_circle_distance(X[:, 0], X[:, 1],
coords[0] * np.ones(X.shape[0]),
coords[1] * np.ones(X.shape[0]))

delta = delta[1:]
sigma = sigma[1:]
# this check is done when initializing variogram, but kept here anyways...
else:
raise ValueError("Specified coordinate type '%s' "
"is not supported." % coordinates_type)

# check if kriging point overlaps with measurement point
if np.any(np.absolute(bd) <= 1e-10):
zero_value = True
zero_index = np.where(bd <= 1e-10)[0][0]

# set up kriging matrix
n = X.shape[0]
a = np.zeros((n+1, n+1))
a[:n, :n] = - variogram_function(variogram_model_parameters, d)
np.fill_diagonal(a, 0.0)
a[n, :] = 1.0
a[:, n] = 1.0
a[n, n] = 0.0

# set up RHS
b = np.zeros((n+1, 1))
b[:n, 0] = - variogram_function(variogram_model_parameters, bd)
if zero_value:
b[zero_index, 0] = 0.0
b[n, 0] = 1.0

# solve
res = np.linalg.solve(a, b)
zinterp = np.sum(res[:n, 0] * y)
sigmasq = np.sum(res[:, 0] * -b[:, 0])

return zinterp, sigmasq


def _find_statistics(X, y, variogram_function,
variogram_model_parameters, coordinates_type):
"""Calculates variogram fit statistics.
Parameters
----------
X: ndarray
float array [n_samples, n_dim], the input array of coordinates
y: ndarray
float array [n_samples], the input array of measurement values
variogram_function: callable
function that will be called to evaluate variogram model
variogram_model_parameters: list
user-specified parameters for variogram model
coordinates_type: str
type of coordinates in X array, can be 'euclidean' for standard
rectangular coordinates or 'geographic' if the coordinates are lat/lon
Returns
-------
delta: ndarray
residuals between observed values and kriged estimates for those values
sigma: ndarray
mean error in kriging estimates
epsilon: ndarray
residuals normalized by their mean error
"""

delta = np.zeros(y.shape)
sigma = np.zeros(y.shape)

for i in range(y.shape[0]):

# skip the first value in the kriging problem
if i == 0:
continue

else:
k, ss = _krige(X[:i, :], y[:i], X[i, :], variogram_function,
variogram_model_parameters, coordinates_type)

# if the estimation error is zero, it's probably because
# the evaluation point X[i, :] is really close to one of the
# kriging system points in X[:i, :]...
# in the case of zero estimation error, the results are not stored
if np.absolute(ss) < eps:
continue

delta[i] = y[i] - k
sigma[i] = np.sqrt(ss)

# only use non-zero entries in these arrays... sigma is used to pull out
# non-zero entries in both cases because it is guaranteed to be positive,
# whereas delta can be either positive or negative
delta = delta[sigma > eps]
sigma = sigma[sigma > eps]
epsilon = delta/sigma

return delta, sigma, epsilon
Expand Down
21 changes: 12 additions & 9 deletions pykrige/ok.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from . import variogram_models
from . import core
from .core import _adjust_for_anisotropy, _initialize_variogram_model, \
_make_variogram_parameter_list
_make_variogram_parameter_list, _find_statistics
import warnings


Expand Down Expand Up @@ -353,10 +353,12 @@ def __init__(self, x, y, z, variogram_model='linear',
if self.verbose:
print("Calculating statistics on variogram model fit...")
if enable_statistics:
self.delta, self.sigma, self.epsilon = core.find_statistics(self.X_ADJUSTED, self.Y_ADJUSTED,
self.Z, self.variogram_function,
self.variogram_model_parameters,
self.coordinates_type)
self.delta, self.sigma, self.epsilon = \
_find_statistics(np.vstack((self.X_ADJUSTED,
self.Y_ADJUSTED)).T,
self.Z, self.variogram_function,
self.variogram_model_parameters,
self.coordinates_type)
self.Q1 = core.calcQ1(self.epsilon)
self.Q2 = core.calcQ2(self.epsilon)
self.cR = core.calc_cR(self.Q2, self.sigma)
Expand Down Expand Up @@ -442,10 +444,11 @@ def update_variogram_model(self, variogram_model, variogram_parameters=None,

if self.verbose:
print("Calculating statistics on variogram model fit...")
self.delta, self.sigma, self.epsilon = core.find_statistics(self.X_ADJUSTED, self.Y_ADJUSTED,
self.Z, self.variogram_function,
self.variogram_model_parameters,
self.coordinates_type)
self.delta, self.sigma, self.epsilon = \
_find_statistics(np.vstack((self.X_ADJUSTED, self.Y_ADJUSTED)).T,
self.Z, self.variogram_function,
self.variogram_model_parameters,
self.coordinates_type)
self.Q1 = core.calcQ1(self.epsilon)
self.Q2 = core.calcQ2(self.epsilon)
self.cR = core.calc_cR(self.Q2, self.sigma)
Expand Down
25 changes: 15 additions & 10 deletions pykrige/ok3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from . import variogram_models
from . import core
from .core import _adjust_for_anisotropy, _initialize_variogram_model, \
_make_variogram_parameter_list
_make_variogram_parameter_list, _find_statistics
import warnings


Expand Down Expand Up @@ -351,18 +351,21 @@ def __init__(self, x, y, z, val, variogram_model='linear',
else:
print("Using '%s' Variogram Model" % self.variogram_model)
print("Partial Sill:", self.variogram_model_parameters[0])
print("Full Sill:", self.variogram_model_parameters[0] + self.variogram_model_parameters[2])
print("Full Sill:", self.variogram_model_parameters[0] +
self.variogram_model_parameters[2])
print("Range:", self.variogram_model_parameters[1])
print("Nugget:", self.variogram_model_parameters[2], '\n')
if self.enable_plotting:
self.display_variogram_model()

if self.verbose:
print("Calculating statistics on variogram model fit...")
self.delta, self.sigma, self.epsilon = core.find_statistics_3d(self.X_ADJUSTED, self.Y_ADJUSTED,
self.Z_ADJUSTED, self.VALUES,
self.variogram_function,
self.variogram_model_parameters)
self.delta, self.sigma, self.epsilon = \
_find_statistics(np.vstack((self.X_ADJUSTED,
self.Y_ADJUSTED,
self.Z_ADJUSTED)).T,
self.VALUES, self.variogram_function,
self.variogram_model_parameters, 'euclidean')
self.Q1 = core.calcQ1(self.epsilon)
self.Q2 = core.calcQ2(self.epsilon)
self.cR = core.calc_cR(self.Q2, self.sigma)
Expand Down Expand Up @@ -441,10 +444,12 @@ def update_variogram_model(self, variogram_model, variogram_parameters=None,

if self.verbose:
print("Calculating statistics on variogram model fit...")
self.delta, self.sigma, self.epsilon = core.find_statistics_3d(self.X_ADJUSTED, self.Y_ADJUSTED,
self.Z_ADJUSTED, self.VALUES,
self.variogram_function,
self.variogram_model_parameters)
self.delta, self.sigma, self.epsilon = \
_find_statistics(np.vstack((self.X_ADJUSTED,
self.Y_ADJUSTED,
self.Z_ADJUSTED)).T,
self.VALUES, self.variogram_function,
self.variogram_model_parameters, 'euclidean')
self.Q1 = core.calcQ1(self.epsilon)
self.Q2 = core.calcQ2(self.epsilon)
self.cR = core.calc_cR(self.Q2, self.sigma)
Expand Down
Loading

0 comments on commit 22c8a69

Please sign in to comment.