Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Metric space #68

Merged
merged 38 commits into from
Apr 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
7c13abf
Separate class to store variogram results in
Nov 25, 2020
93d01ca
Merge branch 'distance-cacl-speedup2'
Nov 25, 2020
0794933
Merge branch 'master' of github.com:mmaelicke/scikit-gstat
Nov 25, 2020
9a9466c
cKDTree based optimization for the euclidean case
Nov 25, 2020
082b930
Optimized dist_mat calculation
Dec 3, 2020
e2c051a
Bugfix for the explicit zero handling og sparse dok format
Dec 3, 2020
1f8a7a7
Use a faster sparse format
Dec 3, 2020
3cbf2ff
Merge branch 'master' of github.com:mmaelicke/scikit-gstat into ckdtr…
Dec 15, 2020
55a96bc
Extracted all space metrics handling to a separate module
Dec 17, 2020
c247282
Removed old code
Dec 17, 2020
dc5366a
Updated default value for spare to match what was discussed in https:…
Dec 17, 2020
07f3eb0
Bugfix
Dec 17, 2020
98109eb
Support constructing Kriging() from the output of Variogram.describe(…
Dec 17, 2020
b81ff2c
Let the user specify a MetricSpace as coordinates
Dec 17, 2020
10ad212
Let the user specify a MetricSpace as coordinates for transform
Dec 17, 2020
cf0751b
Added some aserts
Dec 17, 2020
38bdbfa
Merge branch 'master' of https://github.com/mmaelicke/scikit-gstat
Apr 8, 2021
762465b
Merge branch 'master' into metric-space
Apr 8, 2021
9e26cdd
Bugfix
Apr 8, 2021
959293c
Bugfix
Apr 8, 2021
faf78bd
Optimize Variogram for situations where range << bbox(data)
Apr 8, 2021
4303b1f
Bugfix for rename
Apr 8, 2021
fc91fda
Handle harmonized models too
Apr 8, 2021
4931d57
Bugfixes for harmonized model
Apr 8, 2021
009a506
Some bugfixes to plotting
Apr 9, 2021
5fc4dfe
Fixed some deprecation warnings
Apr 9, 2021
b52dbb2
Some fixes for directional variograms
Apr 9, 2021
5288a2e
Bugfixes
Apr 9, 2021
1f57236
test bugfix
Apr 9, 2021
c5f93e4
Bugfix for normalized vs non-normalized parameters
Apr 9, 2021
e6c75a7
Bugfix for SpaceTimeVariogram
Apr 12, 2021
deebce6
Merge branch 'master' of github.com:mmaelicke/scikit-gstat into metri…
Apr 12, 2021
2892452
Big bugfix for sparse variograms
Apr 12, 2021
1b30abd
Unit tests for variograms with maxlag
Apr 12, 2021
9e067b8
Bugfix
Apr 12, 2021
e2d0a27
Merge branch 'master' of github.com:mmaelicke/scikit-gstat into metri…
Apr 12, 2021
aaefd81
Bugfix
Apr 12, 2021
544157e
Bugfix
Apr 12, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions skgstat/DirectionalVariogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from .Variogram import Variogram
from skgstat import plotting
from .MetricSpace import MetricSpace, MetricSpacePair


class DirectionalVariogram(Variogram):
Expand Down Expand Up @@ -227,8 +228,14 @@ def __init__(self,

self._direction_mask_cache = None

if not isinstance(coordinates, MetricSpace):
coordinates = np.asarray(coordinates)
coordinates = MetricSpace(coordinates.copy(), dist_func, maxlag if maxlag and not isinstance(maxlag, str) and maxlag >= 1 else None)
else:
assert self.dist_func == coordinates.dist_metric, "Distance metric of variogram differs from distance metric of coordinates"

# Set coordinates
self._X = np.asarray(coordinates)
self._X = coordinates

# pairwise difference
self._diff = None
Expand Down Expand Up @@ -310,7 +317,6 @@ def __init__(self,
self.fit(force=True)

def preprocessing(self, force=False):
self._calc_distances(force=force)
self._calc_direction_mask_data(force)
self._calc_diff(force=force)
self._calc_groups(force=force)
Expand Down Expand Up @@ -344,24 +350,28 @@ def _calc_direction_mask_data(self, force=False):
`azimuth <skgstat.DirectionalVariogram.azimuth>`_

"""

# FIXME: This should be optimized for the sparse case (range << bbox(coordinates)),
# i.e. use the MetricSpace in self._X

# check if already calculated
if self._angles is not None and not force:
return

# if self._X is of just one dimension, concat zeros.
if self._X.ndim == 1:
_x = np.vstack(zip(self._X, np.zeros(len(self._X))))
elif self._X.ndim == 2:
_x = self._X
# if self.coordinates is of just one dimension, concat zeros.
if self.coordinates.ndim == 1:
_x = np.vstack(zip(self.coordinates, np.zeros(len(self.coordinates))))
elif self.coordinates.ndim == 2:
_x = self.coordinates
else:
raise NotImplementedError('N-dimensional coordinates cannot be handled')

# for angles, we need Euklidean distance,
# no matter which distance function is used
if self._dist_func_name == "euclidean":
self._euclidean_dist = self._dist
else:
self._euclidean_dist = pdist(_x, "euclidean")
#if self._dist_func_name == "euclidean":
# self._euclidean_dist = scipy.spatial.distance.squareform(self.distance_matrix)
#else:
self._euclidean_dist = pdist(_x, "euclidean")

# Calculate the angles
# (a - b).[1,0] = ||a - b|| * ||[1,0]|| * cos(v)
Expand Down
116 changes: 61 additions & 55 deletions skgstat/Kriging.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import scipy.spatial.distance

from .Variogram import Variogram

from .MetricSpace import MetricSpace, MetricSpacePair

class LessPointsError(RuntimeError):
pass
Expand All @@ -30,7 +30,6 @@ class IllMatrixError(RuntimeWarning):
def inv_solve(a, b):
return inv(a).dot(b)


class OrdinaryKriging:
def __init__(
self,
Expand All @@ -41,7 +40,11 @@ def __init__(
precision=100,
solver='inv',
n_jobs=1,
perf=False
perf=False,
sparse=False,

coordinates=None,
values=None
):
"""Ordinary Kriging routine

Expand Down Expand Up @@ -88,14 +91,25 @@ def __init__(
If True, the different parts of the algorithm will record their
processing time. This is meant to be used for optimization and
will be removed in a future version. Do not rely on this argument.
sparse : bool

coordinates: numpy.ndarray, MetricSpace
values: numpy.ndarray

"""
# store arguments to the instance
if not isinstance(variogram, Variogram):
raise TypeError('variogram has to be of type skgstat.Variogram.')

if isinstance(variogram, Variogram):
if coordinates is None: coordinates = variogram.coordinates
if values is None: values = variogram.values
variogram_descr = variogram.describe()
if variogram_descr["model"] == "harmonize":
variogram_descr["model"] = variogram._build_harmonized_model()
variogram = variogram_descr

self.sparse = sparse

# general attributes
self.V = variogram
self._minp = min_points
self._maxp = max_points
self.min_points = min_points
Expand All @@ -105,14 +119,21 @@ def __init__(
self.n_jobs = n_jobs
self.perf = perf

params = self.V.describe()
self.range = params['effective_range']
self.nugget = params['nugget']
self.sill = params['sill']

self.range = variogram['effective_range']
self.nugget = variogram['nugget']
self.sill = variogram['sill']
self.dist_metric = variogram["dist_func"]
# coordinates and semivariance function
self.coords, self.values = self._get_coordinates_and_values()
self.gamma_model = self.V.fitted_model
if not isinstance(coordinates, MetricSpace):
coordinates, values = self._remove_duplicated_coordinates(coordinates, values)
coordinates = MetricSpace(coordinates.copy(), self.dist_metric, self.range if self.sparse else None)
else:
assert self.dist_metric == coordinates.dist_metric, "Distance metric of variogram differs from distance metric of coordinates"
assert coordinates.max_dist is None or coordinates.max_dist == self.range, "Sparse coordinates must have max_dist == variogram.effective_range"
self.values = values.copy()
self.coords = coordinates
self.gamma_model = Variogram.fitted_model_function(**variogram)
self.z = None

# calculation mode; self.range has to be initialized
Expand All @@ -132,40 +153,28 @@ def __init__(
self.singular_error = 0
self.no_points_error = 0
self.ill_matrix = 0

# performance counter
if self.perf:
self.perf_dist = list()
self.perf_mat = list()
self.perf_solv = list()

@property
def dist(self):
return self.V._dist_func_wrapper

@property
def dist_metric(self):
return self.V._dist_func_name

def _get_coordinates_and_values(self):
def dist(self, x):
return Variogram.wrapped_distance_function(self.dist_metric, x)

@classmethod
def _remove_duplicated_coordinates(cls, coords, values):
"""Extract the coordinates and values

The coordinates and values array is extracted from the Variogram
instance. Additionally, the coordinates array is checked for
duplicates and only the first instance of a duplicate is used.
Duplicated coordinates would result in duplicated rows in the
variogram matrix and make it singular.

Returns
-------
coords : numpy.array
copy of Variogram.coordines without duplicates
values : numpy.array
copy of Variogram.values without duplicates
The coordinates array is checked for duplicates and only the
first instance of a duplicate is used. Duplicated coordinates
would result in duplicated rows in the variogram matrix and
make it singular.

"""
c = self.V.coordinates.copy()
v = self.V.values.copy()
c = coords
v = values

_, idx = np.unique(c, axis=0, return_index=True)

Expand Down Expand Up @@ -260,7 +269,7 @@ def transform(self, *x):

Parameters
----------
x : numpy.array
x : numpy.array, MetricSpace
One 1D array for each coordinate dimension. Typically two or
three array, x, y, (z) are passed for 2D and 3D Kriging

Expand All @@ -279,20 +288,23 @@ def transform(self, *x):
if self.perf:
self.perf_dist, self.perf_mat, self.perf_solv = [], [], []

self.transform_coordinates = np.column_stack(x)
self.transform_dists = scipy.spatial.distance.cdist(self.transform_coordinates, self.coords, metric=self.dist_metric)
if len(x) != 1 or not isinstance(x[0], MetricSpace):
self.transform_coords = MetricSpace(np.column_stack(x).copy(), self.dist_metric, self.range if self.sparse else None)
else:
self.transform_coords = x[0]
self.transform_coords_pair = MetricSpacePair(self.transform_coords, self.coords)

# DEV: this is dirty, not sure how to do it better at the moment
self.sigma = np.empty(len(x[0]))
self.__sigma_index = 0
# if multi-core, than here
if self.n_jobs is None or self.n_jobs == 1:
z = np.fromiter(map(self._estimator, range(len(self.transform_coordinates))), dtype=float)
z = np.fromiter(map(self._estimator, range(len(self.transform_coords))), dtype=float)
else:
def f(idxs):
return self._estimator(idxs)
with Pool(self.n_jobs) as p:
z = p.starmap(f, range(len(self.transform_coordinates)))
z = p.starmap(f, range(len(self.transform_coords)))

# print warnings
if self.singular_error > 0:
Expand Down Expand Up @@ -389,25 +401,19 @@ def _krige(self, idx):
if self.perf:
t0 = time.time()

p = self.transform_coordinates[idx,:]
dists = self.transform_dists[idx,:]

# find all points within the search distance
idx = np.where(dists <= self.range)[0]
p = self.transform_coords.coords[idx,:]

idx = self.transform_coords_pair.find_closest(idx, self.range, self._maxp)

# raise an error if not enough points are found
if idx.size < self._minp:
raise LessPointsError

if idx.size > self._maxp:
sorted_idx = np.argsort(dists)
idx = sorted_idx[np.isin(sorted_idx, idx)][:self._maxp]


# finally find the points and values
in_range = self.coords[idx]
in_range = self.coords.coords[idx]
values = self.values[idx]
dist_mat = self.dist(in_range)

dist_mat = self.coords.diagonal(idx)
# if performance is tracked, time this step
if self.perf:
t1 = time.time()
Expand Down
Loading