From 9a9466cc057f7251d641e8236cc8375567a6a109 Mon Sep 17 00:00:00 2001 From: Egil Date: Wed, 25 Nov 2020 18:51:01 +0100 Subject: [PATCH 1/5] cKDTree based optimization for the euclidean case --- skgstat/Kriging.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/skgstat/Kriging.py b/skgstat/Kriging.py index 8efa25e..d831509 100644 --- a/skgstat/Kriging.py +++ b/skgstat/Kriging.py @@ -279,7 +279,13 @@ def transform(self, *x): self.perf_dist, self.perf_mat, self.perf_solv = [], [], [] self.transform_coordinates = np.column_stack(x) - self.transform_dists = scipy.spatial.distance.cdist(self.transform_coordinates, self.coords, metric=self.dist_metric) + if self.dist_metric == "euclidean": + tt = scipy.spatial.cKDTree(self.transform_coordinates) + ct = scipy.spatial.cKDTree(self.coords) + + self.transform_dists = tt.sparse_distance_matrix(ct, self.range).todok() + else: + self.transform_dists = scipy.spatial.distance.cdist(self.transform_coordinates, self.coords, metric=self.dist_metric) # DEV: this is dirty, not sure how to do it better at the moment self.sigma = np.empty(len(x[0])) @@ -389,16 +395,23 @@ def _krige(self, idx): dists = self.transform_dists[idx,:] # find all points within the search distance - idx = np.where(dists <= self.range)[0] + if isinstance(dists, scipy.sparse.spmatrix): + idx = np.array([k[1] for k in dists.keys()]) + else: + idx = np.where(dists <= self.range)[0] # raise an error if not enough points are found if idx.size < self._minp: raise LessPointsError if idx.size > self._maxp: - sorted_idx = np.argsort(dists) - idx = sorted_idx[np.isin(sorted_idx, idx)][:self._maxp] - + if isinstance(dists, scipy.sparse.spmatrix): + selected_dists = dists[0, idx].toarray()[0,:] + else: + selected_dists = dists[idx] + sorted_idx = np.argsort(selected_dists, kind="stable") + idx = idx[sorted_idx][:self._maxp] + # finally find the points and values in_range = self.coords[idx] values = self.values[idx] From 082b9304257df043f014516ebd84ff1475539484 Mon Sep 17 00:00:00 2001 From: Egil Date: Thu, 3 Dec 2020 09:27:10 +0100 Subject: [PATCH 2/5] Optimized dist_mat calculation --- skgstat/Kriging.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/skgstat/Kriging.py b/skgstat/Kriging.py index d831509..745c46a 100644 --- a/skgstat/Kriging.py +++ b/skgstat/Kriging.py @@ -30,6 +30,11 @@ class IllMatrixError(RuntimeWarning): def inv_solve(a, b): return inv(a).dot(b) +def sparse_dok_get(m, fill_value=np.NaN): + mm = np.full(m.shape, fill_value) + for (x, y), value in m.items(): + mm[x,y] = value + return mm class OrdinaryKriging: def __init__( @@ -132,6 +137,13 @@ def __init__( self.no_points_error = 0 self.ill_matrix = 0 + if self.dist_metric == "euclidean": + self.coords_tree = scipy.spatial.cKDTree(self.coords) + self.coords_dists = self.coords_tree.sparse_distance_matrix(self.coords_tree, self.range).todok() + else: + self.coords_dists = scipy.spatial.distance.squareform( + scipy.spatial.distance.pdist(self.coords, metric=self.dist_metric)) + # performance counter if self.perf: self.perf_dist = list() @@ -281,9 +293,7 @@ def transform(self, *x): self.transform_coordinates = np.column_stack(x) if self.dist_metric == "euclidean": tt = scipy.spatial.cKDTree(self.transform_coordinates) - ct = scipy.spatial.cKDTree(self.coords) - - self.transform_dists = tt.sparse_distance_matrix(ct, self.range).todok() + self.transform_dists = tt.sparse_distance_matrix(self.coords_tree, self.range).todok() else: self.transform_dists = scipy.spatial.distance.cdist(self.transform_coordinates, self.coords, metric=self.dist_metric) @@ -411,12 +421,16 @@ def _krige(self, idx): selected_dists = dists[idx] sorted_idx = np.argsort(selected_dists, kind="stable") idx = idx[sorted_idx][:self._maxp] - + # finally find the points and values in_range = self.coords[idx] values = self.values[idx] - dist_mat = self.dist(in_range) - + dist_mat = self.coords_dists[idx,:][:,idx] + if isinstance(dist_mat, scipy.sparse.spmatrix): + dist_mat = sparse_dok_get(dist_mat, np.inf) + np.fill_diagonal(dist_mat, 0) # Normally set to inf + dist_mat = scipy.spatial.distance.squareform(dist_mat) + # if performance is tracked, time this step if self.perf: t1 = time.time() From e2c051a2720d27e0837b9845a0e841e58fe98b7e Mon Sep 17 00:00:00 2001 From: Egil Date: Thu, 3 Dec 2020 15:07:56 +0100 Subject: [PATCH 3/5] Bugfix for the explicit zero handling og sparse dok format --- skgstat/Kriging.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/skgstat/Kriging.py b/skgstat/Kriging.py index 745c46a..acef504 100644 --- a/skgstat/Kriging.py +++ b/skgstat/Kriging.py @@ -46,7 +46,8 @@ def __init__( precision=100, solver='inv', n_jobs=1, - perf=False + perf=False, + sparse=True ): """Ordinary Kriging routine @@ -99,6 +100,8 @@ def __init__( if not isinstance(variogram, Variogram): raise TypeError('variogram has to be of type skgstat.Variogram.') + self.sparse = sparse + # general attributes self.V = variogram self._minp = min_points @@ -137,7 +140,7 @@ def __init__( self.no_points_error = 0 self.ill_matrix = 0 - if self.dist_metric == "euclidean": + if self.sparse and self.dist_metric == "euclidean": self.coords_tree = scipy.spatial.cKDTree(self.coords) self.coords_dists = self.coords_tree.sparse_distance_matrix(self.coords_tree, self.range).todok() else: @@ -291,9 +294,9 @@ def transform(self, *x): self.perf_dist, self.perf_mat, self.perf_solv = [], [], [] self.transform_coordinates = np.column_stack(x) - if self.dist_metric == "euclidean": + if self.sparse and self.dist_metric == "euclidean": tt = scipy.spatial.cKDTree(self.transform_coordinates) - self.transform_dists = tt.sparse_distance_matrix(self.coords_tree, self.range).todok() + self.transform_dists = tt.sparse_distance_matrix(self.coords_tree, self.range).tolil() else: self.transform_dists = scipy.spatial.distance.cdist(self.transform_coordinates, self.coords, metric=self.dist_metric) @@ -406,7 +409,7 @@ def _krige(self, idx): # find all points within the search distance if isinstance(dists, scipy.sparse.spmatrix): - idx = np.array([k[1] for k in dists.keys()]) + idx = np.array([k[1] for k in dists.todok().keys()]) else: idx = np.where(dists <= self.range)[0] From 1f8a7a74d6173860435616dc243d472fae6c4b30 Mon Sep 17 00:00:00 2001 From: Egil Date: Thu, 3 Dec 2020 16:37:14 +0100 Subject: [PATCH 4/5] Use a faster sparse format --- skgstat/Kriging.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/skgstat/Kriging.py b/skgstat/Kriging.py index acef504..1e5b232 100644 --- a/skgstat/Kriging.py +++ b/skgstat/Kriging.py @@ -142,7 +142,7 @@ def __init__( if self.sparse and self.dist_metric == "euclidean": self.coords_tree = scipy.spatial.cKDTree(self.coords) - self.coords_dists = self.coords_tree.sparse_distance_matrix(self.coords_tree, self.range).todok() + self.coords_dists = self.coords_tree.sparse_distance_matrix(self.coords_tree, self.range, output_type="coo_matrix").tocsr() else: self.coords_dists = scipy.spatial.distance.squareform( scipy.spatial.distance.pdist(self.coords, metric=self.dist_metric)) @@ -296,7 +296,7 @@ def transform(self, *x): self.transform_coordinates = np.column_stack(x) if self.sparse and self.dist_metric == "euclidean": tt = scipy.spatial.cKDTree(self.transform_coordinates) - self.transform_dists = tt.sparse_distance_matrix(self.coords_tree, self.range).tolil() + self.transform_dists = tt.sparse_distance_matrix(self.coords_tree, self.range, output_type="coo_matrix").tocsr() else: self.transform_dists = scipy.spatial.distance.cdist(self.transform_coordinates, self.coords, metric=self.dist_metric) @@ -405,7 +405,10 @@ def _krige(self, idx): t0 = time.time() p = self.transform_coordinates[idx,:] - dists = self.transform_dists[idx,:] + if isinstance(self.transform_dists, scipy.sparse.spmatrix): + dists = self.transform_dists.getrow(idx) + else: + dists = self.transform_dists[idx,:] # find all points within the search distance if isinstance(dists, scipy.sparse.spmatrix): @@ -429,8 +432,8 @@ def _krige(self, idx): in_range = self.coords[idx] values = self.values[idx] dist_mat = self.coords_dists[idx,:][:,idx] - if isinstance(dist_mat, scipy.sparse.spmatrix): - dist_mat = sparse_dok_get(dist_mat, np.inf) + if isinstance(self.coords_dists, scipy.sparse.spmatrix): + dist_mat = sparse_dok_get(dist_mat.todok(), np.inf) np.fill_diagonal(dist_mat, 0) # Normally set to inf dist_mat = scipy.spatial.distance.squareform(dist_mat) From 4d37646ff696d97a0b065ede1bb9f6b7c0a28d0c Mon Sep 17 00:00:00 2001 From: Egil Date: Wed, 16 Dec 2020 23:40:24 +0100 Subject: [PATCH 5/5] Changed the default for sparse --- skgstat/Kriging.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skgstat/Kriging.py b/skgstat/Kriging.py index 1e5b232..6009def 100644 --- a/skgstat/Kriging.py +++ b/skgstat/Kriging.py @@ -30,7 +30,7 @@ class IllMatrixError(RuntimeWarning): def inv_solve(a, b): return inv(a).dot(b) -def sparse_dok_get(m, fill_value=np.NaN): +def _sparse_dok_get(m, fill_value=np.NaN): mm = np.full(m.shape, fill_value) for (x, y), value in m.items(): mm[x,y] = value @@ -47,7 +47,7 @@ def __init__( solver='inv', n_jobs=1, perf=False, - sparse=True + sparse=False ): """Ordinary Kriging routine @@ -433,7 +433,7 @@ def _krige(self, idx): values = self.values[idx] dist_mat = self.coords_dists[idx,:][:,idx] if isinstance(self.coords_dists, scipy.sparse.spmatrix): - dist_mat = sparse_dok_get(dist_mat.todok(), np.inf) + dist_mat = _sparse_dok_get(dist_mat.todok(), np.inf) np.fill_diagonal(dist_mat, 0) # Normally set to inf dist_mat = scipy.spatial.distance.squareform(dist_mat)