diff --git a/skgstat/Kriging.py b/skgstat/Kriging.py index 8efa25e..6009def 100644 --- a/skgstat/Kriging.py +++ b/skgstat/Kriging.py @@ -30,6 +30,11 @@ class IllMatrixError(RuntimeWarning): def inv_solve(a, b): return inv(a).dot(b) +def _sparse_dok_get(m, fill_value=np.NaN): + mm = np.full(m.shape, fill_value) + for (x, y), value in m.items(): + mm[x,y] = value + return mm class OrdinaryKriging: def __init__( @@ -41,7 +46,8 @@ def __init__( precision=100, solver='inv', n_jobs=1, - perf=False + perf=False, + sparse=False ): """Ordinary Kriging routine @@ -94,6 +100,8 @@ def __init__( if not isinstance(variogram, Variogram): raise TypeError('variogram has to be of type skgstat.Variogram.') + self.sparse = sparse + # general attributes self.V = variogram self._minp = min_points @@ -132,6 +140,13 @@ def __init__( self.no_points_error = 0 self.ill_matrix = 0 + if self.sparse and self.dist_metric == "euclidean": + self.coords_tree = scipy.spatial.cKDTree(self.coords) + self.coords_dists = self.coords_tree.sparse_distance_matrix(self.coords_tree, self.range, output_type="coo_matrix").tocsr() + else: + self.coords_dists = scipy.spatial.distance.squareform( + scipy.spatial.distance.pdist(self.coords, metric=self.dist_metric)) + # performance counter if self.perf: self.perf_dist = list() @@ -279,7 +294,11 @@ def transform(self, *x): self.perf_dist, self.perf_mat, self.perf_solv = [], [], [] self.transform_coordinates = np.column_stack(x) - self.transform_dists = scipy.spatial.distance.cdist(self.transform_coordinates, self.coords, metric=self.dist_metric) + if self.sparse and self.dist_metric == "euclidean": + tt = scipy.spatial.cKDTree(self.transform_coordinates) + self.transform_dists = tt.sparse_distance_matrix(self.coords_tree, self.range, output_type="coo_matrix").tocsr() + else: + self.transform_dists = scipy.spatial.distance.cdist(self.transform_coordinates, self.coords, metric=self.dist_metric) # DEV: this is dirty, not sure how to do it better at the moment self.sigma = np.empty(len(x[0])) @@ -386,24 +405,38 @@ def _krige(self, idx): t0 = time.time() p = self.transform_coordinates[idx,:] - dists = self.transform_dists[idx,:] + if isinstance(self.transform_dists, scipy.sparse.spmatrix): + dists = self.transform_dists.getrow(idx) + else: + dists = self.transform_dists[idx,:] # find all points within the search distance - idx = np.where(dists <= self.range)[0] + if isinstance(dists, scipy.sparse.spmatrix): + idx = np.array([k[1] for k in dists.todok().keys()]) + else: + idx = np.where(dists <= self.range)[0] # raise an error if not enough points are found if idx.size < self._minp: raise LessPointsError if idx.size > self._maxp: - sorted_idx = np.argsort(dists) - idx = sorted_idx[np.isin(sorted_idx, idx)][:self._maxp] - + if isinstance(dists, scipy.sparse.spmatrix): + selected_dists = dists[0, idx].toarray()[0,:] + else: + selected_dists = dists[idx] + sorted_idx = np.argsort(selected_dists, kind="stable") + idx = idx[sorted_idx][:self._maxp] + # finally find the points and values in_range = self.coords[idx] values = self.values[idx] - dist_mat = self.dist(in_range) - + dist_mat = self.coords_dists[idx,:][:,idx] + if isinstance(self.coords_dists, scipy.sparse.spmatrix): + dist_mat = _sparse_dok_get(dist_mat.todok(), np.inf) + np.fill_diagonal(dist_mat, 0) # Normally set to inf + dist_mat = scipy.spatial.distance.squareform(dist_mat) + # if performance is tracked, time this step if self.perf: t1 = time.time()