From 3be499924e5265c9f24de97b4fdb299423aaec13 Mon Sep 17 00:00:00 2001 From: mralbu Date: Sat, 12 Sep 2020 19:31:04 -0300 Subject: [PATCH] Add ClassificationKriging --- README.rst | 4 +- examples/10_classification_kriging2d.py | 4 +- pykrige/ck.py | 290 ++++++++++++ pykrige/compat.py | 270 +++++++++++ pykrige/rk.py | 567 +----------------------- tests/test_api.py | 4 +- tests/test_classification_krige.py | 16 +- 7 files changed, 577 insertions(+), 578 deletions(-) create mode 100644 pykrige/ck.py diff --git a/README.rst b/README.rst index 22ce9b4..6da0083 100644 --- a/README.rst +++ b/README.rst @@ -111,11 +111,11 @@ A demonstration of the regression kriging is provided in the `corresponding example `_. Classification Kriging ------------------- +---------------------- `Simplifical Indicator kriging `_ can be performed with `pykrige.rk.ClassificationKriging `_. -This class takes as parameters a scikit-learn classification model, and details of either either +This class takes as parameters a scikit-learn classification model, and details of either the ``OrdinaryKriging`` or the ``UniversalKriging`` class, and performs a correction steps on the ML classification prediction. A demonstration of the classification kriging is provided in the diff --git a/examples/10_classification_kriging2d.py b/examples/10_classification_kriging2d.py index 97ea9f8..c21036f 100644 --- a/examples/10_classification_kriging2d.py +++ b/examples/10_classification_kriging2d.py @@ -13,7 +13,7 @@ from sklearn.datasets import fetch_california_housing from sklearn.preprocessing import KBinsDiscretizer -from pykrige.rk import ClassificationKriging +from pykrige.ck import ClassificationKriging from pykrige.compat import train_test_split svc_model = SVC(C=0.1, gamma="auto", probability=True) @@ -41,7 +41,7 @@ for m in models: print("=" * 40) - print("regression model:", m.__class__.__name__) + print("classification model:", m.__class__.__name__) m_ck = ClassificationKriging(classification_model=m, n_closest_points=10) m_ck.fit(p_train, x_train, target_train) print("Classification Score: ", diff --git a/pykrige/ck.py b/pykrige/ck.py new file mode 100644 index 0000000..dcac598 --- /dev/null +++ b/pykrige/ck.py @@ -0,0 +1,290 @@ +# coding: utf-8 +from pykrige.compat import Krige, validate_sklearn, check_sklearn_model + +validate_sklearn() + +from sklearn.metrics import r2_score, accuracy_score +from sklearn.svm import SVC +from sklearn.preprocessing import OneHotEncoder +import numpy as np +from scipy.linalg import helmert + + +class ClassificationKriging: + """ + An implementation of Simplicial Indicator Kriging applied to classification ilr transformed residuals. + + Parameters + ---------- + classification_model: machine learning model instance from sklearn + method: str, optional + type of kriging to be performed + variogram_model: str, optional + variogram model to be used during Kriging + n_closest_points: int + number of closest points to be used during Ordinary Kriging + nlags: int + see OK/UK class description + weight: bool + see OK/UK class description + verbose: bool + see OK/UK class description + exact_values : bool + see OK/UK class description + variogram_parameters : list or dict + see OK/UK class description + variogram_function : callable + see OK/UK class description + anisotropy_scaling : tuple + single value for 2D (UK/OK) and two values in 3D (UK3D/OK3D) + anisotropy_angle : tuple + single value for 2D (UK/OK) and three values in 3D (UK3D/OK3D) + enable_statistics : bool + see OK class description + coordinates_type : str + see OK/UK class description + drift_terms : list of strings + see UK/UK3D class description + point_drift : array_like + see UK class description + ext_drift_grid : tuple + Holding the three values external_drift, external_drift_x and + external_drift_z for the UK class + functional_drift : list of callable + see UK/UK3D class description + """ + + def __init__( + self, + classification_model=SVC(), + method="ordinary", + variogram_model="linear", + n_closest_points=10, + nlags=6, + weight=False, + verbose=False, + exact_values=True, + pseudo_inv=False, + pseudo_inv_type="pinv", + variogram_parameters=None, + variogram_function=None, + anisotropy_scaling=(1.0, 1.0), + anisotropy_angle=(0.0, 0.0, 0.0), + enable_statistics=False, + coordinates_type="euclidean", + drift_terms=None, + point_drift=None, + ext_drift_grid=(None, None, None), + functional_drift=None, + ): + check_sklearn_model(classification_model, task="classification") + self.classification_model = classification_model + self.n_closest_points = n_closest_points + self._kriging_kwargs = dict( + method=method, + variogram_model=variogram_model, + nlags=nlags, + weight=weight, + n_closest_points=n_closest_points, + verbose=verbose, + exact_values=exact_values, + pseudo_inv=pseudo_inv, + pseudo_inv_type=pseudo_inv_type, + variogram_parameters=variogram_parameters, + variogram_function=variogram_function, + anisotropy_scaling=anisotropy_scaling, + anisotropy_angle=anisotropy_angle, + enable_statistics=enable_statistics, + coordinates_type=coordinates_type, + drift_terms=drift_terms, + point_drift=point_drift, + ext_drift_grid=ext_drift_grid, + functional_drift=functional_drift, + ) + + def fit(self, p, x, y): + """ + Fit the classification method and also krige the residual. + + Parameters + ---------- + p: ndarray + (Ns, d) array of predictor variables (Ns samples, d dimensions) + for classification + x: ndarray + ndarray of (x, y) points. Needs to be a (Ns, 2) array + corresponding to the lon/lat, for example 2d classification kriging. + array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging + y: ndarray + array of targets (Ns, ) + """ + self.classification_model.fit(p, y.ravel()) + print("Finished learning classification model") + self.classes_ = self.classification_model.classes_ + + self.krige = [] + for i in range(len(self.classes_) - 1): + self.krige.append(Krige(**self._kriging_kwargs)) + + ml_pred = self.classification_model.predict_proba(p) + ml_pred_ilr = ilr_transformation(ml_pred) + + self.onehotencode = OneHotEncoder(categories=[self.classes_]) + y_ohe = np.array(self.onehotencode.fit_transform(y).todense()) + y_ohe_ilr = ilr_transformation(y_ohe) + + for i in range(len(self.classes_) - 1): + self.krige[i].fit(x=x, y=y_ohe_ilr[:, i] - ml_pred_ilr[:, i]) + + print("Finished kriging residuals") + + def predict(self, p, x, **kwargs): + """ + Predict. + + Parameters + ---------- + p: ndarray + (Ns, d) array of predictor variables (Ns samples, d dimensions) + for classification + x: ndarray + ndarray of (x, y) points. Needs to be a (Ns, 2) array + corresponding to the lon/lat, for example. + array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging + + Returns + ------- + pred: ndarray + The expected value of ys for the query inputs, of shape (Ns,). + + """ + + ml_pred = self.classification_model.predict_proba(p) + ml_pred_ilr = ilr_transformation(ml_pred) + + pred_proba_ilr = self.krige_residual(x, **kwargs) + ml_pred_ilr + pred_proba = inverse_ilr_transformation(pred_proba_ilr) + + return np.argmax(pred_proba, axis=1) + + def krige_residual(self, x, **kwargs): + """ + Calculate the residuals. + + Parameters + ---------- + x: ndarray + ndarray of (x, y) points. Needs to be a (Ns, 2) array + corresponding to the lon/lat, for example. + + Returns + ------- + residual: ndarray + kriged residual values + """ + + krig_pred = [ + self.krige[i].predict(x=x, **kwargs) for i in range(len(self.classes_) - 1) + ] + + return np.vstack(krig_pred).T + + def score(self, p, x, y, sample_weight=None, **kwargs): + """ + Overloading default classification score method. + + Parameters + ---------- + p: ndarray + (Ns, d) array of predictor variables (Ns samples, d dimensions) + for classification + x: ndarray + ndarray of (x, y) points. Needs to be a (Ns, 2) array + corresponding to the lon/lat, for example. + array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging + y: ndarray + array of targets (Ns, ) + """ + return accuracy_score( + y_pred=self.predict(p, x, **kwargs), y_true=y, sample_weight=sample_weight + ) + + +def closure(data, k=1.0): + """Apply closure to data, sample-wise. + Adapted from https://github.com/ofgulban/compoda. + + Parameters + ---------- + data : 2d numpy array, shape [n_samples, n_measurements] + Data to be closed to a certain constant. Do not forget to deal with + zeros in the data before this operation. + k : float, positive + Sum of the measurements will be equal to this number. + + Returns + ------- + data : 2d numpy array, shape [n_samples, n_measurements] + Closed data. + + Reference + --------- + [1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R. + (2015). Modelling and Analysis of Compositional Data, pg. 9. + Chichester, UK: John Wiley & Sons, Ltd. + DOI: 10.1002/9781119003144 + """ + + return k * data / np.sum(data, axis=1)[:, np.newaxis] + + +def ilr_transformation(data): + """Isometric logratio transformation (not vectorized). + Adapted from https://github.com/ofgulban/compoda. + + Parameters + ---------- + data : 2d numpy array, shape [n_samples, n_coordinates] + Barycentric coordinates (closed) in simplex space. + + Returns + ------- + out : 2d numpy array, shape [n_samples, n_coordinates-1] + Coordinates in real space. + + Reference + --------- + [1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R. + (2015). Modelling and Analysis of Compositional Data, pg. 37. + Chichester, UK: John Wiley & Sons, Ltd. + DOI: 10.1002/9781119003144 + """ + data = data + np.finfo(float).eps + + return np.einsum("ij,jk->ik", np.log(data), -helmert(data.shape[1]).T) + + +def inverse_ilr_transformation(data): + """Inverse isometric logratio transformation (not vectorized). + Adapted from https://github.com/ofgulban/compoda. + + Parameters + ---------- + data : 2d numpy array, shape [n_samples, n_coordinates] + Isometric log-ratio transformed coordinates in real space. + + Returns + ------- + out : 2d numpy array, shape [n_samples, n_coordinates+1] + Barycentric coordinates (closed) in simplex space. + + Reference + --------- + [1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R. + (2015). Modelling and Analysis of Compositional Data, pg. 37. + Chichester, UK: John Wiley & Sons, Ltd. + DOI: 10.1002/9781119003144 + """ + data = data + np.finfo(float).eps + + return closure(np.exp(np.einsum("ij,jk->ik", data, -helmert(data.shape[1] + 1)))) diff --git a/pykrige/compat.py b/pykrige/compat.py index 73434aa..d3847e4 100644 --- a/pykrige/compat.py +++ b/pykrige/compat.py @@ -3,24 +3,294 @@ """For compatibility""" from functools import partial +from pykrige.uk3d import UniversalKriging3D +from pykrige.ok3d import OrdinaryKriging3D +from pykrige.uk import UniversalKriging +from pykrige.ok import OrdinaryKriging # sklearn try: from sklearn.model_selection import GridSearchCV from sklearn.model_selection import train_test_split + from sklearn.base import RegressorMixin, ClassifierMixin, BaseEstimator SKLEARN_INSTALLED = True except ImportError: SKLEARN_INSTALLED = False +krige_methods = { + "ordinary": OrdinaryKriging, + "universal": UniversalKriging, + "ordinary3d": OrdinaryKriging3D, + "universal3d": UniversalKriging3D, +} + +threed_krige = ("ordinary3d", "universal3d") + +krige_methods_kws = { + "ordinary": [ + "anisotropy_scaling", + "anisotropy_angle", + "enable_statistics", + "coordinates_type", + ], + "universal": [ + "anisotropy_scaling", + "anisotropy_angle", + "drift_terms", + "point_drift", + "external_drift", + "external_drift_x", + "external_drift_y", + "functional_drift", + ], + "ordinary3d": [ + "anisotropy_scaling_y", + "anisotropy_scaling_z", + "anisotropy_angle_x", + "anisotropy_angle_y", + "anisotropy_angle_z", + ], + "universal3d": [ + "anisotropy_scaling_y", + "anisotropy_scaling_z", + "anisotropy_angle_x", + "anisotropy_angle_y", + "anisotropy_angle_z", + "drift_terms", + "functional_drift", + ], +} + class SklearnException(Exception): pass +def validate_method(method): + """Validate the kriging method in use.""" + if method not in krige_methods.keys(): + raise ValueError( + "Kriging method must be one of {}".format(krige_methods.keys()) + ) + + def validate_sklearn(): if not SKLEARN_INSTALLED: raise SklearnException( "sklearn needs to be installed in order to use this module" ) + + +class Krige(RegressorMixin, BaseEstimator): + """ + A scikit-learn wrapper class for Ordinary and Universal Kriging. + + This works with both Grid/RandomSearchCv for finding the best + Krige parameters combination for a problem. + + Parameters + ---------- + method: str, optional + type of kriging to be performed + variogram_model: str, optional + variogram model to be used during Kriging + nlags: int + see OK/UK class description + weight: bool + see OK/UK class description + n_closest_points: int + number of closest points to be used during Ordinary Kriging + verbose: bool + see OK/UK class description + exact_values : bool + see OK/UK class description + variogram_parameters : list or dict + see OK/UK class description + variogram_function : callable + see OK/UK class description + anisotropy_scaling : tuple + single value for 2D (UK/OK) and two values in 3D (UK3D/OK3D) + anisotropy_angle : tuple + single value for 2D (UK/OK) and three values in 3D (UK3D/OK3D) + enable_statistics : bool + see OK class description + coordinates_type : str + see OK/UK class description + drift_terms : list of strings + see UK/UK3D class description + point_drift : array_like + see UK class description + ext_drift_grid : tuple + Holding the three values external_drift, external_drift_x and + external_drift_z for the UK class + functional_drift : list of callable + see UK/UK3D class description + """ + + def __init__( + self, + method="ordinary", + variogram_model="linear", + nlags=6, + weight=False, + n_closest_points=10, + verbose=False, + exact_values=True, + pseudo_inv=False, + pseudo_inv_type="pinv", + variogram_parameters=None, + variogram_function=None, + anisotropy_scaling=(1.0, 1.0), + anisotropy_angle=(0.0, 0.0, 0.0), + enable_statistics=False, + coordinates_type="euclidean", + drift_terms=None, + point_drift=None, + ext_drift_grid=(None, None, None), + functional_drift=None, + ): + validate_method(method) + self.variogram_model = variogram_model + self.variogram_parameters = variogram_parameters + self.variogram_function = variogram_function + self.nlags = nlags + self.weight = weight + self.verbose = verbose + self.exact_values = exact_values + self.pseudo_inv = pseudo_inv + self.pseudo_inv_type = pseudo_inv_type + self.anisotropy_scaling = anisotropy_scaling + self.anisotropy_angle = anisotropy_angle + self.enable_statistics = enable_statistics + self.coordinates_type = coordinates_type + self.drift_terms = drift_terms + self.point_drift = point_drift + self.ext_drift_grid = ext_drift_grid + self.functional_drift = functional_drift + self.model = None # not trained + self.n_closest_points = n_closest_points + self.method = method + + def fit(self, x, y, *args, **kwargs): + """ + Fit the current model. + + Parameters + ---------- + x: ndarray + array of Points, (x, y) pairs of shape (N, 2) for 2d kriging + array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging + y: ndarray + array of targets (N, ) + """ + val_kw = "val" if self.method in threed_krige else "z" + setup = dict( + variogram_model=self.variogram_model, + variogram_parameters=self.variogram_parameters, + variogram_function=self.variogram_function, + nlags=self.nlags, + weight=self.weight, + verbose=self.verbose, + exact_values=self.exact_values, + pseudo_inv=self.pseudo_inv, + pseudo_inv_type=self.pseudo_inv_type, + ) + add_setup = dict( + anisotropy_scaling=self.anisotropy_scaling[0], + anisotropy_angle=self.anisotropy_angle[0], + enable_statistics=self.enable_statistics, + coordinates_type=self.coordinates_type, + anisotropy_scaling_y=self.anisotropy_scaling[0], + anisotropy_scaling_z=self.anisotropy_scaling[1], + anisotropy_angle_x=self.anisotropy_angle[0], + anisotropy_angle_y=self.anisotropy_angle[1], + anisotropy_angle_z=self.anisotropy_angle[2], + drift_terms=self.drift_terms, + point_drift=self.point_drift, + external_drift=self.ext_drift_grid[0], + external_drift_x=self.ext_drift_grid[1], + external_drift_y=self.ext_drift_grid[2], + functional_drift=self.functional_drift, + ) + for kw in krige_methods_kws[self.method]: + setup[kw] = add_setup[kw] + input_kw = self._dimensionality_check(x) + input_kw.update(setup) + input_kw[val_kw] = y + self.model = krige_methods[self.method](**input_kw) + + def _dimensionality_check(self, x, ext=""): + if self.method in ("ordinary", "universal"): + if x.shape[1] != 2: + raise ValueError("2d krige can use only 2d points") + else: + return {"x" + ext: x[:, 0], "y" + ext: x[:, 1]} + if self.method in ("ordinary3d", "universal3d"): + if x.shape[1] != 3: + raise ValueError("3d krige can use only 3d points") + else: + return { + "x" + ext: x[:, 0], + "y" + ext: x[:, 1], + "z" + ext: x[:, 2], + } + + def predict(self, x, *args, **kwargs): + """ + Predict. + + Parameters + ---------- + x: ndarray + array of Points, (x, y) pairs of shape (N, 2) for 2d kriging + array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging + Returns + ------- + Prediction array + """ + if not self.model: + raise Exception("Not trained. Train first") + points = self._dimensionality_check(x, ext="points") + return self.execute(points, *args, **kwargs)[0] + + def execute(self, points, *args, **kwargs): + # TODO array of Points, (x, y) pairs of shape (N, 2) + """ + Execute. + + Parameters + ---------- + points: dict + + Returns + ------- + Prediction array + Variance array + """ + default_kw = dict(style="points", backend="loop") + default_kw.update(kwargs) + points.update(default_kw) + if isinstance(self.model, (OrdinaryKriging, OrdinaryKriging3D)): + points.update(dict(n_closest_points=self.n_closest_points)) + else: + print("n_closest_points will be ignored for UniversalKriging") + prediction, variance = self.model.execute(**points) + return prediction, variance + + +def check_sklearn_model(model, task="regression"): + """Check the sklearn method in use.""" + if task == "regression": + if not (isinstance(model, BaseEstimator) and isinstance(model, RegressorMixin)): + raise RuntimeError( + "Needs to supply an instance of a scikit-learn regression class." + ) + elif task == "classification": + if not ( + isinstance(model, BaseEstimator) and isinstance(model, ClassifierMixin) + ): + raise RuntimeError( + "Needs to supply an instance of a scikit-learn classification class." + ) diff --git a/pykrige/rk.py b/pykrige/rk.py index 20c9ecf..7e6821b 100644 --- a/pykrige/rk.py +++ b/pykrige/rk.py @@ -1,280 +1,10 @@ # coding: utf-8 -from scipy.linalg import helmert -import numpy as np -from sklearn.metrics import r2_score, accuracy_score -from sklearn.svm import SVR -from sklearn.base import RegressorMixin, ClassifierMixin, BaseEstimator -from sklearn.preprocessing import OneHotEncoder -from pykrige.uk3d import UniversalKriging3D -from pykrige.ok3d import OrdinaryKriging3D -from pykrige.uk import UniversalKriging -from pykrige.ok import OrdinaryKriging -from pykrige.compat import validate_sklearn +from pykrige.compat import Krige, validate_sklearn, check_sklearn_model validate_sklearn() -krige_methods = { - "ordinary": OrdinaryKriging, - "universal": UniversalKriging, - "ordinary3d": OrdinaryKriging3D, - "universal3d": UniversalKriging3D, -} - -threed_krige = ("ordinary3d", "universal3d") - -krige_methods_kws = { - "ordinary": [ - "anisotropy_scaling", - "anisotropy_angle", - "enable_statistics", - "coordinates_type", - ], - "universal": [ - "anisotropy_scaling", - "anisotropy_angle", - "drift_terms", - "point_drift", - "external_drift", - "external_drift_x", - "external_drift_y", - "functional_drift", - ], - "ordinary3d": [ - "anisotropy_scaling_y", - "anisotropy_scaling_z", - "anisotropy_angle_x", - "anisotropy_angle_y", - "anisotropy_angle_z", - ], - "universal3d": [ - "anisotropy_scaling_y", - "anisotropy_scaling_z", - "anisotropy_angle_x", - "anisotropy_angle_y", - "anisotropy_angle_z", - "drift_terms", - "functional_drift", - ], -} - - -def validate_method(method): - """Validate the kriging method in use.""" - if method not in krige_methods.keys(): - raise ValueError( - "Kriging method must be one of {}".format(krige_methods.keys()) - ) - - -class Krige(RegressorMixin, BaseEstimator): - """ - A scikit-learn wrapper class for Ordinary and Universal Kriging. - - This works with both Grid/RandomSearchCv for finding the best - Krige parameters combination for a problem. - - Parameters - ---------- - method: str, optional - type of kriging to be performed - variogram_model: str, optional - variogram model to be used during Kriging - nlags: int - see OK/UK class description - weight: bool - see OK/UK class description - n_closest_points: int - number of closest points to be used during Ordinary Kriging - verbose: bool - see OK/UK class description - exact_values : bool - see OK/UK class description - variogram_parameters : list or dict - see OK/UK class description - variogram_function : callable - see OK/UK class description - anisotropy_scaling : tuple - single value for 2D (UK/OK) and two values in 3D (UK3D/OK3D) - anisotropy_angle : tuple - single value for 2D (UK/OK) and three values in 3D (UK3D/OK3D) - enable_statistics : bool - see OK class description - coordinates_type : str - see OK/UK class description - drift_terms : list of strings - see UK/UK3D class description - point_drift : array_like - see UK class description - ext_drift_grid : tuple - Holding the three values external_drift, external_drift_x and - external_drift_z for the UK class - functional_drift : list of callable - see UK/UK3D class description - """ - - def __init__( - self, - method="ordinary", - variogram_model="linear", - nlags=6, - weight=False, - n_closest_points=10, - verbose=False, - exact_values=True, - pseudo_inv=False, - pseudo_inv_type="pinv", - variogram_parameters=None, - variogram_function=None, - anisotropy_scaling=(1.0, 1.0), - anisotropy_angle=(0.0, 0.0, 0.0), - enable_statistics=False, - coordinates_type="euclidean", - drift_terms=None, - point_drift=None, - ext_drift_grid=(None, None, None), - functional_drift=None, - ): - validate_method(method) - self.variogram_model = variogram_model - self.variogram_parameters = variogram_parameters - self.variogram_function = variogram_function - self.nlags = nlags - self.weight = weight - self.verbose = verbose - self.exact_values = exact_values - self.pseudo_inv = pseudo_inv - self.pseudo_inv_type = pseudo_inv_type - self.anisotropy_scaling = anisotropy_scaling - self.anisotropy_angle = anisotropy_angle - self.enable_statistics = enable_statistics - self.coordinates_type = coordinates_type - self.drift_terms = drift_terms - self.point_drift = point_drift - self.ext_drift_grid = ext_drift_grid - self.functional_drift = functional_drift - self.model = None # not trained - self.n_closest_points = n_closest_points - self.method = method - - def fit(self, x, y, *args, **kwargs): - """ - Fit the current model. - - Parameters - ---------- - x: ndarray - array of Points, (x, y) pairs of shape (N, 2) for 2d kriging - array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging - y: ndarray - array of targets (N, ) - """ - val_kw = "val" if self.method in threed_krige else "z" - setup = dict( - variogram_model=self.variogram_model, - variogram_parameters=self.variogram_parameters, - variogram_function=self.variogram_function, - nlags=self.nlags, - weight=self.weight, - verbose=self.verbose, - exact_values=self.exact_values, - pseudo_inv=self.pseudo_inv, - pseudo_inv_type=self.pseudo_inv_type, - ) - add_setup = dict( - anisotropy_scaling=self.anisotropy_scaling[0], - anisotropy_angle=self.anisotropy_angle[0], - enable_statistics=self.enable_statistics, - coordinates_type=self.coordinates_type, - anisotropy_scaling_y=self.anisotropy_scaling[0], - anisotropy_scaling_z=self.anisotropy_scaling[1], - anisotropy_angle_x=self.anisotropy_angle[0], - anisotropy_angle_y=self.anisotropy_angle[1], - anisotropy_angle_z=self.anisotropy_angle[2], - drift_terms=self.drift_terms, - point_drift=self.point_drift, - external_drift=self.ext_drift_grid[0], - external_drift_x=self.ext_drift_grid[1], - external_drift_y=self.ext_drift_grid[2], - functional_drift=self.functional_drift, - ) - for kw in krige_methods_kws[self.method]: - setup[kw] = add_setup[kw] - input_kw = self._dimensionality_check(x) - input_kw.update(setup) - input_kw[val_kw] = y - self.model = krige_methods[self.method](**input_kw) - - def _dimensionality_check(self, x, ext=""): - if self.method in ("ordinary", "universal"): - if x.shape[1] != 2: - raise ValueError("2d krige can use only 2d points") - else: - return {"x" + ext: x[:, 0], "y" + ext: x[:, 1]} - if self.method in ("ordinary3d", "universal3d"): - if x.shape[1] != 3: - raise ValueError("3d krige can use only 3d points") - else: - return { - "x" + ext: x[:, 0], - "y" + ext: x[:, 1], - "z" + ext: x[:, 2], - } - - def predict(self, x, *args, **kwargs): - """ - Predict. - - Parameters - ---------- - x: ndarray - array of Points, (x, y) pairs of shape (N, 2) for 2d kriging - array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging - Returns - ------- - Prediction array - """ - if not self.model: - raise Exception("Not trained. Train first") - points = self._dimensionality_check(x, ext="points") - return self.execute(points, *args, **kwargs)[0] - - def execute(self, points, *args, **kwargs): - # TODO array of Points, (x, y) pairs of shape (N, 2) - """ - Execute. - - Parameters - ---------- - points: dict - - Returns - ------- - Prediction array - Variance array - """ - default_kw = dict(style="points", backend="loop") - default_kw.update(kwargs) - points.update(default_kw) - if isinstance(self.model, (OrdinaryKriging, OrdinaryKriging3D)): - points.update(dict(n_closest_points=self.n_closest_points)) - else: - print("n_closest_points will be ignored for UniversalKriging") - prediction, variance = self.model.execute(**points) - return prediction, variance - - -def check_sklearn_model(model, task="regression"): - """Check the sklearn method in use.""" - if task == "regression": - if not (isinstance(model, BaseEstimator) and isinstance(model, RegressorMixin)): - raise RuntimeError( - "Needs to supply an instance of a scikit-learn regression class." - ) - elif task == "classification": - if not (isinstance(model, BaseEstimator) and isinstance(model, ClassifierMixin)): - raise RuntimeError( - "Needs to supply an instance of a scikit-learn classification class." - ) +from sklearn.metrics import r2_score, accuracy_score +from sklearn.svm import SVR class RegressionKriging: @@ -453,294 +183,3 @@ def score(self, p, x, y, sample_weight=None, **kwargs): return r2_score( y_pred=self.predict(p, x, **kwargs), y_true=y, sample_weight=sample_weight ) - - -class ClassificationKriging: - """ - An implementation of Simplicial Indicator Kriging. - - Parameters - ---------- - classification_model: machine learning model instance from sklearn - method: str, optional - type of kriging to be performed - variogram_model: str, optional - variogram model to be used during Kriging - n_closest_points: int - number of closest points to be used during Ordinary Kriging - nlags: int - see OK/UK class description - weight: bool - see OK/UK class description - verbose: bool - see OK/UK class description - exact_values : bool - see OK/UK class description - variogram_parameters : list or dict - see OK/UK class description - variogram_function : callable - see OK/UK class description - anisotropy_scaling : tuple - single value for 2D (UK/OK) and two values in 3D (UK3D/OK3D) - anisotropy_angle : tuple - single value for 2D (UK/OK) and three values in 3D (UK3D/OK3D) - enable_statistics : bool - see OK class description - coordinates_type : str - see OK/UK class description - drift_terms : list of strings - see UK/UK3D class description - point_drift : array_like - see UK class description - ext_drift_grid : tuple - Holding the three values external_drift, external_drift_x and - external_drift_z for the UK class - functional_drift : list of callable - see UK/UK3D class description - """ - - def __init__( - self, - classification_model=SVR(), - method="ordinary", - variogram_model="linear", - n_closest_points=10, - nlags=6, - weight=False, - verbose=False, - exact_values=True, - pseudo_inv=False, - pseudo_inv_type="pinv", - variogram_parameters=None, - variogram_function=None, - anisotropy_scaling=(1.0, 1.0), - anisotropy_angle=(0.0, 0.0, 0.0), - enable_statistics=False, - coordinates_type="euclidean", - drift_terms=None, - point_drift=None, - ext_drift_grid=(None, None, None), - functional_drift=None, - ): - check_sklearn_model(classification_model, task="classification") - self.classification_model = classification_model - self.n_closest_points = n_closest_points - self._kriging_kwargs = dict(method=method, - variogram_model=variogram_model, - nlags=nlags, - weight=weight, - n_closest_points=n_closest_points, - verbose=verbose, - exact_values=exact_values, - pseudo_inv=pseudo_inv, - pseudo_inv_type=pseudo_inv_type, - variogram_parameters=variogram_parameters, - variogram_function=variogram_function, - anisotropy_scaling=anisotropy_scaling, - anisotropy_angle=anisotropy_angle, - enable_statistics=enable_statistics, - coordinates_type=coordinates_type, - drift_terms=drift_terms, - point_drift=point_drift, - ext_drift_grid=ext_drift_grid, - functional_drift=functional_drift) - - def fit(self, p, x, y): - """ - Fit the classification method and also krige the residual. - - Parameters - ---------- - p: ndarray - (Ns, d) array of predictor variables (Ns samples, d dimensions) - for classification - x: ndarray - ndarray of (x, y) points. Needs to be a (Ns, 2) array - corresponding to the lon/lat, for example 2d classification kriging. - array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging - y: ndarray - array of targets (Ns, ) - """ - self.classification_model.fit(p, y.ravel()) - print("Finished learning classification model") - self.classes_ = self.classification_model.classes_ - - self.krige = [] - for i in range(len(self.classes_) - 1): - self.krige.append(Krige(**self._kriging_kwargs)) - - ml_pred = self.classification_model.predict_proba(p) - ml_pred_ilr = ilr_transformation(ml_pred) - - self.onehotencode = OneHotEncoder( - categories=[self.classes_] - ) - y_ohe = np.array(self.onehotencode.fit_transform(y).todense()) - y_ohe_ilr = ilr_transformation(y_ohe) - - for i in range(len(self.classes_) - 1): - self.krige[i].fit(x=x, y=y_ohe_ilr[:, i] - ml_pred_ilr[:, i]) - - print("Finished kriging residuals") - - def predict(self, p, x, **kwargs): - """ - Predict. - - Parameters - ---------- - p: ndarray - (Ns, d) array of predictor variables (Ns samples, d dimensions) - for classification - x: ndarray - ndarray of (x, y) points. Needs to be a (Ns, 2) array - corresponding to the lon/lat, for example. - array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging - - Returns - ------- - pred: ndarray - The expected value of ys for the query inputs, of shape (Ns,). - - """ - - ml_pred = self.classification_model.predict_proba(p) - ml_pred_ilr = ilr_transformation(ml_pred) - - pred_proba_ilr = self.krige_residual(x, **kwargs) + ml_pred_ilr - pred_proba = inverse_ilr_transformation(pred_proba_ilr) - - return np.argmax(pred_proba, axis=1) - - def krige_residual(self, x, **kwargs): - """ - Calculate the residuals. - - Parameters - ---------- - x: ndarray - ndarray of (x, y) points. Needs to be a (Ns, 2) array - corresponding to the lon/lat, for example. - - Returns - ------- - residual: ndarray - kriged residual values - """ - - krig_pred = [self.krige[i].predict( - x=x, **kwargs) for i in range(len(self.classes_) - 1)] - - return np.vstack(krig_pred).T - - def score(self, p, x, y, sample_weight=None, **kwargs): - """ - Overloading default classification score method. - - Parameters - ---------- - p: ndarray - (Ns, d) array of predictor variables (Ns samples, d dimensions) - for classification - x: ndarray - ndarray of (x, y) points. Needs to be a (Ns, 2) array - corresponding to the lon/lat, for example. - array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging - y: ndarray - array of targets (Ns, ) - """ - return accuracy_score( - y_pred=self.predict(p, x, **kwargs), y_true=y, sample_weight=sample_weight - ) - - -def closure(data, k=1.0): - """Apply closure to data, sample-wise. - Adapted from https://github.com/ofgulban/compoda. - - Parameters - ---------- - data : 2d numpy array, shape [n_samples, n_measurements] - Data to be closed to a certain constant. Do not forget to deal with - zeros in the data before this operation. - k : float, positive - Sum of the measurements will be equal to this number. - - Returns - ------- - data : 2d numpy array, shape [n_samples, n_measurements] - Closed data. - - Reference - --------- - [1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R. - (2015). Modelling and Analysis of Compositional Data, pg. 9. - Chichester, UK: John Wiley & Sons, Ltd. - DOI: 10.1002/9781119003144 - """ - data_sum = np.sum(data, axis=1) - out = np.copy(data) - for i in range(data.shape[1]): - out[:, i] = np.divide(out[:, i], data_sum) - out = out * k - return out - - -def ilr_transformation(data): - """Isometric logratio transformation (not vectorized). - Adapted from https://github.com/ofgulban/compoda. - - Parameters - ---------- - data : 2d numpy array, shape [n_samples, n_coordinates] - Barycentric coordinates (closed) in simplex space. - - Returns - ------- - out : 2d numpy array, shape [n_samples, n_coordinates-1] - Coordinates in real space. - - Reference - --------- - [1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R. - (2015). Modelling and Analysis of Compositional Data, pg. 37. - Chichester, UK: John Wiley & Sons, Ltd. - DOI: 10.1002/9781119003144 - """ - data = data + np.finfo(float).eps - dims = data.shape - out = np.zeros((dims[0], dims[1]-1)) - helmertian = helmert(dims[1]).T - for i in range(data.shape[0]): - out[i, :] = np.dot(np.log(data[i, :]), helmertian) - return out - - -def inverse_ilr_transformation(data): - """Inverse isometric logratio transformation (not vectorized). - Adapted from https://github.com/ofgulban/compoda. - - Parameters - ---------- - data : 2d numpy array, shape [n_samples, n_coordinates] - Isometric log-ratio transformed coordinates in real space. - - Returns - ------- - out : 2d numpy array, shape [n_samples, n_coordinates+1] - Barycentric coordinates (closed) in simplex space. - - Reference - --------- - [1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R. - (2015). Modelling and Analysis of Compositional Data, pg. 37. - Chichester, UK: John Wiley & Sons, Ltd. - DOI: 10.1002/9781119003144 - """ - data = data + np.finfo(float).eps - dims = data.shape - out = np.zeros((dims[0], dims[1]+1)) - helmertian = helmert(dims[1]+1) - for i in range(data.shape[0]): - out[i, :] = np.exp(np.dot(data[i, :], helmertian)) - return closure(out) diff --git a/tests/test_api.py b/tests/test_api.py index 98d2de0..6c033ca 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -3,8 +3,8 @@ import numpy as np import pytest -from pykrige.rk import Krige -from pykrige.rk import threed_krige +from pykrige.compat import Krige +from pykrige.compat import threed_krige def _method_and_vergiogram(): diff --git a/tests/test_classification_krige.py b/tests/test_classification_krige.py index 240d3e7..8942e12 100644 --- a/tests/test_classification_krige.py +++ b/tests/test_classification_krige.py @@ -3,7 +3,7 @@ import numpy as np -from pykrige.rk import ClassificationKriging +from pykrige.ck import ClassificationKriging try: from sklearn.svm import SVC @@ -55,11 +55,11 @@ def test_classification_krige(): ) for ml_model, krige_method in _methods(): - reg_class_model = ClassificationKriging( + class_model = ClassificationKriging( classification_model=ml_model, method=krige_method, n_closest_points=2 ) - reg_class_model.fit(X_train, lon_lat_train, y_train) - assert reg_class_model.score(X_test, lon_lat_test, y_test) > 0.25 + class_model.fit(X_train, lon_lat_train, y_train) + assert class_model.score(X_test, lon_lat_test, y_test) > 0.25 @pytest.mark.skipif(not SKLEARN_INSTALLED, reason="requires scikit-learn") @@ -91,11 +91,11 @@ def test_krige_classification_housing(): for ml_model, krige_method in _methods(): - reg_class_model = ClassificationKriging( + class_model = ClassificationKriging( classification_model=ml_model, method=krige_method, n_closest_points=2 ) - reg_class_model.fit(p_train, x_train, y_train) + class_model.fit(p_train, x_train, y_train) if krige_method == "ordinary": - assert reg_class_model.score(p_test, x_test, y_test) > 0.5 + assert class_model.score(p_test, x_test, y_test) > 0.5 else: - assert reg_class_model.score(p_test, x_test, y_test) > 0.0 + assert class_model.score(p_test, x_test, y_test) > 0.0