From 0838a03fc80b158b504c4b8b714bf1b42c5c5f72 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 11:17:15 +0000 Subject: [PATCH 01/26] ensemble crash on NaNs --- .../plugins/ensemble/classifiers.py | 2 +- src/autoprognosis/plugins/ensemble/combos.py | 26 ++++++++++--------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index dd7f4730..3ed55573 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -308,7 +308,7 @@ def __init__( self, models: List[PipelineMeta], meta_model: PipelineMeta = Pipeline( - ["prediction.classifier.logistic_regression"] + ["imputer.default.ice", "prediction.classifier.logistic_regression"] )(output="numpy"), clf: Union[None, Stacking] = None, explainer_plugins: list = [], diff --git a/src/autoprognosis/plugins/ensemble/combos.py b/src/autoprognosis/plugins/ensemble/combos.py index 556662b5..e70faa36 100644 --- a/src/autoprognosis/plugins/ensemble/combos.py +++ b/src/autoprognosis/plugins/ensemble/combos.py @@ -16,7 +16,10 @@ import numpy as np from pyod.utils.utility import check_parameter from scipy.special import erf +from sklearn.experimental import enable_iterative_imputer # noqa: F401,E402 +from sklearn.impute import IterativeImputer from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import Pipeline from sklearn.preprocessing import MinMaxScaler from sklearn.utils import ( check_array, @@ -417,9 +420,6 @@ class Stacking(BaseAggregator): base_estimators: list or numpy array (n_estimators,) A list of base classifiers. - meta_clf : object, optional (default=LogisticRegression) - The meta classifier to make the final prediction. - n_folds : int, optional (default=2) The number of splits of the training sample. @@ -451,7 +451,7 @@ def __init__( self, base_estimators, meta_clf=None, - n_folds=2, + n_folds=3, keep_original=True, use_proba=False, shuffle_data=False, @@ -473,7 +473,9 @@ def __init__( if meta_clf is not None: self.meta_clf = meta_clf else: - self.meta_clf = LogisticRegression() + self.meta_clf = Pipeline( + ("imputer", IterativeImputer()), ("output", LogisticRegression()) + ) # set flags self.keep_original = keep_original @@ -504,8 +506,8 @@ def fit(self, X, y): """ # Validate inputs X and y - X, y = check_X_y(X, y) - X = check_array(X) + X, y = check_X_y(X, y, force_all_finite=False) + X = check_array(X, force_all_finite=False) self._set_n_classes(y) n_samples = X.shape[0] @@ -574,7 +576,7 @@ def _process_data(self, X): The processed dataset of X. """ check_is_fitted(self, ["fitted_"]) - X = check_array(X) + X = check_array(X, force_all_finite=False) n_samples = X.shape[0] # initialize matrix for storing newly generated features @@ -718,8 +720,8 @@ def fit(self, X, y): """ # Validate inputs X and y - X, y = check_X_y(X, y) - X = check_array(X) + X, y = check_X_y(X, y, force_all_finite=False) + X = check_array(X, force_all_finite=False) self._set_n_classes(y) if self.pre_fitted: @@ -744,7 +746,7 @@ def predict(self, X): labels : numpy array of shape (n_samples,) Class labels for each data sample. """ - X = check_array(X) + X = check_array(X, force_all_finite=False) all_scores = np.zeros([X.shape[0], self.n_base_estimators_]) @@ -782,7 +784,7 @@ def predict_proba(self, X): The class probabilities of the input samples. Classes are ordered by lexicographic order. """ - X = check_array(X) + X = check_array(X, force_all_finite=False) all_scores = np.zeros([X.shape[0], self._classes, self.n_base_estimators_]) for i in range(self.n_base_estimators_): From f18dc6edd3dac5289ad08dbb7e5f94ec6f1e6827 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 11:20:41 +0000 Subject: [PATCH 02/26] add regression test --- tests/bugfixing/test_ensemble_crash.py | 45 ++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tests/bugfixing/test_ensemble_crash.py diff --git a/tests/bugfixing/test_ensemble_crash.py b/tests/bugfixing/test_ensemble_crash.py new file mode 100644 index 00000000..52d17f0a --- /dev/null +++ b/tests/bugfixing/test_ensemble_crash.py @@ -0,0 +1,45 @@ +# stdlib +from pathlib import Path +import random + +# third party +import numpy as np +from sklearn.datasets import load_breast_cancer + +# autoprognosis absolute +from autoprognosis.studies.classifiers import ClassifierStudy + + +def test_ensemble_crash() -> None: + X, Y = load_breast_cancer(return_X_y=True, as_frame=True) + + # Simulate missingness + total_len = len(X) + + for col in ["mean texture", "mean compactness"]: + indices = random.sample(range(0, total_len), 10) + X.loc[indices, col] = np.nan + + dataset = X.copy() + dataset["target"] = Y + + workspace = Path("workspace") + workspace.mkdir(parents=True, exist_ok=True) + + study_name = "classification_example_imputation" + + study = ClassifierStudy( + study_name=study_name, + dataset=dataset, + target="target", + num_iter=1, + num_study_iter=1, + timeout=1, + imputers=["mean", "ice", "median"], + classifiers=["logistic_regression", "lda"], + feature_scaling=[], # feature preprocessing is disabled + score_threshold=0.4, + workspace=workspace, + ) + + study.run() From 4a7d9a7412bddab43c659cc0c92ef29e31190d66 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 11:29:09 +0000 Subject: [PATCH 03/26] not fitted error --- src/autoprognosis/plugins/core/base_plugin.py | 11 +++++++++- .../plugins/ensemble/classifiers.py | 20 +++++++++++++++++-- src/autoprognosis/plugins/prediction/base.py | 3 +++ .../plugins/prediction/classifiers/base.py | 6 +++++- .../plugins/prediction/regression/base.py | 1 + .../prediction/risk_estimation/base.py | 4 ++++ 6 files changed, 41 insertions(+), 4 deletions(-) diff --git a/src/autoprognosis/plugins/core/base_plugin.py b/src/autoprognosis/plugins/core/base_plugin.py index a9949229..73475e5b 100644 --- a/src/autoprognosis/plugins/core/base_plugin.py +++ b/src/autoprognosis/plugins/core/base_plugin.py @@ -42,6 +42,7 @@ def __init__(self) -> None: self.output = pd.DataFrame self._backup_encoders: Optional[Dict[str, LabelEncoder]] = {} self._drop_features: Optional[List[str]] = [] + self._fitted = False def change_output(self, output: str) -> None: if output not in ["pandas", "numpy"]: @@ -165,13 +166,19 @@ def _transform_input(self, X: pd.DataFrame) -> pd.DataFrame: def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "Plugin": X = self._fit_input(X) - return self._fit(X, *args, **kwargs) + self._fit(X, *args, **kwargs) + + self._fitted = True + + return self @abstractmethod def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "Plugin": ... def transform(self, X: pd.DataFrame) -> pd.DataFrame: + if not self._fitted: + raise RuntimeError("Fit the model first") X = self._transform_input(X) return self.output(self._transform(X)) @@ -180,6 +187,8 @@ def _transform(self, X: pd.DataFrame) -> pd.DataFrame: ... def predict(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: + if not self._fitted: + raise RuntimeError("Fit the model first") X = self._transform_input(X) return self.output(self._predict(X, *args, *kwargs)) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index 3ed55573..a55cf76e 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -90,6 +90,7 @@ def __init__( self.explainer_plugins = explainer_plugins self.explanations_nepoch = explanations_nepoch self.explainers = explainers + self._fitted = False for idx, weight in enumerate(weights): if weight == 0: @@ -121,9 +122,13 @@ def fit_model(k: int) -> Any: ) self.explainers[exp] = exp_model + self._fitted = True return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: + if not self._fitted: + raise RuntimeError("Fit the model first") + preds_ = [] for k in range(len(self.models)): preds_.append(self.models[k].predict_proba(X, *args) * self.weights[k]) @@ -247,9 +252,12 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "WeightedEnsembleCV": ) self.explainers[exp] = exp_model + self._fitted = True return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: + if not self._fitted: + raise RuntimeError("Fit the model first") result, _ = self.predict_proba_with_uncertainity(X) return result @@ -322,6 +330,7 @@ def __init__( self.explainer_plugins = explainer_plugins self.explainers: Optional[dict] self.explanations_nepoch = explanations_nepoch + self._fitted = False for model in self.models: model.change_output("numpy") @@ -349,10 +358,13 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble": n_epoch=self.explanations_nepoch, prefit=True, ) - + self._fitted = True return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: + if not self._fitted: + raise RuntimeError("Fit the model first") + return pd.DataFrame(self.clf.predict_proba(X)) def explain(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: @@ -427,6 +439,7 @@ def __init__( self.explainer_plugins = explainer_plugins self.explainers: Optional[dict] self.explanations_nepoch = explanations_nepoch + self._fitted = False if clf: self.clf = clf @@ -449,10 +462,13 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble": n_epoch=self.explanations_nepoch, prefit=True, ) - + self._fitted = True return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: + if not self._fitted: + raise RuntimeError("Fit the model first") + return pd.DataFrame(self.clf.predict_proba(X)) def explain(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: diff --git a/src/autoprognosis/plugins/prediction/base.py b/src/autoprognosis/plugins/prediction/base.py index 31265bee..e662d146 100644 --- a/src/autoprognosis/plugins/prediction/base.py +++ b/src/autoprognosis/plugins/prediction/base.py @@ -43,6 +43,9 @@ def explain(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: raise NotImplementedError(f"Explainer not implemented for {self.name()}") def predict_proba(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: + if not self._fitted: + raise RuntimeError("Fit the model first") + X = self._transform_input(X) return pd.DataFrame(self._predict_proba(X, *args, **kwargs)) diff --git a/src/autoprognosis/plugins/prediction/classifiers/base.py b/src/autoprognosis/plugins/prediction/classifiers/base.py index e902e8c9..3a53ce5a 100644 --- a/src/autoprognosis/plugins/prediction/classifiers/base.py +++ b/src/autoprognosis/plugins/prediction/classifiers/base.py @@ -39,7 +39,11 @@ def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> plugin.Plugin: raise RuntimeError("Training requires X, y") Y = cast.to_dataframe(args[0]).values.ravel() - return self._fit(X, Y, **kwargs) + self._fit(X, Y, **kwargs) + + self._fitted = True + + return self def score(self, X: pd.DataFrame, y: pd.DataFrame, metric: str = "aucroc") -> float: ev = classifier_evaluator(metric) diff --git a/src/autoprognosis/plugins/prediction/regression/base.py b/src/autoprognosis/plugins/prediction/regression/base.py index ea45e32f..88b91b3a 100644 --- a/src/autoprognosis/plugins/prediction/regression/base.py +++ b/src/autoprognosis/plugins/prediction/regression/base.py @@ -43,6 +43,7 @@ def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "RegressionPlugin": X = self._fit_input(X) self._fit(X, *args, **kwargs) + self._fitted = True return self diff --git a/src/autoprognosis/plugins/prediction/risk_estimation/base.py b/src/autoprognosis/plugins/prediction/risk_estimation/base.py index 46f02da0..97fb29d4 100644 --- a/src/autoprognosis/plugins/prediction/risk_estimation/base.py +++ b/src/autoprognosis/plugins/prediction/risk_estimation/base.py @@ -78,9 +78,13 @@ def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "RiskEstimationPlug task_type="risk_estimation", ) + self._fitted = True return self def explain(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: + if not self._fitted: + raise RuntimeError("Fit the model first") + X = self._transform_input(X) if self.explainer is None: raise ValueError("Interpretability is not enabled for this model") From ddb860c0ed4e105ef996e7752582ee28ce744ba9 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 11:33:17 +0000 Subject: [PATCH 04/26] cleanup --- tests/bugfixing/test_not_fitted_error.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/bugfixing/test_not_fitted_error.py diff --git a/tests/bugfixing/test_not_fitted_error.py b/tests/bugfixing/test_not_fitted_error.py new file mode 100644 index 00000000..90feb244 --- /dev/null +++ b/tests/bugfixing/test_not_fitted_error.py @@ -0,0 +1,17 @@ +# third party +import pandas as pd + +# autoprognosis absolute +from autoprognosis.plugins.prediction import Predictions + + +def test_train_error() -> None: + model = Predictions().get("logistic_regression") + + err = "" + try: + model.predict_proba(pd.DataFrame([])) + except BaseException as e: + err = str(e) + + assert "Fit the model first" == err From 2888915a1dc6ccb0cba73a7ff83bb4e7241fd141 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 11:33:51 +0000 Subject: [PATCH 05/26] bump version --- src/autoprognosis/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autoprognosis/version.py b/src/autoprognosis/version.py index 9cb17e79..c11f861a 100644 --- a/src/autoprognosis/version.py +++ b/src/autoprognosis/version.py @@ -1 +1 @@ -__version__ = "0.1.8" +__version__ = "0.1.9" From bacb6a4f07d19c7c907a95364a912101eb0328af Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 12:18:59 +0000 Subject: [PATCH 06/26] ensemble bugfixing --- src/autoprognosis/plugins/core/base_plugin.py | 3 +++ .../plugins/ensemble/classifiers.py | 26 +++++++++++++++---- .../plugins/pipeline/__init__.py | 2 ++ .../plugins/pipeline/generators.py | 8 ++++++ 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/autoprognosis/plugins/core/base_plugin.py b/src/autoprognosis/plugins/core/base_plugin.py index 73475e5b..2574f0cb 100644 --- a/src/autoprognosis/plugins/core/base_plugin.py +++ b/src/autoprognosis/plugins/core/base_plugin.py @@ -125,6 +125,9 @@ def subtype() -> str: def fqdn(cls) -> str: return cls.type() + "." + cls.subtype() + "." + cls.name() + def is_fitted(self) -> bool: + return self._fitted + def fit_transform(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: return pd.DataFrame(self.fit(X, *args, *kwargs).transform(X)) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index a55cf76e..814f4e3b 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -90,7 +90,10 @@ def __init__( self.explainer_plugins = explainer_plugins self.explanations_nepoch = explanations_nepoch self.explainers = explainers - self._fitted = False + + self._fitted = True + for model in models: + self._fitted |= model.is_fitted() for idx, weight in enumerate(weights): if weight == 0: @@ -98,6 +101,9 @@ def __init__( self.models.append(models[idx]) self.weights.append(weights[idx]) + def is_fitted(self) -> bool: + return self._fitted + def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "WeightedEnsemble": def fit_model(k: int) -> Any: return self.models[k].fit(X, Y) @@ -256,8 +262,6 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "WeightedEnsembleCV": return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: - if not self._fitted: - raise RuntimeError("Fit the model first") result, _ = self.predict_proba_with_uncertainity(X) return result @@ -330,7 +334,10 @@ def __init__( self.explainer_plugins = explainer_plugins self.explainers: Optional[dict] self.explanations_nepoch = explanations_nepoch - self._fitted = False + + self._fitted = True + for model in models: + self._fitted |= model.is_fitted() for model in self.models: model.change_output("numpy") @@ -344,6 +351,9 @@ def __init__( use_proba=True, ) + def is_fitted(self) -> bool: + return self._fitted + def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble": self.clf.fit(X, Y) @@ -439,13 +449,19 @@ def __init__( self.explainer_plugins = explainer_plugins self.explainers: Optional[dict] self.explanations_nepoch = explanations_nepoch - self._fitted = False + + self._fitted = True + for model in models: + self._fitted |= model.is_fitted() if clf: self.clf = clf else: self.clf = SimpleClassifierAggregator(models, method=method) + def is_fitted(self) -> bool: + return self._fitted + def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble": Y = pd.DataFrame(Y).values.ravel() diff --git a/src/autoprognosis/plugins/pipeline/__init__.py b/src/autoprognosis/plugins/pipeline/__init__.py index c5d59c62..fcda6c0a 100644 --- a/src/autoprognosis/plugins/pipeline/__init__.py +++ b/src/autoprognosis/plugins/pipeline/__init__.py @@ -17,6 +17,7 @@ _generate_getstate, _generate_hyperparameter_space_for_layer_impl, _generate_hyperparameter_space_impl, + _generate_is_fitted, _generate_load, _generate_load_template, _generate_name_impl, @@ -37,6 +38,7 @@ def __new__(cls: Type, name: str, plugins: Tuple[Type, ...], dct: dict) -> Any: dct["__setstate__"] = _generate_setstate() dct["__getstate__"] = _generate_getstate() dct["fit"] = _generate_fit() + dct["is_fitted"] = _generate_is_fitted() dct["predict"] = _generate_predict() dct["predict_proba"] = _generate_predict_proba() dct["score"] = _generate_score() diff --git a/src/autoprognosis/plugins/pipeline/generators.py b/src/autoprognosis/plugins/pipeline/generators.py index 9e3102eb..791c5a9b 100644 --- a/src/autoprognosis/plugins/pipeline/generators.py +++ b/src/autoprognosis/plugins/pipeline/generators.py @@ -128,6 +128,13 @@ def fit_impl(self: Any, X: pd.DataFrame, *args: Any, **kwargs: Any) -> Any: return fit_impl +def _generate_is_fitted() -> Callable: + def fit_impl(self: Any) -> Any: + return self.stages[-1].is_fitted() + + return fit_impl + + def _generate_predict() -> Callable: @decorators.benchmark def predict_impl( @@ -249,6 +256,7 @@ def getstate_impl(self: Any) -> dict: "_generate_sample_param_impl", "_generate_constructor", "_generate_fit", + "_generate_is_fitted", "_generate_predict", "_generate_predict_proba", "_generate_score", From a0e45a8fb80ae214b49a1d9892fbe03d3cc69cd7 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 13:50:25 +0000 Subject: [PATCH 07/26] bugfixing --- src/autoprognosis/plugins/prediction/risk_estimation/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autoprognosis/plugins/prediction/risk_estimation/base.py b/src/autoprognosis/plugins/prediction/risk_estimation/base.py index 97fb29d4..6c9aabbb 100644 --- a/src/autoprognosis/plugins/prediction/risk_estimation/base.py +++ b/src/autoprognosis/plugins/prediction/risk_estimation/base.py @@ -56,6 +56,7 @@ def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "RiskEstimationPlug X = self._fit_input(X) self._fit(X, *args, **kwargs) + self._fitted = True if self.with_explanations and self.explainer is None: if "eval_times" not in kwargs: @@ -78,7 +79,6 @@ def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "RiskEstimationPlug task_type="risk_estimation", ) - self._fitted = True return self def explain(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: From 290591fc6fe41c5523f8cc799cfdfcb14916d9ef Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 14:56:20 +0000 Subject: [PATCH 08/26] debug --- .github/workflows/test_R.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_R.yml b/.github/workflows/test_R.yml index 8ad9223a..0bf01ec8 100644 --- a/.github/workflows/test_R.yml +++ b/.github/workflows/test_R.yml @@ -34,7 +34,7 @@ jobs: Rscript -e "install.packages(c('remotes','reticulate'))" - name: Test R run: | - Rscript tests/bindings/R/test_survival_analysis.R Rscript tests/bindings/R/test_classification.R Rscript tests/bindings/R/test_classification_with_missing_data.R Rscript tests/bindings/R/test_regression.R + Rscript tests/bindings/R/test_survival_analysis.R From 6cbb54ece95bcb85675cc59efd40784e5ba5249a Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 15:17:50 +0000 Subject: [PATCH 09/26] debug R --- tests/bindings/R/test_classification.R | 2 +- tests/bindings/R/test_classification_with_missing_data.R | 2 +- tests/bindings/R/test_regression.R | 2 +- tests/bindings/R/test_survival_analysis.R | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/bindings/R/test_classification.R b/tests/bindings/R/test_classification.R index 9ac48dbe..b39be84f 100644 --- a/tests/bindings/R/test_classification.R +++ b/tests/bindings/R/test_classification.R @@ -1,5 +1,5 @@ library(reticulate) -py_install(".", pip = TRUE) +py_install(".", pip = TRUE, ignore_installed = TRUE) pathlib <- import("pathlib", convert=FALSE) warnings <- import("warnings", convert=FALSE) diff --git a/tests/bindings/R/test_classification_with_missing_data.R b/tests/bindings/R/test_classification_with_missing_data.R index d27bccd2..eca96a20 100644 --- a/tests/bindings/R/test_classification_with_missing_data.R +++ b/tests/bindings/R/test_classification_with_missing_data.R @@ -1,5 +1,5 @@ library(reticulate) -py_install(".", pip = TRUE) +py_install(".", pip = TRUE, ignore_installed = TRUE) pathlib <- import("pathlib", convert=FALSE) warnings <- import("warnings", convert=FALSE) diff --git a/tests/bindings/R/test_regression.R b/tests/bindings/R/test_regression.R index 6f0d0c7f..5b81a541 100644 --- a/tests/bindings/R/test_regression.R +++ b/tests/bindings/R/test_regression.R @@ -1,5 +1,5 @@ library(reticulate) -py_install(".", pip = TRUE) +py_install(".", pip = TRUE, ignore_installed = TRUE) pathlib <- import("pathlib", convert=FALSE) warnings <- import("warnings", convert=FALSE) diff --git a/tests/bindings/R/test_survival_analysis.R b/tests/bindings/R/test_survival_analysis.R index ae18c6a4..6a786b48 100644 --- a/tests/bindings/R/test_survival_analysis.R +++ b/tests/bindings/R/test_survival_analysis.R @@ -1,7 +1,7 @@ library(reticulate) library(survival) -py_install(".", pip = TRUE) +py_install(".", pip = TRUE, ignore_installed = TRUE) pathlib <- import("pathlib", convert=FALSE) warnings <- import("warnings", convert=FALSE) From ae9e217b98ddd2f749041d310abedf11ba60e31e Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 15:34:17 +0000 Subject: [PATCH 10/26] debug --- tests/bindings/R/test_classification.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/bindings/R/test_classification.R b/tests/bindings/R/test_classification.R index b39be84f..719e18b5 100644 --- a/tests/bindings/R/test_classification.R +++ b/tests/bindings/R/test_classification.R @@ -1,5 +1,5 @@ library(reticulate) -py_install(".", pip = TRUE, ignore_installed = TRUE) +py_install("autoprognosis", pip = TRUE, ignore_installed = TRUE) pathlib <- import("pathlib", convert=FALSE) warnings <- import("warnings", convert=FALSE) From 54534adabc59dcd5801bdb9c4dc929cbbfcdff61 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 15:43:05 +0000 Subject: [PATCH 11/26] cleanup --- src/autoprognosis/plugins/core/base_plugin.py | 9 +++++--- .../plugins/ensemble/classifiers.py | 22 +++++++++++++------ src/autoprognosis/plugins/prediction/base.py | 2 +- .../prediction/risk_estimation/base.py | 2 +- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/autoprognosis/plugins/core/base_plugin.py b/src/autoprognosis/plugins/core/base_plugin.py index 2574f0cb..96b12df3 100644 --- a/src/autoprognosis/plugins/core/base_plugin.py +++ b/src/autoprognosis/plugins/core/base_plugin.py @@ -126,7 +126,10 @@ def fqdn(cls) -> str: return cls.type() + "." + cls.subtype() + "." + cls.name() def is_fitted(self) -> bool: - return self._fitted + try: + return self._fitted + except BaseException: + return True def fit_transform(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: return pd.DataFrame(self.fit(X, *args, *kwargs).transform(X)) @@ -180,7 +183,7 @@ def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "Plugin": ... def transform(self, X: pd.DataFrame) -> pd.DataFrame: - if not self._fitted: + if not self.is_fitted(): raise RuntimeError("Fit the model first") X = self._transform_input(X) return self.output(self._transform(X)) @@ -190,7 +193,7 @@ def _transform(self, X: pd.DataFrame) -> pd.DataFrame: ... def predict(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: - if not self._fitted: + if not self.is_fitted(): raise RuntimeError("Fit the model first") X = self._transform_input(X) return self.output(self._predict(X, *args, *kwargs)) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index 814f4e3b..45261fd9 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -102,7 +102,10 @@ def __init__( self.weights.append(weights[idx]) def is_fitted(self) -> bool: - return self._fitted + try: + return self._fitted + except BaseException: + return True # backwards compatible def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "WeightedEnsemble": def fit_model(k: int) -> Any: @@ -132,7 +135,7 @@ def fit_model(k: int) -> Any: return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: - if not self._fitted: + if not self.is_fitted(): raise RuntimeError("Fit the model first") preds_ = [] @@ -258,7 +261,6 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "WeightedEnsembleCV": ) self.explainers[exp] = exp_model - self._fitted = True return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: @@ -352,7 +354,10 @@ def __init__( ) def is_fitted(self) -> bool: - return self._fitted + try: + return self._fitted + except BaseException: + return True # backwards compatible def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble": self.clf.fit(X, Y) @@ -372,7 +377,7 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble": return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: - if not self._fitted: + if not self.is_fitted(): raise RuntimeError("Fit the model first") return pd.DataFrame(self.clf.predict_proba(X)) @@ -460,7 +465,10 @@ def __init__( self.clf = SimpleClassifierAggregator(models, method=method) def is_fitted(self) -> bool: - return self._fitted + try: + return self._fitted + except BaseException: + return True # backwards compatible def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble": Y = pd.DataFrame(Y).values.ravel() @@ -482,7 +490,7 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble": return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: - if not self._fitted: + if not self.is_fitted(): raise RuntimeError("Fit the model first") return pd.DataFrame(self.clf.predict_proba(X)) diff --git a/src/autoprognosis/plugins/prediction/base.py b/src/autoprognosis/plugins/prediction/base.py index e662d146..71744731 100644 --- a/src/autoprognosis/plugins/prediction/base.py +++ b/src/autoprognosis/plugins/prediction/base.py @@ -43,7 +43,7 @@ def explain(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: raise NotImplementedError(f"Explainer not implemented for {self.name()}") def predict_proba(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: - if not self._fitted: + if not self.is_fitted(): raise RuntimeError("Fit the model first") X = self._transform_input(X) diff --git a/src/autoprognosis/plugins/prediction/risk_estimation/base.py b/src/autoprognosis/plugins/prediction/risk_estimation/base.py index 6c9aabbb..dd042ce8 100644 --- a/src/autoprognosis/plugins/prediction/risk_estimation/base.py +++ b/src/autoprognosis/plugins/prediction/risk_estimation/base.py @@ -82,7 +82,7 @@ def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "RiskEstimationPlug return self def explain(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: - if not self._fitted: + if not self.is_fitted(): raise RuntimeError("Fit the model first") X = self._transform_input(X) From 5fd1d2f9f8bde824483af517eb3a3924e2836945 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 15:53:13 +0000 Subject: [PATCH 12/26] debug R --- src/autoprognosis/plugins/pipeline/__init__.py | 3 +++ tests/bindings/R/test_classification.R | 2 +- tests/bindings/R/test_classification_with_missing_data.R | 2 +- tests/bindings/R/test_regression.R | 2 +- tests/bindings/R/test_survival_analysis.R | 2 +- 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/autoprognosis/plugins/pipeline/__init__.py b/src/autoprognosis/plugins/pipeline/__init__.py index fcda6c0a..8369de36 100644 --- a/src/autoprognosis/plugins/pipeline/__init__.py +++ b/src/autoprognosis/plugins/pipeline/__init__.py @@ -86,6 +86,9 @@ def get_args(*args: Any, **kwargs: Any) -> Dict: def fit(self: Any, X: pd.DataFrame, *args: Any, **kwargs: Any) -> Any: raise NotImplementedError("not implemented") + def is_fitted(self: Any) -> Any: + raise NotImplementedError("not implemented") + def predict(*args: Any, **kwargs: Any) -> pd.DataFrame: raise NotImplementedError("not implemented") diff --git a/tests/bindings/R/test_classification.R b/tests/bindings/R/test_classification.R index 719e18b5..9ac48dbe 100644 --- a/tests/bindings/R/test_classification.R +++ b/tests/bindings/R/test_classification.R @@ -1,5 +1,5 @@ library(reticulate) -py_install("autoprognosis", pip = TRUE, ignore_installed = TRUE) +py_install(".", pip = TRUE) pathlib <- import("pathlib", convert=FALSE) warnings <- import("warnings", convert=FALSE) diff --git a/tests/bindings/R/test_classification_with_missing_data.R b/tests/bindings/R/test_classification_with_missing_data.R index eca96a20..d27bccd2 100644 --- a/tests/bindings/R/test_classification_with_missing_data.R +++ b/tests/bindings/R/test_classification_with_missing_data.R @@ -1,5 +1,5 @@ library(reticulate) -py_install(".", pip = TRUE, ignore_installed = TRUE) +py_install(".", pip = TRUE) pathlib <- import("pathlib", convert=FALSE) warnings <- import("warnings", convert=FALSE) diff --git a/tests/bindings/R/test_regression.R b/tests/bindings/R/test_regression.R index 5b81a541..6f0d0c7f 100644 --- a/tests/bindings/R/test_regression.R +++ b/tests/bindings/R/test_regression.R @@ -1,5 +1,5 @@ library(reticulate) -py_install(".", pip = TRUE, ignore_installed = TRUE) +py_install(".", pip = TRUE) pathlib <- import("pathlib", convert=FALSE) warnings <- import("warnings", convert=FALSE) diff --git a/tests/bindings/R/test_survival_analysis.R b/tests/bindings/R/test_survival_analysis.R index 6a786b48..ae18c6a4 100644 --- a/tests/bindings/R/test_survival_analysis.R +++ b/tests/bindings/R/test_survival_analysis.R @@ -1,7 +1,7 @@ library(reticulate) library(survival) -py_install(".", pip = TRUE, ignore_installed = TRUE) +py_install(".", pip = TRUE) pathlib <- import("pathlib", convert=FALSE) warnings <- import("warnings", convert=FALSE) From 46cf3457f5cabd77cb78ccf584652a903314a4ee Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 16:53:17 +0000 Subject: [PATCH 13/26] debug --- tests/studies/test_classifiers_studies.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/studies/test_classifiers_studies.py b/tests/studies/test_classifiers_studies.py index a7d3be90..be89fb53 100644 --- a/tests/studies/test_classifiers_studies.py +++ b/tests/studies/test_classifiers_studies.py @@ -15,7 +15,6 @@ from autoprognosis.utils.tester import evaluate_estimator -@pytest.mark.slow def test_search() -> None: X, Y = load_breast_cancer(return_X_y=True, as_frame=True) From d01d25121d1830a92f2d13689b6da4935fb961ce Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 17:16:31 +0000 Subject: [PATCH 14/26] debug R --- .github/workflows/test_R.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_R.yml b/.github/workflows/test_R.yml index 0bf01ec8..66ea0fa4 100644 --- a/.github/workflows/test_R.yml +++ b/.github/workflows/test_R.yml @@ -34,7 +34,7 @@ jobs: Rscript -e "install.packages(c('remotes','reticulate'))" - name: Test R run: | - Rscript tests/bindings/R/test_classification.R - Rscript tests/bindings/R/test_classification_with_missing_data.R + #Rscript tests/bindings/R/test_classification.R + #Rscript tests/bindings/R/test_classification_with_missing_data.R Rscript tests/bindings/R/test_regression.R Rscript tests/bindings/R/test_survival_analysis.R From 1f2287e7f975980fd44e4bbf8ffd5d1aae07152d Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 17:33:59 +0000 Subject: [PATCH 15/26] revert 1 --- .../plugins/ensemble/classifiers.py | 46 ++----------------- 1 file changed, 3 insertions(+), 43 deletions(-) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index 45261fd9..dd7f4730 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -91,22 +91,12 @@ def __init__( self.explanations_nepoch = explanations_nepoch self.explainers = explainers - self._fitted = True - for model in models: - self._fitted |= model.is_fitted() - for idx, weight in enumerate(weights): if weight == 0: continue self.models.append(models[idx]) self.weights.append(weights[idx]) - def is_fitted(self) -> bool: - try: - return self._fitted - except BaseException: - return True # backwards compatible - def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "WeightedEnsemble": def fit_model(k: int) -> Any: return self.models[k].fit(X, Y) @@ -131,13 +121,9 @@ def fit_model(k: int) -> Any: ) self.explainers[exp] = exp_model - self._fitted = True return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: - if not self.is_fitted(): - raise RuntimeError("Fit the model first") - preds_ = [] for k in range(len(self.models)): preds_.append(self.models[k].predict_proba(X, *args) * self.weights[k]) @@ -322,7 +308,7 @@ def __init__( self, models: List[PipelineMeta], meta_model: PipelineMeta = Pipeline( - ["imputer.default.ice", "prediction.classifier.logistic_regression"] + ["prediction.classifier.logistic_regression"] )(output="numpy"), clf: Union[None, Stacking] = None, explainer_plugins: list = [], @@ -337,10 +323,6 @@ def __init__( self.explainers: Optional[dict] self.explanations_nepoch = explanations_nepoch - self._fitted = True - for model in models: - self._fitted |= model.is_fitted() - for model in self.models: model.change_output("numpy") @@ -353,12 +335,6 @@ def __init__( use_proba=True, ) - def is_fitted(self) -> bool: - try: - return self._fitted - except BaseException: - return True # backwards compatible - def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble": self.clf.fit(X, Y) @@ -373,13 +349,10 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble": n_epoch=self.explanations_nepoch, prefit=True, ) - self._fitted = True + return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: - if not self.is_fitted(): - raise RuntimeError("Fit the model first") - return pd.DataFrame(self.clf.predict_proba(X)) def explain(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: @@ -455,21 +428,11 @@ def __init__( self.explainers: Optional[dict] self.explanations_nepoch = explanations_nepoch - self._fitted = True - for model in models: - self._fitted |= model.is_fitted() - if clf: self.clf = clf else: self.clf = SimpleClassifierAggregator(models, method=method) - def is_fitted(self) -> bool: - try: - return self._fitted - except BaseException: - return True # backwards compatible - def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble": Y = pd.DataFrame(Y).values.ravel() @@ -486,13 +449,10 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble": n_epoch=self.explanations_nepoch, prefit=True, ) - self._fitted = True + return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: - if not self.is_fitted(): - raise RuntimeError("Fit the model first") - return pd.DataFrame(self.clf.predict_proba(X)) def explain(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: From 915217ddc1b3fe51c63ed24fe87398e65bb83f35 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 17:34:12 +0000 Subject: [PATCH 16/26] debug --- .github/workflows/test_R.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_R.yml b/.github/workflows/test_R.yml index 66ea0fa4..0bf01ec8 100644 --- a/.github/workflows/test_R.yml +++ b/.github/workflows/test_R.yml @@ -34,7 +34,7 @@ jobs: Rscript -e "install.packages(c('remotes','reticulate'))" - name: Test R run: | - #Rscript tests/bindings/R/test_classification.R - #Rscript tests/bindings/R/test_classification_with_missing_data.R + Rscript tests/bindings/R/test_classification.R + Rscript tests/bindings/R/test_classification_with_missing_data.R Rscript tests/bindings/R/test_regression.R Rscript tests/bindings/R/test_survival_analysis.R From ed47cbbf25533df8092a2c102a8733614d548037 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 17:44:03 +0000 Subject: [PATCH 17/26] Revert "revert 1" This reverts commit 1f2287e7f975980fd44e4bbf8ffd5d1aae07152d. --- .../plugins/ensemble/classifiers.py | 46 +++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index dd7f4730..45261fd9 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -91,12 +91,22 @@ def __init__( self.explanations_nepoch = explanations_nepoch self.explainers = explainers + self._fitted = True + for model in models: + self._fitted |= model.is_fitted() + for idx, weight in enumerate(weights): if weight == 0: continue self.models.append(models[idx]) self.weights.append(weights[idx]) + def is_fitted(self) -> bool: + try: + return self._fitted + except BaseException: + return True # backwards compatible + def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "WeightedEnsemble": def fit_model(k: int) -> Any: return self.models[k].fit(X, Y) @@ -121,9 +131,13 @@ def fit_model(k: int) -> Any: ) self.explainers[exp] = exp_model + self._fitted = True return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: + if not self.is_fitted(): + raise RuntimeError("Fit the model first") + preds_ = [] for k in range(len(self.models)): preds_.append(self.models[k].predict_proba(X, *args) * self.weights[k]) @@ -308,7 +322,7 @@ def __init__( self, models: List[PipelineMeta], meta_model: PipelineMeta = Pipeline( - ["prediction.classifier.logistic_regression"] + ["imputer.default.ice", "prediction.classifier.logistic_regression"] )(output="numpy"), clf: Union[None, Stacking] = None, explainer_plugins: list = [], @@ -323,6 +337,10 @@ def __init__( self.explainers: Optional[dict] self.explanations_nepoch = explanations_nepoch + self._fitted = True + for model in models: + self._fitted |= model.is_fitted() + for model in self.models: model.change_output("numpy") @@ -335,6 +353,12 @@ def __init__( use_proba=True, ) + def is_fitted(self) -> bool: + try: + return self._fitted + except BaseException: + return True # backwards compatible + def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble": self.clf.fit(X, Y) @@ -349,10 +373,13 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble": n_epoch=self.explanations_nepoch, prefit=True, ) - + self._fitted = True return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: + if not self.is_fitted(): + raise RuntimeError("Fit the model first") + return pd.DataFrame(self.clf.predict_proba(X)) def explain(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: @@ -428,11 +455,21 @@ def __init__( self.explainers: Optional[dict] self.explanations_nepoch = explanations_nepoch + self._fitted = True + for model in models: + self._fitted |= model.is_fitted() + if clf: self.clf = clf else: self.clf = SimpleClassifierAggregator(models, method=method) + def is_fitted(self) -> bool: + try: + return self._fitted + except BaseException: + return True # backwards compatible + def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble": Y = pd.DataFrame(Y).values.ravel() @@ -449,10 +486,13 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble": n_epoch=self.explanations_nepoch, prefit=True, ) - + self._fitted = True return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: + if not self.is_fitted(): + raise RuntimeError("Fit the model first") + return pd.DataFrame(self.clf.predict_proba(X)) def explain(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: From 3bd4c247a4899e0871ab6d86d79212a7fdf48399 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 18:23:34 +0000 Subject: [PATCH 18/26] debug R --- src/autoprognosis/plugins/ensemble/classifiers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index 45261fd9..a08cc657 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -93,7 +93,7 @@ def __init__( self._fitted = True for model in models: - self._fitted |= model.is_fitted() + self._fitted = self._fitted or model.is_fitted() for idx, weight in enumerate(weights): if weight == 0: From 70564e96dc00862f4bed6df0339025054a35223d Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 18:40:07 +0000 Subject: [PATCH 19/26] debug --- src/autoprognosis/plugins/ensemble/classifiers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index a08cc657..177b3e69 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -339,7 +339,7 @@ def __init__( self._fitted = True for model in models: - self._fitted |= model.is_fitted() + self._fitted = self._fitted or model.is_fitted() for model in self.models: model.change_output("numpy") @@ -457,7 +457,7 @@ def __init__( self._fitted = True for model in models: - self._fitted |= model.is_fitted() + self._fitted = self._fitted or model.is_fitted() if clf: self.clf = clf From 483b86f0f7b461cf86ed6582f76d0b9bb5a7ecfe Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 19:10:22 +0000 Subject: [PATCH 20/26] debug --- .../plugins/ensemble/classifiers.py | 46 ++----------------- 1 file changed, 3 insertions(+), 43 deletions(-) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index 177b3e69..dd7f4730 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -91,22 +91,12 @@ def __init__( self.explanations_nepoch = explanations_nepoch self.explainers = explainers - self._fitted = True - for model in models: - self._fitted = self._fitted or model.is_fitted() - for idx, weight in enumerate(weights): if weight == 0: continue self.models.append(models[idx]) self.weights.append(weights[idx]) - def is_fitted(self) -> bool: - try: - return self._fitted - except BaseException: - return True # backwards compatible - def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "WeightedEnsemble": def fit_model(k: int) -> Any: return self.models[k].fit(X, Y) @@ -131,13 +121,9 @@ def fit_model(k: int) -> Any: ) self.explainers[exp] = exp_model - self._fitted = True return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: - if not self.is_fitted(): - raise RuntimeError("Fit the model first") - preds_ = [] for k in range(len(self.models)): preds_.append(self.models[k].predict_proba(X, *args) * self.weights[k]) @@ -322,7 +308,7 @@ def __init__( self, models: List[PipelineMeta], meta_model: PipelineMeta = Pipeline( - ["imputer.default.ice", "prediction.classifier.logistic_regression"] + ["prediction.classifier.logistic_regression"] )(output="numpy"), clf: Union[None, Stacking] = None, explainer_plugins: list = [], @@ -337,10 +323,6 @@ def __init__( self.explainers: Optional[dict] self.explanations_nepoch = explanations_nepoch - self._fitted = True - for model in models: - self._fitted = self._fitted or model.is_fitted() - for model in self.models: model.change_output("numpy") @@ -353,12 +335,6 @@ def __init__( use_proba=True, ) - def is_fitted(self) -> bool: - try: - return self._fitted - except BaseException: - return True # backwards compatible - def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble": self.clf.fit(X, Y) @@ -373,13 +349,10 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble": n_epoch=self.explanations_nepoch, prefit=True, ) - self._fitted = True + return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: - if not self.is_fitted(): - raise RuntimeError("Fit the model first") - return pd.DataFrame(self.clf.predict_proba(X)) def explain(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: @@ -455,21 +428,11 @@ def __init__( self.explainers: Optional[dict] self.explanations_nepoch = explanations_nepoch - self._fitted = True - for model in models: - self._fitted = self._fitted or model.is_fitted() - if clf: self.clf = clf else: self.clf = SimpleClassifierAggregator(models, method=method) - def is_fitted(self) -> bool: - try: - return self._fitted - except BaseException: - return True # backwards compatible - def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble": Y = pd.DataFrame(Y).values.ravel() @@ -486,13 +449,10 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble": n_epoch=self.explanations_nepoch, prefit=True, ) - self._fitted = True + return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: - if not self.is_fitted(): - raise RuntimeError("Fit the model first") - return pd.DataFrame(self.clf.predict_proba(X)) def explain(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: From 3c1b7ea77d229ca3df12cd06a1088421e7d49898 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 19:22:40 +0000 Subject: [PATCH 21/26] debug --- src/autoprognosis/plugins/ensemble/classifiers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index dd7f4730..3ed55573 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -308,7 +308,7 @@ def __init__( self, models: List[PipelineMeta], meta_model: PipelineMeta = Pipeline( - ["prediction.classifier.logistic_regression"] + ["imputer.default.ice", "prediction.classifier.logistic_regression"] )(output="numpy"), clf: Union[None, Stacking] = None, explainer_plugins: list = [], From b74677558518cf83f5ad852c11a3e8dc09c96e79 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 19:38:50 +0000 Subject: [PATCH 22/26] Revert "debug" This reverts commit 3c1b7ea77d229ca3df12cd06a1088421e7d49898. --- src/autoprognosis/plugins/ensemble/classifiers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index 3ed55573..dd7f4730 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -308,7 +308,7 @@ def __init__( self, models: List[PipelineMeta], meta_model: PipelineMeta = Pipeline( - ["imputer.default.ice", "prediction.classifier.logistic_regression"] + ["prediction.classifier.logistic_regression"] )(output="numpy"), clf: Union[None, Stacking] = None, explainer_plugins: list = [], From b6121a9f5be3ef542feb71afe01500d10a0aa3ec Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 19:39:16 +0000 Subject: [PATCH 23/26] Revert "debug" This reverts commit 483b86f0f7b461cf86ed6582f76d0b9bb5a7ecfe. --- .../plugins/ensemble/classifiers.py | 46 +++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index dd7f4730..177b3e69 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -91,12 +91,22 @@ def __init__( self.explanations_nepoch = explanations_nepoch self.explainers = explainers + self._fitted = True + for model in models: + self._fitted = self._fitted or model.is_fitted() + for idx, weight in enumerate(weights): if weight == 0: continue self.models.append(models[idx]) self.weights.append(weights[idx]) + def is_fitted(self) -> bool: + try: + return self._fitted + except BaseException: + return True # backwards compatible + def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "WeightedEnsemble": def fit_model(k: int) -> Any: return self.models[k].fit(X, Y) @@ -121,9 +131,13 @@ def fit_model(k: int) -> Any: ) self.explainers[exp] = exp_model + self._fitted = True return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: + if not self.is_fitted(): + raise RuntimeError("Fit the model first") + preds_ = [] for k in range(len(self.models)): preds_.append(self.models[k].predict_proba(X, *args) * self.weights[k]) @@ -308,7 +322,7 @@ def __init__( self, models: List[PipelineMeta], meta_model: PipelineMeta = Pipeline( - ["prediction.classifier.logistic_regression"] + ["imputer.default.ice", "prediction.classifier.logistic_regression"] )(output="numpy"), clf: Union[None, Stacking] = None, explainer_plugins: list = [], @@ -323,6 +337,10 @@ def __init__( self.explainers: Optional[dict] self.explanations_nepoch = explanations_nepoch + self._fitted = True + for model in models: + self._fitted = self._fitted or model.is_fitted() + for model in self.models: model.change_output("numpy") @@ -335,6 +353,12 @@ def __init__( use_proba=True, ) + def is_fitted(self) -> bool: + try: + return self._fitted + except BaseException: + return True # backwards compatible + def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble": self.clf.fit(X, Y) @@ -349,10 +373,13 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble": n_epoch=self.explanations_nepoch, prefit=True, ) - + self._fitted = True return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: + if not self.is_fitted(): + raise RuntimeError("Fit the model first") + return pd.DataFrame(self.clf.predict_proba(X)) def explain(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: @@ -428,11 +455,21 @@ def __init__( self.explainers: Optional[dict] self.explanations_nepoch = explanations_nepoch + self._fitted = True + for model in models: + self._fitted = self._fitted or model.is_fitted() + if clf: self.clf = clf else: self.clf = SimpleClassifierAggregator(models, method=method) + def is_fitted(self) -> bool: + try: + return self._fitted + except BaseException: + return True # backwards compatible + def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble": Y = pd.DataFrame(Y).values.ravel() @@ -449,10 +486,13 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble": n_epoch=self.explanations_nepoch, prefit=True, ) - + self._fitted = True return self def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: + if not self.is_fitted(): + raise RuntimeError("Fit the model first") + return pd.DataFrame(self.clf.predict_proba(X)) def explain(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame: From fe8f5ffee76504ea8c4be9c9344db1d65327bfa9 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 19:41:57 +0000 Subject: [PATCH 24/26] debug --- src/autoprognosis/plugins/ensemble/classifiers.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index 177b3e69..4020c249 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -11,9 +11,12 @@ # autoprognosis absolute import autoprognosis.logger as log +from autoprognosis.plugins import group from autoprognosis.plugins.ensemble.combos import SimpleClassifierAggregator, Stacking from autoprognosis.plugins.explainers import Explainers +from autoprognosis.plugins.imputers import Imputers from autoprognosis.plugins.pipeline import Pipeline, PipelineMeta +from autoprognosis.plugins.prediction.classifiers import Classifiers from autoprognosis.utils.parallel import cpu_count import autoprognosis.utils.serialization as serialization from autoprognosis.utils.tester import classifier_evaluator @@ -322,7 +325,12 @@ def __init__( self, models: List[PipelineMeta], meta_model: PipelineMeta = Pipeline( - ["imputer.default.ice", "prediction.classifier.logistic_regression"] + group( + [ + Imputers().get_type("ice").fqdn(), + Classifiers().get_type("logistic_regression").fqdn(), + ] + ) )(output="numpy"), clf: Union[None, Stacking] = None, explainer_plugins: list = [], From 275252685a43f8b7d426ccc6899fcd501b80a948 Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 19:48:17 +0000 Subject: [PATCH 25/26] bugfixing --- src/autoprognosis/plugins/ensemble/classifiers.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index 4020c249..66c6ad25 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -11,7 +11,6 @@ # autoprognosis absolute import autoprognosis.logger as log -from autoprognosis.plugins import group from autoprognosis.plugins.ensemble.combos import SimpleClassifierAggregator, Stacking from autoprognosis.plugins.explainers import Explainers from autoprognosis.plugins.imputers import Imputers @@ -325,12 +324,10 @@ def __init__( self, models: List[PipelineMeta], meta_model: PipelineMeta = Pipeline( - group( - [ - Imputers().get_type("ice").fqdn(), - Classifiers().get_type("logistic_regression").fqdn(), - ] - ) + [ + Imputers().get_type("ice").fqdn(), + Classifiers().get_type("logistic_regression").fqdn(), + ] )(output="numpy"), clf: Union[None, Stacking] = None, explainer_plugins: list = [], From 19c31fa7c6ec997053c47293e182ebb629e41c5a Mon Sep 17 00:00:00 2001 From: Bogdan Cebere Date: Mon, 28 Nov 2022 20:19:16 +0000 Subject: [PATCH 26/26] debug --- src/autoprognosis/plugins/ensemble/classifiers.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/autoprognosis/plugins/ensemble/classifiers.py b/src/autoprognosis/plugins/ensemble/classifiers.py index 66c6ad25..5c054610 100644 --- a/src/autoprognosis/plugins/ensemble/classifiers.py +++ b/src/autoprognosis/plugins/ensemble/classifiers.py @@ -323,12 +323,7 @@ class StackingEnsemble(BaseEnsemble): def __init__( self, models: List[PipelineMeta], - meta_model: PipelineMeta = Pipeline( - [ - Imputers().get_type("ice").fqdn(), - Classifiers().get_type("logistic_regression").fqdn(), - ] - )(output="numpy"), + meta_model: Optional[PipelineMeta] = None, clf: Union[None, Stacking] = None, explainer_plugins: list = [], explanations_nepoch: int = 10000, @@ -336,6 +331,13 @@ def __init__( super().__init__() self.models = models + if meta_model is None: + meta_model = Pipeline( + [ + Imputers().get_type("ice").fqdn(), + Classifiers().get_type("logistic_regression").fqdn(), + ] + )(output="numpy") self.meta_model = meta_model self.explainer_plugins = explainer_plugins