Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfixing & improvements #40

Merged
merged 26 commits into from
Nov 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test_R.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
Rscript -e "install.packages(c('remotes','reticulate'))"
- name: Test R
run: |
Rscript tests/bindings/R/test_survival_analysis.R
Rscript tests/bindings/R/test_classification.R
Rscript tests/bindings/R/test_classification_with_missing_data.R
Rscript tests/bindings/R/test_regression.R
Rscript tests/bindings/R/test_survival_analysis.R
17 changes: 16 additions & 1 deletion src/autoprognosis/plugins/core/base_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def __init__(self) -> None:
self.output = pd.DataFrame
self._backup_encoders: Optional[Dict[str, LabelEncoder]] = {}
self._drop_features: Optional[List[str]] = []
self._fitted = False

def change_output(self, output: str) -> None:
if output not in ["pandas", "numpy"]:
Expand Down Expand Up @@ -124,6 +125,12 @@ def subtype() -> str:
def fqdn(cls) -> str:
return cls.type() + "." + cls.subtype() + "." + cls.name()

def is_fitted(self) -> bool:
    """Report whether this plugin has been fitted.

    Returns:
        The value of ``self._fitted``. When the attribute is missing the
        plugin is reported as fitted (``True``).
    """
    # NOTE(review): the True default presumably covers instances that were
    # created or deserialized before the `_fitted` flag existed - confirm.
    # Only a missing attribute is tolerated here; catching BaseException
    # would also swallow KeyboardInterrupt / SystemExit.
    return getattr(self, "_fitted", True)

def fit_transform(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame:
    """Fit the plugin on ``X`` and return the transformed ``X``.

    Args:
        X: Input data, used both for fitting and for the transform.
        *args: Extra positional arguments forwarded to ``fit``.
        **kwargs: Extra keyword arguments forwarded to ``fit``.

    Returns:
        The result of ``transform(X)`` wrapped in a ``pd.DataFrame``.
    """
    # Keyword arguments must be forwarded with ``**``; a single ``*`` would
    # pass only the dictionary keys as extra positional arguments.
    fitted = self.fit(X, *args, **kwargs)
    return pd.DataFrame(fitted.transform(X))

Expand Down Expand Up @@ -165,13 +172,19 @@ def _transform_input(self, X: pd.DataFrame) -> pd.DataFrame:
def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "Plugin":
X = self._fit_input(X)

return self._fit(X, *args, **kwargs)
self._fit(X, *args, **kwargs)

self._fitted = True

return self

@abstractmethod
def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "Plugin":
...

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
if not self.is_fitted():
raise RuntimeError("Fit the model first")
X = self._transform_input(X)
return self.output(self._transform(X))

Expand All @@ -180,6 +193,8 @@ def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
...

def predict(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame:
    """Run the fitted plugin's prediction on ``X``.

    Args:
        X: Input data; passed through ``_transform_input`` before prediction.
        *args: Extra positional arguments forwarded to ``_predict``.
        **kwargs: Extra keyword arguments forwarded to ``_predict``.

    Returns:
        The output of ``_predict`` converted with ``self.output``.

    Raises:
        RuntimeError: If the plugin has not been fitted yet.
    """
    if not self.is_fitted():
        raise RuntimeError("Fit the model first")
    X = self._transform_input(X)
    # Keyword arguments must be forwarded with ``**``; a single ``*`` would
    # pass only the dictionary keys as extra positional arguments.
    return self.output(self._predict(X, *args, **kwargs))

Expand Down
57 changes: 52 additions & 5 deletions src/autoprognosis/plugins/ensemble/classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
import autoprognosis.logger as log
from autoprognosis.plugins.ensemble.combos import SimpleClassifierAggregator, Stacking
from autoprognosis.plugins.explainers import Explainers
from autoprognosis.plugins.imputers import Imputers
from autoprognosis.plugins.pipeline import Pipeline, PipelineMeta
from autoprognosis.plugins.prediction.classifiers import Classifiers
from autoprognosis.utils.parallel import cpu_count
import autoprognosis.utils.serialization as serialization
from autoprognosis.utils.tester import classifier_evaluator
Expand Down Expand Up @@ -91,12 +93,22 @@ def __init__(
self.explanations_nepoch = explanations_nepoch
self.explainers = explainers

# The ensemble counts as fitted only when every supplied model is fitted.
# The previous `True or model.is_fitted()` accumulation always produced True,
# which turned the "Fit the model first" guard in predict_proba into a no-op.
self._fitted = all(model.is_fitted() for model in models)

for idx, weight in enumerate(weights):
if weight == 0:
continue
self.models.append(models[idx])
self.weights.append(weights[idx])

def is_fitted(self) -> bool:
    """Report whether the ensemble has been fitted.

    Returns:
        The value of ``self._fitted``; ``True`` when the attribute is
        missing, which keeps objects without the flag usable
        (backwards compatible).
    """
    # Only a missing attribute is tolerated here; catching BaseException
    # would also swallow KeyboardInterrupt / SystemExit.
    return getattr(self, "_fitted", True)  # backwards compatible

def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "WeightedEnsemble":
def fit_model(k: int) -> Any:
return self.models[k].fit(X, Y)
Expand All @@ -121,9 +133,13 @@ def fit_model(k: int) -> Any:
)
self.explainers[exp] = exp_model

self._fitted = True
return self

def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame:
if not self.is_fitted():
raise RuntimeError("Fit the model first")

preds_ = []
for k in range(len(self.models)):
preds_.append(self.models[k].predict_proba(X, *args) * self.weights[k])
Expand Down Expand Up @@ -307,22 +323,31 @@ class StackingEnsemble(BaseEnsemble):
def __init__(
self,
models: List[PipelineMeta],
meta_model: PipelineMeta = Pipeline(
["prediction.classifier.logistic_regression"]
)(output="numpy"),
meta_model: Optional[PipelineMeta] = None,
clf: Union[None, Stacking] = None,
explainer_plugins: list = [],
explanations_nepoch: int = 10000,
) -> None:
super().__init__()

self.models = models
if meta_model is None:
meta_model = Pipeline(
[
Imputers().get_type("ice").fqdn(),
Classifiers().get_type("logistic_regression").fqdn(),
]
)(output="numpy")
self.meta_model = meta_model

self.explainer_plugins = explainer_plugins
self.explainers: Optional[dict]
self.explanations_nepoch = explanations_nepoch

# The ensemble counts as fitted only when every supplied model is fitted.
# The previous `True or model.is_fitted()` accumulation always produced True,
# which turned the "Fit the model first" guard in predict_proba into a no-op.
self._fitted = all(model.is_fitted() for model in models)

for model in self.models:
model.change_output("numpy")

Expand All @@ -335,6 +360,12 @@ def __init__(
use_proba=True,
)

def is_fitted(self) -> bool:
    """Report whether the ensemble has been fitted.

    Returns:
        The value of ``self._fitted``; ``True`` when the attribute is
        missing, which keeps objects without the flag usable
        (backwards compatible).
    """
    # Only a missing attribute is tolerated here; catching BaseException
    # would also swallow KeyboardInterrupt / SystemExit.
    return getattr(self, "_fitted", True)  # backwards compatible

def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble":
self.clf.fit(X, Y)

Expand All @@ -349,10 +380,13 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "StackingEnsemble":
n_epoch=self.explanations_nepoch,
prefit=True,
)

self._fitted = True
return self

def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame:
if not self.is_fitted():
raise RuntimeError("Fit the model first")

return pd.DataFrame(self.clf.predict_proba(X))

def explain(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame:
Expand Down Expand Up @@ -428,11 +462,21 @@ def __init__(
self.explainers: Optional[dict]
self.explanations_nepoch = explanations_nepoch

# The ensemble counts as fitted only when every supplied model is fitted.
# The previous `True or model.is_fitted()` accumulation always produced True,
# which turned the "Fit the model first" guard in predict_proba into a no-op.
self._fitted = all(model.is_fitted() for model in models)

if clf:
self.clf = clf
else:
self.clf = SimpleClassifierAggregator(models, method=method)

def is_fitted(self) -> bool:
    """Report whether the ensemble has been fitted.

    Returns:
        The value of ``self._fitted``; ``True`` when the attribute is
        missing, which keeps objects without the flag usable
        (backwards compatible).
    """
    # Only a missing attribute is tolerated here; catching BaseException
    # would also swallow KeyboardInterrupt / SystemExit.
    return getattr(self, "_fitted", True)  # backwards compatible

def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble":
Y = pd.DataFrame(Y).values.ravel()

Expand All @@ -449,10 +493,13 @@ def fit(self, X: pd.DataFrame, Y: pd.DataFrame) -> "AggregatingEnsemble":
n_epoch=self.explanations_nepoch,
prefit=True,
)

self._fitted = True
return self

def predict_proba(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame:
if not self.is_fitted():
raise RuntimeError("Fit the model first")

return pd.DataFrame(self.clf.predict_proba(X))

def explain(self, X: pd.DataFrame, *args: Any) -> pd.DataFrame:
Expand Down
26 changes: 14 additions & 12 deletions src/autoprognosis/plugins/ensemble/combos.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
import numpy as np
from pyod.utils.utility import check_parameter
from scipy.special import erf
from sklearn.experimental import enable_iterative_imputer # noqa: F401,E402
from sklearn.impute import IterativeImputer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import (
check_array,
Expand Down Expand Up @@ -417,9 +420,6 @@ class Stacking(BaseAggregator):
base_estimators: list or numpy array (n_estimators,)
A list of base classifiers.

meta_clf : object, optional (default=LogisticRegression)
The meta classifier to make the final prediction.

n_folds : int, optional (default=2)
The number of splits of the training sample.

Expand Down Expand Up @@ -451,7 +451,7 @@ def __init__(
self,
base_estimators,
meta_clf=None,
n_folds=2,
n_folds=3,
keep_original=True,
use_proba=False,
shuffle_data=False,
Expand All @@ -473,7 +473,9 @@ def __init__(
if meta_clf is not None:
self.meta_clf = meta_clf
else:
self.meta_clf = LogisticRegression()
# sklearn's Pipeline takes ONE argument holding the list of (name, estimator)
# steps. Passing the two tuples as separate positional arguments would treat
# the first tuple as `steps` and the second as an extra positional argument.
self.meta_clf = Pipeline(
    [("imputer", IterativeImputer()), ("output", LogisticRegression())]
)

# set flags
self.keep_original = keep_original
Expand Down Expand Up @@ -504,8 +506,8 @@ def fit(self, X, y):
"""

# Validate inputs X and y
X, y = check_X_y(X, y)
X = check_array(X)
X, y = check_X_y(X, y, force_all_finite=False)
X = check_array(X, force_all_finite=False)
self._set_n_classes(y)

n_samples = X.shape[0]
Expand Down Expand Up @@ -574,7 +576,7 @@ def _process_data(self, X):
The processed dataset of X.
"""
check_is_fitted(self, ["fitted_"])
X = check_array(X)
X = check_array(X, force_all_finite=False)
n_samples = X.shape[0]

# initialize matrix for storing newly generated features
Expand Down Expand Up @@ -718,8 +720,8 @@ def fit(self, X, y):
"""

# Validate inputs X and y
X, y = check_X_y(X, y)
X = check_array(X)
X, y = check_X_y(X, y, force_all_finite=False)
X = check_array(X, force_all_finite=False)
self._set_n_classes(y)

if self.pre_fitted:
Expand All @@ -744,7 +746,7 @@ def predict(self, X):
labels : numpy array of shape (n_samples,)
Class labels for each data sample.
"""
X = check_array(X)
X = check_array(X, force_all_finite=False)

all_scores = np.zeros([X.shape[0], self.n_base_estimators_])

Expand Down Expand Up @@ -782,7 +784,7 @@ def predict_proba(self, X):
The class probabilities of the input samples.
Classes are ordered by lexicographic order.
"""
X = check_array(X)
X = check_array(X, force_all_finite=False)
all_scores = np.zeros([X.shape[0], self._classes, self.n_base_estimators_])

for i in range(self.n_base_estimators_):
Expand Down
5 changes: 5 additions & 0 deletions src/autoprognosis/plugins/pipeline/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
_generate_getstate,
_generate_hyperparameter_space_for_layer_impl,
_generate_hyperparameter_space_impl,
_generate_is_fitted,
_generate_load,
_generate_load_template,
_generate_name_impl,
Expand All @@ -37,6 +38,7 @@ def __new__(cls: Type, name: str, plugins: Tuple[Type, ...], dct: dict) -> Any:
dct["__setstate__"] = _generate_setstate()
dct["__getstate__"] = _generate_getstate()
dct["fit"] = _generate_fit()
dct["is_fitted"] = _generate_is_fitted()
dct["predict"] = _generate_predict()
dct["predict_proba"] = _generate_predict_proba()
dct["score"] = _generate_score()
Expand Down Expand Up @@ -84,6 +86,9 @@ def get_args(*args: Any, **kwargs: Any) -> Dict:
def fit(self: Any, X: pd.DataFrame, *args: Any, **kwargs: Any) -> Any:
raise NotImplementedError("not implemented")

def is_fitted(self: Any) -> Any:
raise NotImplementedError("not implemented")

def predict(*args: Any, **kwargs: Any) -> pd.DataFrame:
raise NotImplementedError("not implemented")

Expand Down
8 changes: 8 additions & 0 deletions src/autoprognosis/plugins/pipeline/generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,13 @@ def fit_impl(self: Any, X: pd.DataFrame, *args: Any, **kwargs: Any) -> Any:
return fit_impl


def _generate_is_fitted() -> Callable:
def fit_impl(self: Any) -> Any:
return self.stages[-1].is_fitted()

return fit_impl


def _generate_predict() -> Callable:
@decorators.benchmark
def predict_impl(
Expand Down Expand Up @@ -249,6 +256,7 @@ def getstate_impl(self: Any) -> dict:
"_generate_sample_param_impl",
"_generate_constructor",
"_generate_fit",
"_generate_is_fitted",
"_generate_predict",
"_generate_predict_proba",
"_generate_score",
Expand Down
3 changes: 3 additions & 0 deletions src/autoprognosis/plugins/prediction/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ def explain(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame:
raise NotImplementedError(f"Explainer not implemented for {self.name()}")

def predict_proba(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame:
if not self.is_fitted():
raise RuntimeError("Fit the model first")

X = self._transform_input(X)
return pd.DataFrame(self._predict_proba(X, *args, **kwargs))

Expand Down
6 changes: 5 additions & 1 deletion src/autoprognosis/plugins/prediction/classifiers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@ def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> plugin.Plugin:
raise RuntimeError("Training requires X, y")
Y = cast.to_dataframe(args[0]).values.ravel()

return self._fit(X, Y, **kwargs)
self._fit(X, Y, **kwargs)

self._fitted = True

return self

def score(self, X: pd.DataFrame, y: pd.DataFrame, metric: str = "aucroc") -> float:
ev = classifier_evaluator(metric)
Expand Down
1 change: 1 addition & 0 deletions src/autoprognosis/plugins/prediction/regression/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "RegressionPlugin":

X = self._fit_input(X)
self._fit(X, *args, **kwargs)
self._fitted = True

return self

Expand Down
4 changes: 4 additions & 0 deletions src/autoprognosis/plugins/prediction/risk_estimation/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "RiskEstimationPlug

X = self._fit_input(X)
self._fit(X, *args, **kwargs)
self._fitted = True

if self.with_explanations and self.explainer is None:
if "eval_times" not in kwargs:
Expand All @@ -81,6 +82,9 @@ def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "RiskEstimationPlug
return self

def explain(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame:
if not self.is_fitted():
raise RuntimeError("Fit the model first")

X = self._transform_input(X)
if self.explainer is None:
raise ValueError("Interpretability is not enabled for this model")
Expand Down
2 changes: 1 addition & 1 deletion src/autoprognosis/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.8"
__version__ = "0.1.9"
Loading