diff --git a/pdr_backend/aimodel/aimodel_factory.py b/pdr_backend/aimodel/aimodel_factory.py index aab4cfeef..2ee9f75be 100644 --- a/pdr_backend/aimodel/aimodel_factory.py +++ b/pdr_backend/aimodel/aimodel_factory.py @@ -51,7 +51,9 @@ def build( ytrue[0], ytrue[1] = True, False skm = DummyClassifier(strategy="most_frequent") elif ss.approach == "LinearLogistic": - skm = LogisticRegression() + skm = LogisticRegression(max_iter=1000) + elif ss.approach == "LinearLogistic_Balanced": + skm = LogisticRegression(max_iter=1000, class_weight="balanced") elif ss.approach == "LinearSVC": skm = SVC(kernel="linear", probability=True, C=0.025) else: @@ -92,10 +94,21 @@ def build( # calibrate output probabilities if do_constant or ss.calibrate_probs == "None": pass - elif ss.calibrate_probs == "CalibratedClassifierCV_5x": + elif ss.calibrate_probs in [ + "CalibratedClassifierCV_Sigmoid", + "CalibratedClassifierCV_Isotonic", + ]: + N = X.shape[0] + method = ss.calibrate_probs_skmethod(N) # 'sigmoid' or 'isotonic' cv = min(smallest_n, 5) if cv > 1: - skm = CalibratedClassifierCV(skm, cv=cv) + skm = CalibratedClassifierCV( + skm, + method=method, + cv=cv, + ensemble=True, + n_jobs=-1, + ) else: raise ValueError(ss.calibrate_probs) diff --git a/pdr_backend/aimodel/test/test_aimodel_factory.py b/pdr_backend/aimodel/test/test_aimodel_factory.py index 972695ed9..ae2f76e80 100644 --- a/pdr_backend/aimodel/test/test_aimodel_factory.py +++ b/pdr_backend/aimodel/test/test_aimodel_factory.py @@ -68,7 +68,7 @@ def _test_aimodel_factory_2vars_main(approach): if approach != "Constant": assert classif_acc(ytrue_hat, ytrue) > 0.8 - assert 0 < min(yptrue_hat) < max(yptrue_hat) < 1.0 + assert 0 <= min(yptrue_hat) <= max(yptrue_hat) <= 1.0 assert_array_equal(yptrue_hat > 0.5, ytrue_hat) # test variable importances @@ -84,6 +84,9 @@ def _test_aimodel_factory_2vars_main(approach): figure = plot_aimodel_response(d) assert isinstance(figure, Figure) + if SHOW_PLOT: + figure.show() + @enforce_types def 
test_aimodel_factory_constantdata(): diff --git a/pdr_backend/cli/cli_module.py b/pdr_backend/cli/cli_module.py index d85840bbb..0304521d5 100644 --- a/pdr_backend/cli/cli_module.py +++ b/pdr_backend/cli/cli_module.py @@ -54,7 +54,7 @@ def _do_main(): parser = get_arg_parser(func_name) args, nested_args = parser.parse_known_args() print_args(args) - logger.info(nested_args) + logger.info("Nested args: %s", nested_args) func(args, nested_args) diff --git a/pdr_backend/ppss/aimodel_ss.py b/pdr_backend/ppss/aimodel_ss.py index 694a6fbda..4b3f90874 100644 --- a/pdr_backend/ppss/aimodel_ss.py +++ b/pdr_backend/ppss/aimodel_ss.py @@ -5,10 +5,19 @@ from pdr_backend.util.strutil import StrMixin -APPROACH_OPTIONS = ["LinearLogistic", "LinearSVC", "Constant"] +APPROACH_OPTIONS = [ + "LinearLogistic", + "LinearLogistic_Balanced", + "LinearSVC", + "Constant", +] WEIGHT_RECENT_OPTIONS = ["10x_5x", "None"] BALANCE_CLASSES_OPTIONS = ["SMOTE", "RandomOverSampler", "None"] -CALIBRATE_PROBS_OPTIONS = ["CalibratedClassifierCV_5x", "None"] +CALIBRATE_PROBS_OPTIONS = [ + "CalibratedClassifierCV_Sigmoid", + "CalibratedClassifierCV_Isotonic", + "None", +] class AimodelSS(StrMixin): @@ -64,9 +73,28 @@ def balance_classes(self) -> str: @property def calibrate_probs(self) -> str: - """eg 'CalibratedClassifierCV_5x'""" + """eg 'CalibratedClassifierCV_Sigmoid'""" return self.d["calibrate_probs"] + def calibrate_probs_skmethod(self, N: int) -> str: + """ + @description + Return the value for 'method' argument in sklearn + CalibratedClassifierCV(). 
+ + @arguments + N -- number of samples + """ + if N < 200: + return "sigmoid" + + c = self.calibrate_probs + if c == "CalibratedClassifierCV_Sigmoid": + return "sigmoid" + if c == "CalibratedClassifierCV_Isotonic": + return "isotonic" + raise ValueError(c) + # ========================================================================= # utilities for testing @@ -88,6 +116,6 @@ def aimodel_ss_test_dict( "approach": approach or "LinearLogistic", "weight_recent": weight_recent or "10x_5x", "balance_classes": balance_classes or "SMOTE", - "calibrate_probs": calibrate_probs or "CalibratedClassifierCV_5x", + "calibrate_probs": calibrate_probs or "CalibratedClassifierCV_Sigmoid", } return d diff --git a/pdr_backend/ppss/test/test_aimodel_ss.py b/pdr_backend/ppss/test/test_aimodel_ss.py index d73bc68cb..f69536858 100644 --- a/pdr_backend/ppss/test/test_aimodel_ss.py +++ b/pdr_backend/ppss/test/test_aimodel_ss.py @@ -23,7 +23,9 @@ def test_aimodel_ss__default_values(): assert ss.approach == d["approach"] == "LinearLogistic" assert ss.weight_recent == d["weight_recent"] == "10x_5x" assert ss.balance_classes == d["balance_classes"] == "SMOTE" - assert ss.calibrate_probs == d["calibrate_probs"] == "CalibratedClassifierCV_5x" + assert ( + ss.calibrate_probs == d["calibrate_probs"] == "CalibratedClassifierCV_Sigmoid" + ) # str assert "AimodelSS" in str(ss) @@ -86,3 +88,16 @@ def test_aimodel_ss__bad_inputs(): with pytest.raises(ValueError): AimodelSS(aimodel_ss_test_dict(calibrate_probs="foo")) + + +@enforce_types +def test_aimodel_ss__calibrate_probs_skmethod(): + d = aimodel_ss_test_dict(calibrate_probs="CalibratedClassifierCV_Sigmoid") + ss = AimodelSS(d) + assert ss.calibrate_probs_skmethod(100) == "sigmoid" + assert ss.calibrate_probs_skmethod(1000) == "sigmoid" + + d = aimodel_ss_test_dict(calibrate_probs="CalibratedClassifierCV_Isotonic") + ss = AimodelSS(d) + assert ss.calibrate_probs_skmethod(100) == "sigmoid" # because N is small + assert 
ss.calibrate_probs_skmethod(1000) == "isotonic" diff --git a/ppss.yaml b/ppss.yaml index f4e724dd8..c7ab6c651 100644 --- a/ppss.yaml +++ b/ppss.yaml @@ -28,10 +28,10 @@ predictoor_ss: aimodel_ss: max_n_train: 5000 # no. epochs to train model on autoregressive_n: 1 # no. epochs that model looks back, to predict next - approach: LinearLogistic # LinearLogistic | LinearSVC | Constant + approach: LinearLogistic # LinearLogistic | LinearLogistic_Balanced | LinearSVC | Constant weight_recent: 10x_5x # 10x_5x | None balance_classes: None # SMOTE | RandomOverSampler | None - calibrate_probs: CalibratedClassifierCV_5x # CalibratedClassifierCV_5x | None + calibrate_probs: CalibratedClassifierCV_Sigmoid # CalibratedClassifierCV_Sigmoid | CalibratedClassifierCV_Isotonic | None exchange_mgr_ss: # used by trader and sim timeout: 30000