Skip to content

Commit

Permalink
Fix #890: [Aimodel] Add Jaime's techniques: balance class weights in …
Browse files Browse the repository at this point in the history
…logistic regression, calibrate with isotonic (#959)

Fix #890
  • Loading branch information
trentmc authored Apr 30, 2024
1 parent 89b69ce commit 0abf1ec
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 12 deletions.
19 changes: 16 additions & 3 deletions pdr_backend/aimodel/aimodel_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ def build(
ytrue[0], ytrue[1] = True, False
skm = DummyClassifier(strategy="most_frequent")
elif ss.approach == "LinearLogistic":
skm = LogisticRegression()
skm = LogisticRegression(max_iter=1000)
elif ss.approach == "LinearLogistic_Balanced":
skm = LogisticRegression(max_iter=1000, class_weight="balanced")
elif ss.approach == "LinearSVC":
skm = SVC(kernel="linear", probability=True, C=0.025)
else:
Expand Down Expand Up @@ -92,10 +94,21 @@ def build(
# calibrate output probabilities
if do_constant or ss.calibrate_probs == "None":
pass
elif ss.calibrate_probs == "CalibratedClassifierCV_5x":
elif ss.calibrate_probs in [
"CalibratedClassifierCV_Sigmoid",
"CalibratedClassifierCV_Isotonic",
]:
N = X.shape[0]
method = ss.calibrate_probs_skmethod(N) # 'sigmoid' or 'isotonic'
cv = min(smallest_n, 5)
if cv > 1:
skm = CalibratedClassifierCV(skm, cv=cv)
skm = CalibratedClassifierCV(
skm,
method=method,
cv=cv,
ensemble=True,
n_jobs=-1,
)
else:
raise ValueError(ss.calibrate_probs)

Expand Down
5 changes: 4 additions & 1 deletion pdr_backend/aimodel/test/test_aimodel_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def _test_aimodel_factory_2vars_main(approach):

if approach != "Constant":
assert classif_acc(ytrue_hat, ytrue) > 0.8
assert 0 < min(yptrue_hat) < max(yptrue_hat) < 1.0
assert 0 <= min(yptrue_hat) <= max(yptrue_hat) <= 1.0
assert_array_equal(yptrue_hat > 0.5, ytrue_hat)

# test variable importances
Expand All @@ -84,6 +84,9 @@ def _test_aimodel_factory_2vars_main(approach):
figure = plot_aimodel_response(d)
assert isinstance(figure, Figure)

if SHOW_PLOT:
figure.show()


@enforce_types
def test_aimodel_factory_constantdata():
Expand Down
2 changes: 1 addition & 1 deletion pdr_backend/cli/cli_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def _do_main():
parser = get_arg_parser(func_name)
args, nested_args = parser.parse_known_args()
print_args(args)
logger.info(nested_args)
logger.info("Nested args: %s", nested_args)

func(args, nested_args)

Expand Down
36 changes: 32 additions & 4 deletions pdr_backend/ppss/aimodel_ss.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,19 @@

from pdr_backend.util.strutil import StrMixin

APPROACH_OPTIONS = ["LinearLogistic", "LinearSVC", "Constant"]
APPROACH_OPTIONS = [
"LinearLogistic",
"LinearLogistic_Balanced",
"LinearSVC",
"Constant",
]
WEIGHT_RECENT_OPTIONS = ["10x_5x", "None"]
BALANCE_CLASSES_OPTIONS = ["SMOTE", "RandomOverSampler", "None"]
CALIBRATE_PROBS_OPTIONS = ["CalibratedClassifierCV_5x", "None"]
CALIBRATE_PROBS_OPTIONS = [
"CalibratedClassifierCV_Sigmoid",
"CalibratedClassifierCV_Isotonic",
"None",
]


class AimodelSS(StrMixin):
Expand Down Expand Up @@ -64,9 +73,28 @@ def balance_classes(self) -> str:

@property
def calibrate_probs(self) -> str:
"""eg 'CalibratedClassifierCV_5x'"""
"""eg 'CalibratedClassifierCV_Sigmoid'"""
return self.d["calibrate_probs"]

def calibrate_probs_skmethod(self, N: int) -> str:
    """
    @description
      Return the value for the 'method' argument in sklearn
      CalibratedClassifierCV().

      The configured setting is validated first, so an unsupported
      value raises regardless of N. For small sample counts the result
      is always 'sigmoid', even when isotonic was requested.

    @arguments
      N -- number of samples

    @return
      skmethod -- 'sigmoid' or 'isotonic'

    @raises
      ValueError -- if self.calibrate_probs is not one of the
        'CalibratedClassifierCV_*' settings
    """
    skmethod_by_setting = {
        "CalibratedClassifierCV_Sigmoid": "sigmoid",
        "CalibratedClassifierCV_Isotonic": "isotonic",
    }

    c = self.calibrate_probs
    # Validate before the small-N shortcut, so that a bad setting is
    # reported consistently instead of only when N happens to be large.
    if c not in skmethod_by_setting:
        raise ValueError(c)

    # With fewer than 200 samples, always fall back to sigmoid.
    # NOTE(review): presumably because isotonic calibration tends to
    # overfit on few samples (see sklearn calibration docs) -- confirm.
    if N < 200:
        return "sigmoid"

    return skmethod_by_setting[c]


# =========================================================================
# utilities for testing
Expand All @@ -88,6 +116,6 @@ def aimodel_ss_test_dict(
"approach": approach or "LinearLogistic",
"weight_recent": weight_recent or "10x_5x",
"balance_classes": balance_classes or "SMOTE",
"calibrate_probs": calibrate_probs or "CalibratedClassifierCV_5x",
"calibrate_probs": calibrate_probs or "CalibratedClassifierCV_Sigmoid",
}
return d
17 changes: 16 additions & 1 deletion pdr_backend/ppss/test/test_aimodel_ss.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ def test_aimodel_ss__default_values():
assert ss.approach == d["approach"] == "LinearLogistic"
assert ss.weight_recent == d["weight_recent"] == "10x_5x"
assert ss.balance_classes == d["balance_classes"] == "SMOTE"
assert ss.calibrate_probs == d["calibrate_probs"] == "CalibratedClassifierCV_5x"
assert (
ss.calibrate_probs == d["calibrate_probs"] == "CalibratedClassifierCV_Sigmoid"
)

# str
assert "AimodelSS" in str(ss)
Expand Down Expand Up @@ -86,3 +88,16 @@ def test_aimodel_ss__bad_inputs():

with pytest.raises(ValueError):
AimodelSS(aimodel_ss_test_dict(calibrate_probs="foo"))


@enforce_types
def test_aimodel_ss__calibrate_probs_skmethod():
    """Check calibrate_probs_skmethod() for each calibration setting.

    For every setting, the expected sklearn method is listed per sample
    count N. With the isotonic setting, a small N still yields "sigmoid".
    """
    expected_by_setting = {
        "CalibratedClassifierCV_Sigmoid": [
            (100, "sigmoid"),
            (1000, "sigmoid"),
        ],
        "CalibratedClassifierCV_Isotonic": [
            (100, "sigmoid"),  # because N is small
            (1000, "isotonic"),
        ],
    }

    for setting, cases in expected_by_setting.items():
        ss = AimodelSS(aimodel_ss_test_dict(calibrate_probs=setting))
        for n_samples, skmethod in cases:
            assert ss.calibrate_probs_skmethod(n_samples) == skmethod
4 changes: 2 additions & 2 deletions ppss.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ predictoor_ss:
aimodel_ss:
max_n_train: 5000 # no. epochs to train model on
autoregressive_n: 1 # no. epochs that model looks back, to predict next
approach: LinearLogistic # LinearLogistic | LinearSVC | Constant
approach: LinearLogistic # LinearLogistic | LinearLogistic_Balanced | LinearSVC | Constant
weight_recent: 10x_5x # 10x_5x | None
balance_classes: None # SMOTE | RandomOverSampler | None
calibrate_probs: CalibratedClassifierCV_5x # CalibratedClassifierCV_5x | None
calibrate_probs: CalibratedClassifierCV_Sigmoid # CalibratedClassifierCV_Sigmoid | CalibratedClassifierCV_Isotonic | None

exchange_mgr_ss: # used by trader and sim
timeout: 30000
Expand Down

0 comments on commit 0abf1ec

Please sign in to comment.