Skip to content

Commit

Permalink
Second submission.
Browse files: browse the repository at this point in the history
  • Loading branch information
steenrotsman committed May 13, 2024
1 parent 8a3ffca commit a5ec569
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 15 deletions.
10 changes: 3 additions & 7 deletions submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import joblib
import pandas as pd

THRESHOLD = 0.1


def clean_df(df, background_df=None):
"""
Expand Down Expand Up @@ -83,13 +85,7 @@ def predict_outcomes(df, background_df=None, model_path="model.joblib"):
)

# Combine predictions for individual
df_predict = (
df_predict.groupby("nomem_encr")["prediction"]
.prod()
.round()
.astype(int)
.reset_index()
)
df_predict = (df_predict["prediction"] > THRESHOLD).astype(int)

# Return only dataset with predictions and identifier
return df_predict
16 changes: 8 additions & 8 deletions training.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import joblib
import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.metrics import confusion_matrix, f1_score, roc_auc_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

from submission import clean_df
Expand Down Expand Up @@ -61,17 +61,17 @@ def train_save_model(cleaned_df, outcome_df):

# Get estimate of score
X1, X2, y1, y2 = train_test_split(X, y, test_size=0.5, stratify=y, random_state=123)
estimate(X1, X2, y1, y2)
estimate(X2, X1, y2, y1)
for thresh in range(0, 100, 10):
f11 = estimate(X1, X2, y1, y2, thresh / 100)
f12 = estimate(X2, X1, y2, y1, thresh / 100)
print(f"{thresh / 100}: {(f11 + f12) / 2:.3f}")


def estimate(X1, X2, y1, y2):
def estimate(X1, X2, y1, y2, thresh=0.5):
model = LGBMClassifier(verbose=-1, random_seed=123)
model.fit(X1, y1)
y_pred = model.predict(X2)
print(f1_score(y2, y_pred))
print(roc_auc_score(y2, model.predict_proba(X2)[:, 1]))
print(confusion_matrix(y2, y_pred))
y_pred = (model.predict_proba(X2)[:, 1] > thresh).astype(int)
return f1_score(y2, y_pred)


if __name__ == "__main__":
Expand Down

0 comments on commit a5ec569

Please sign in to comment.