Merge branch 'churn_new' of github.com:Neo9061/amazon-sagemaker-examples into churn_new
Neo9061 committed Aug 18, 2022
2 parents 7c8a215 + b3c5513 commit d195021
Showing 1 changed file with 36 additions and 55 deletions.
@@ -313,20 +313,7 @@
"metadata": {},
"outputs": [],
"source": [
"cat_columns = [\n",
" \"State\",\n",
" \"Account Length\",\n",
" \"Area Code\",\n",
" \"Phone\",\n",
" \"Int'l Plan\",\n",
" \"VMail Plan\",\n",
" \"VMail Message\",\n",
" \"Day Calls\",\n",
" \"Eve Calls\",\n",
" \"Night Calls\",\n",
" \"Intl Calls\",\n",
" \"CustServ Calls\",\n",
"]\n",
"cat_columns = [\"State\", \"Account Length\", \"Area Code\", \"Phone\", \"Int'l Plan\", \"VMail Plan\", \"VMail Message\", \"Day Calls\", \"Eve Calls\", \"Night Calls\", \"Intl Calls\", \"CustServ Calls\"]\n",
"\n",
"cat_idx = []\n",
"for idx, col_name in enumerate(churn.columns.tolist()):\n",
@@ -391,9 +378,7 @@
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"train, val_n_test = train_test_split(\n",
" churn, test_size=0.3, random_state=42, stratify=churn[\"target\"]\n",
")"
"train, val_n_test = train_test_split(churn, test_size=0.3, random_state=42, stratify=churn[\"target\"])"
]
},
{
@@ -403,9 +388,7 @@
"metadata": {},
"outputs": [],
"source": [
"val, test = train_test_split(\n",
" val_n_test, test_size=0.3, random_state=42, stratify=val_n_test[\"target\"]\n",
")"
"val, test = train_test_split(val_n_test, test_size=0.3, random_state=42, stratify=val_n_test[\"target\"])"
]
},
{
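For context, the two chained splits produce a 70/21/9 train/validation/test partition: the first test_size=0.3 holds out 30% of churn, and the second test_size=0.3 carves 30% of that holdout (9% of the total) into test, leaving 21% for val. A self-contained sketch with a stand-in DataFrame:

    import pandas as pd
    from sklearn.model_selection import train_test_split

    # Stand-in frame with a balanced binary target, just to exercise the split logic
    churn = pd.DataFrame({"x": range(1000), "target": [i % 2 for i in range(1000)]})

    train, val_n_test = train_test_split(churn, test_size=0.3, random_state=42, stratify=churn["target"])
    val, test = train_test_split(val_n_test, test_size=0.3, random_state=42, stratify=val_n_test["target"])

    print(len(train), len(val), len(test))  # 700 210 90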
@@ -824,9 +807,7 @@
"def query_endpoint(encoded_tabular_data, endpoint_name):\n",
" client = boto3.client(\"runtime.sagemaker\")\n",
" response = client.invoke_endpoint(\n",
" EndpointName=endpoint_name,\n",
" ContentType=content_type,\n",
" Body=encoded_tabular_data,\n",
" EndpointName=endpoint_name, ContentType=content_type, Body=encoded_tabular_data,\n",
" )\n",
" return response\n",
"\n",
@@ -843,14 +824,16 @@
"for i in np.arange(0, num_examples, step=batch_size):\n",
" query_response_batch = query_endpoint(\n",
" features.iloc[i : (i + batch_size), :].to_csv(header=False, index=False).encode(\"utf-8\"),\n",
" endpoint_name,\n",
" endpoint_name\n",
" )\n",
" predict_prob_batch = parse_response(query_response_batch) # prediction probability per batch\n",
" predict_prob.append(predict_prob_batch)\n",
"\n",
"\n",
"predict_prob = np.concatenate(predict_prob, axis=0)\n",
"predict_label = np.argmax(predict_prob, axis=1)"
"predict_label = np.argmax(\n",
" predict_prob, axis=1\n",
") "
]
},
{
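parse_response is called in this hunk but defined elsewhere in the notebook; for JumpStart tabular endpoints it typically decodes the JSON response body and extracts the per-class probabilities. A hedged sketch — the "probabilities" key is an assumption based on other JumpStart examples, not something shown in this diff:

    import json
    import numpy as np

    def parse_response(query_response):
        # Decode the invoke_endpoint response and pull out class probabilities
        model_predictions = json.loads(query_response["Body"].read())
        return np.array(model_predictions["probabilities"])  # assumed response key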
@@ -910,7 +893,7 @@
" \"AUC\": eval_auc,\n",
" },\n",
" orient=\"index\",\n",
" columns=[\"LightGBM with AMT\"],\n",
" columns= [\"LightGBM with AMT\"]\n",
")\n",
"\n",
"lgb_results"
@@ -994,7 +977,9 @@
")\n",
"\n",
"# [Optional] Override default hyperparameters with custom values\n",
"hyperparameters[\"iterations\"] = \"500\"\n",
"hyperparameters[\n",
" \"iterations\"\n",
"] = \"500\"\n",
"\n",
"\n",
"hyperparameters[\"eval_metric\"] = \"AUC\"\n",
@@ -1162,7 +1147,9 @@
"\n",
"\n",
"predict_prob_cat = np.concatenate(predict_prob_cat, axis=0)\n",
"predict_label_cat = np.argmax(predict_prob_cat, axis=1)"
"predict_label_cat = np.argmax(\n",
" predict_prob_cat, axis=1\n",
") "
]
},
{
@@ -1213,7 +1200,7 @@
" \"AUC\": eval_auc_cat,\n",
" },\n",
" orient=\"index\",\n",
" columns=[\"CatBoost with AMT\"],\n",
" columns= [\"CatBoost with AMT\"]\n",
")\n",
"\n",
"results_lab_cat = pd.concat([lgb_results, cat_results], axis=1)\n",
@@ -1251,11 +1238,7 @@
"metadata": {},
"outputs": [],
"source": [
"train_model_id, train_model_version, train_scope = (\n",
" \"pytorch-tabtransformerclassification-model\",\n",
" \"*\",\n",
" \"training\",\n",
")\n",
"train_model_id, train_model_version, train_scope = \"pytorch-tabtransformerclassification-model\", \"*\", \"training\"\n",
"training_instance_type = \"ml.p3.2xlarge\"\n",
"\n",
"# Retrieve the docker image\n",
@@ -1300,8 +1283,12 @@
")\n",
"\n",
"# [Optional] Override default hyperparameters with custom values\n",
"hyperparameters[\"n_epochs\"] = 40 # The same hyperparameter is named as \"iterations\" for CatBoost\n",
"hyperparameters[\"patience\"] = 10\n",
"hyperparameters[\n",
" \"n_epochs\"\n",
"] = 40 # The same hyperparameter is named as \"iterations\" for CatBoost\n",
"hyperparameters[\n",
" \"patience\"\n",
"] = 10\n",
"\n",
"print(hyperparameters)"
]
@@ -1331,13 +1318,7 @@
"metadata": {},
"outputs": [],
"source": [
"from sagemaker.tuner import (\n",
" ContinuousParameter,\n",
" IntegerParameter,\n",
" HyperparameterTuner,\n",
" CategoricalParameter,\n",
")\n",
"\n",
"from sagemaker.tuner import ContinuousParameter, IntegerParameter, HyperparameterTuner, CategoricalParameter\n",
"hyperparameter_ranges_tab = {\n",
" \"learning_rate\": ContinuousParameter(0.001, 0.01, scaling_type=\"Auto\"),\n",
" \"batch_size\": CategoricalParameter([64, 128, 256, 512]),\n",
@@ -1383,11 +1364,11 @@
"\n",
" tuner_tab = HyperparameterTuner(\n",
" tabular_estimator_tab,\n",
" \"f1_score\", # Note, TabTransformer currently does not support AUC score, thus we use its default setting F1 score as an alternative evaluation metric.\n",
" \"f1_score\", # Note, TabTransformer currently does not support AUC score, thus we use its default setting F1 score as an alternative evaluation metric.\n",
" hyperparameter_ranges_tab,\n",
" [{\"Name\": \"f1_score\", \"Regex\": \"metrics={'f1': (\\\\S+)}\"}],\n",
" max_jobs=10,\n",
" max_parallel_jobs=5, # reduce max_parallel_jobs number if the instance type is limited in your account\n",
" max_parallel_jobs=5, # reduce max_parallel_jobs number if the instance type is limited in your account\n",
" objective_type=\"Maximize\",\n",
" base_tuning_job_name=training_job_name,\n",
" )\n",
@@ -1465,7 +1446,9 @@
"\n",
"\n",
"predict_prob_tab = np.concatenate(predict_prob_tab, axis=0)\n",
"predict_label_tab = np.argmax(predict_prob_tab, axis=1)"
"predict_label_tab = np.argmax(\n",
" predict_prob_tab, axis=1\n",
") "
]
},
{
@@ -1516,7 +1499,7 @@
" \"AUC\": eval_auc_tab,\n",
" },\n",
" orient=\"index\",\n",
" columns=[\"TabTransformer with AMT\"],\n",
" columns= [\"TabTransformer with AMT\"]\n",
")\n",
"\n",
"results_lab_cat_tab = pd.concat([results_lab_cat, tab_results], axis=1)\n",
@@ -1558,12 +1541,8 @@
"\n",
"# Currently, not all the object detection models in jumpstart support finetuning. Thus, we manually select a model\n",
"# which supports finetuning.\n",
"train_model_id, train_model_version, train_scope = (\n",
" \"autogluon-classification-ensemble\",\n",
" \"*\",\n",
" \"training\",\n",
")\n",
"training_instance_type = \"ml.g4dn.2xlarge\" # set a different GPU type to avoid instance insufficiency for p3 instance that is used by TabTransformer\n",
"train_model_id, train_model_version, train_scope = \"autogluon-classification-ensemble\", \"*\", \"training\"\n",
"training_instance_type = \"ml.g4dn.2xlarge\" # set a different GPU type to avoid instance insufficiency for p3 instance that is used by TabTransformer\n",
"\n",
"# Retrieve the docker image\n",
"train_image_uri = image_uris.retrieve(\n",
@@ -1727,7 +1706,9 @@
"\n",
"\n",
"predict_prob_ag = np.concatenate(predict_prob_ag, axis=0)\n",
"predict_label_ag = np.argmax(predict_prob_ag, axis=1)"
"predict_label_ag = np.argmax(\n",
" predict_prob_ag, axis=1\n",
") "
]
},
{
@@ -1770,7 +1751,7 @@
" \"AUC\": eval_auc_ag,\n",
" },\n",
" orient=\"index\",\n",
" columns=[\"AutoGluon-Tabular\"],\n",
" columns= [\"AutoGluon-Tabular\"]\n",
")\n",
"\n",
"results_lab_cat_tab_ag = pd.concat([results_lab_cat_tab, ag_results], axis=1)\n",
