Merge branch 'churn_new' of github.com:Neo9061/amazon-sagemaker-examples into churn_new
Neo9061 committed Aug 18, 2022
2 parents 7c8a215 + b3c5513 commit d195021
Showing 1 changed file with 36 additions and 55 deletions.
@@ -313,20 +313,7 @@
"metadata": {},
"outputs": [],
"source": [
"cat_columns = [\n",
" \"State\",\n",
" \"Account Length\",\n",
" \"Area Code\",\n",
" \"Phone\",\n",
" \"Int'l Plan\",\n",
" \"VMail Plan\",\n",
" \"VMail Message\",\n",
" \"Day Calls\",\n",
" \"Eve Calls\",\n",
" \"Night Calls\",\n",
" \"Intl Calls\",\n",
" \"CustServ Calls\",\n",
"]\n",
"cat_columns = [\"State\", \"Account Length\", \"Area Code\", \"Phone\", \"Int'l Plan\", \"VMail Plan\", \"VMail Message\", \"Day Calls\", \"Eve Calls\", \"Night Calls\", \"Intl Calls\", \"CustServ Calls\"]\n",
"\n",
"cat_idx = []\n",
"for idx, col_name in enumerate(churn.columns.tolist()):\n",
@@ -391,9 +378,7 @@
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"train, val_n_test = train_test_split(\n",
" churn, test_size=0.3, random_state=42, stratify=churn[\"target\"]\n",
")"
"train, val_n_test = train_test_split(churn, test_size=0.3, random_state=42, stratify=churn[\"target\"])"
]
},
{
@@ -403,9 +388,7 @@
"metadata": {},
"outputs": [],
"source": [
"val, test = train_test_split(\n",
" val_n_test, test_size=0.3, random_state=42, stratify=val_n_test[\"target\"]\n",
")"
"val, test = train_test_split(val_n_test, test_size=0.3, random_state=42, stratify=val_n_test[\"target\"])"
]
},
{
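For context, the two chained splits produce a 70/21/9 train/validation/test partition: the first test_size=0.3 holds out 30% of churn, and the second test_size=0.3 carves 30% of that holdout (9% of the total) into test, leaving 21% for val. A self-contained sketch with a stand-in DataFrame:

    import pandas as pd
    from sklearn.model_selection import train_test_split

    # Stand-in frame with a balanced binary target, just to exercise the split logic
    churn = pd.DataFrame({"x": range(1000), "target": [i % 2 for i in range(1000)]})

    train, val_n_test = train_test_split(churn, test_size=0.3, random_state=42, stratify=churn["target"])
    val, test = train_test_split(val_n_test, test_size=0.3, random_state=42, stratify=val_n_test["target"])

    print(len(train), len(val), len(test))  # 700 210 90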
@@ -824,9 +807,7 @@
"def query_endpoint(encoded_tabular_data, endpoint_name):\n",
" client = boto3.client(\"runtime.sagemaker\")\n",
" response = client.invoke_endpoint(\n",
" EndpointName=endpoint_name,\n",
" ContentType=content_type,\n",
" Body=encoded_tabular_data,\n",
" EndpointName=endpoint_name, ContentType=content_type, Body=encoded_tabular_data,\n",
" )\n",
" return response\n",
"\n",
@@ -843,14 +824,16 @@
"for i in np.arange(0, num_examples, step=batch_size):\n",
" query_response_batch = query_endpoint(\n",
" features.iloc[i : (i + batch_size), :].to_csv(header=False, index=False).encode(\"utf-8\"),\n",
" endpoint_name,\n",
" endpoint_name\n",
" )\n",
" predict_prob_batch = parse_response(query_response_batch) # prediction probability per batch\n",
" predict_prob.append(predict_prob_batch)\n",
"\n",
"\n",
"predict_prob = np.concatenate(predict_prob, axis=0)\n",
"predict_label = np.argmax(predict_prob, axis=1)"
"predict_label = np.argmax(\n",
" predict_prob, axis=1\n",
") "
]
},
{
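parse_response is called in this hunk but defined elsewhere in the notebook; for JumpStart tabular endpoints it typically decodes the JSON response body and extracts the per-class probabilities. A hedged sketch — the "probabilities" key is an assumption based on other JumpStart examples, not something shown in this diff:

    import json
    import numpy as np

    def parse_response(query_response):
        # Decode the invoke_endpoint response and pull out class probabilities
        model_predictions = json.loads(query_response["Body"].read())
        return np.array(model_predictions["probabilities"])  # assumed response key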
@@ -910,7 +893,7 @@
" \"AUC\": eval_auc,\n",
" },\n",
" orient=\"index\",\n",
" columns=[\"LightGBM with AMT\"],\n",
" columns= [\"LightGBM with AMT\"]\n",
")\n",
"\n",
"lgb_results"
@@ -994,7 +977,9 @@
")\n",
"\n",
"# [Optional] Override default hyperparameters with custom values\n",
"hyperparameters[\"iterations\"] = \"500\"\n",
"hyperparameters[\n",
" \"iterations\"\n",
"] = \"500\"\n",
"\n",
"\n",
"hyperparameters[\"eval_metric\"] = \"AUC\"\n",
@@ -1162,7 +1147,9 @@
"\n",
"\n",
"predict_prob_cat = np.concatenate(predict_prob_cat, axis=0)\n",
"predict_label_cat = np.argmax(predict_prob_cat, axis=1)"
"predict_label_cat = np.argmax(\n",
" predict_prob_cat, axis=1\n",
") "
]
},
{
@@ -1213,7 +1200,7 @@
" \"AUC\": eval_auc_cat,\n",
" },\n",
" orient=\"index\",\n",
" columns=[\"CatBoost with AMT\"],\n",
" columns= [\"CatBoost with AMT\"]\n",
")\n",
"\n",
"results_lab_cat = pd.concat([lgb_results, cat_results], axis=1)\n",
@@ -1251,11 +1238,7 @@
"metadata": {},
"outputs": [],
"source": [
"train_model_id, train_model_version, train_scope = (\n",
" \"pytorch-tabtransformerclassification-model\",\n",
" \"*\",\n",
" \"training\",\n",
")\n",
"train_model_id, train_model_version, train_scope = \"pytorch-tabtransformerclassification-model\", \"*\", \"training\"\n",
"training_instance_type = \"ml.p3.2xlarge\"\n",
"\n",
"# Retrieve the docker image\n",
@@ -1300,8 +1283,12 @@
")\n",
"\n",
"# [Optional] Override default hyperparameters with custom values\n",
"hyperparameters[\"n_epochs\"] = 40 # The same hyperparameter is named as \"iterations\" for CatBoost\n",
"hyperparameters[\"patience\"] = 10\n",
"hyperparameters[\n",
" \"n_epochs\"\n",
"] = 40 # The same hyperparameter is named as \"iterations\" for CatBoost\n",
"hyperparameters[\n",
" \"patience\"\n",
"] = 10\n",
"\n",
"print(hyperparameters)"
]
@@ -1331,13 +1318,7 @@
"metadata": {},
"outputs": [],
"source": [
"from sagemaker.tuner import (\n",
" ContinuousParameter,\n",
" IntegerParameter,\n",
" HyperparameterTuner,\n",
" CategoricalParameter,\n",
")\n",
"\n",
"from sagemaker.tuner import ContinuousParameter, IntegerParameter, HyperparameterTuner, CategoricalParameter\n",
"hyperparameter_ranges_tab = {\n",
" \"learning_rate\": ContinuousParameter(0.001, 0.01, scaling_type=\"Auto\"),\n",
" \"batch_size\": CategoricalParameter([64, 128, 256, 512]),\n",
@@ -1383,11 +1364,11 @@
"\n",
" tuner_tab = HyperparameterTuner(\n",
" tabular_estimator_tab,\n",
" \"f1_score\", # Note, TabTransformer currently does not support AUC score, thus we use its default setting F1 score as an alternative evaluation metric.\n",
" \"f1_score\", # Note, TabTransformer currently does not support AUC score, thus we use its default setting F1 score as an alternative evaluation metric.\n",
" hyperparameter_ranges_tab,\n",
" [{\"Name\": \"f1_score\", \"Regex\": \"metrics={'f1': (\\\\S+)}\"}],\n",
" max_jobs=10,\n",
" max_parallel_jobs=5, # reduce max_parallel_jobs number if the instance type is limited in your account\n",
" max_parallel_jobs=5, # reduce max_parallel_jobs number if the instance type is limited in your account\n",
" objective_type=\"Maximize\",\n",
" base_tuning_job_name=training_job_name,\n",
" )\n",
@@ -1465,7 +1446,9 @@
"\n",
"\n",
"predict_prob_tab = np.concatenate(predict_prob_tab, axis=0)\n",
"predict_label_tab = np.argmax(predict_prob_tab, axis=1)"
"predict_label_tab = np.argmax(\n",
" predict_prob_tab, axis=1\n",
") "
]
},
{
@@ -1516,7 +1499,7 @@
" \"AUC\": eval_auc_tab,\n",
" },\n",
" orient=\"index\",\n",
" columns=[\"TabTransformer with AMT\"],\n",
" columns= [\"TabTransformer with AMT\"]\n",
")\n",
"\n",
"results_lab_cat_tab = pd.concat([results_lab_cat, tab_results], axis=1)\n",
@@ -1558,12 +1541,8 @@
"\n",
"# Currently, not all the object detection models in jumpstart support finetuning. Thus, we manually select a model\n",
"# which supports finetuning.\n",
"train_model_id, train_model_version, train_scope = (\n",
" \"autogluon-classification-ensemble\",\n",
" \"*\",\n",
" \"training\",\n",
")\n",
"training_instance_type = \"ml.g4dn.2xlarge\" # set a different GPU type to avoid instance insufficiency for p3 instance that is used by TabTransformer\n",
"train_model_id, train_model_version, train_scope = \"autogluon-classification-ensemble\", \"*\", \"training\"\n",
"training_instance_type = \"ml.g4dn.2xlarge\" # set a different GPU type to avoid instance insufficiency for p3 instance that is used by TabTransformer\n",
"\n",
"# Retrieve the docker image\n",
"train_image_uri = image_uris.retrieve(\n",
@@ -1727,7 +1706,9 @@
"\n",
"\n",
"predict_prob_ag = np.concatenate(predict_prob_ag, axis=0)\n",
"predict_label_ag = np.argmax(predict_prob_ag, axis=1)"
"predict_label_ag = np.argmax(\n",
" predict_prob_ag, axis=1\n",
") "
]
},
{
@@ -1770,7 +1751,7 @@
" \"AUC\": eval_auc_ag,\n",
" },\n",
" orient=\"index\",\n",
" columns=[\"AutoGluon-Tabular\"],\n",
" columns= [\"AutoGluon-Tabular\"]\n",
")\n",
"\n",
"results_lab_cat_tab_ag = pd.concat([results_lab_cat_tab, ag_results], axis=1)\n",
