kubeflow · k8s-ci-robot · Jun 28, 2019 · Jun 24, 2019 · Jun 24, 2019 · Jun 25, 2019
diff --git a/samples/ai-platform/Chicago Crime Pipeline.ipynb b/samples/ai-platform/Chicago Crime Pipeline.ipynb
@@ -31,7 +31,7 @@
     "%%capture\n",
     "\n",
     "# Install the SDK (Uncomment the code if the SDK is not installed before)\n",
-    "KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.21/kfp.tar.gz'\n",
+    "KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.22/kfp.tar.gz'\n",
     "!pip3 install --upgrade pip -q\n",
     "!pip3 install $KFP_PACKAGE --upgrade -q\n",
     "!pip3 install pandas --upgrade -q"
@@ -46,6 +46,7 @@
     "import json\n",
     "\n",
     "import kfp\n",
+    "import kfp.compiler as compiler\n",
     "import kfp.components as comp\n",
     "import kfp.dsl as dsl\n",
     "import kfp.gcp as gcp\n",
@@ -92,6 +93,7 @@
     "])\n",
     "EXPERIMENT_NAME = 'Chicago Crime Prediction'\n",
     "PIPELINE_NAME = 'Chicago Crime Prediction'\n",
+    "PIPELINE_FILENAME_PREFIX = 'chicago'\n",
     "PIPELINE_DESCRIPTION = ''"
    ]
   },
@@ -266,8 +268,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz'\n",
-    "import kfp.compiler as compiler\n",
+    "pipeline_filename = PIPELINE_FILENAME_PREFIX + '.pipeline.tar.gz'\n",
+    "\n",
     "compiler.Compiler().compile(pipeline_func, pipeline_filename)"
    ]
   },

diff --git a/samples/ai-platform/Chicago Crime Research.ipynb b/samples/ai-platform/Chicago Crime Research.ipynb
@@ -25,7 +25,7 @@
     "%%capture\n",
     "\n",
     "%pip install --upgrade pip\n",
-    "%pip install --upgrade matplotlib\n",
+    "%pip install --upgrade seaborn\n",
     "%pip install --upgrade numpy\n",
     "%pip install --upgrade pandas\n",
     "%pip install --upgrade tensorflow"
@@ -44,15 +44,19 @@
     "import pandas as pd\n",
     "from pandas.plotting import register_matplotlib_converters\n",
     "\n",
-    "from sklearn.preprocessing import StandardScaler\n",
+    "import matplotlib.pyplot as plt\n",
+    "register_matplotlib_converters()\n",
+    "\n",
+    "import seaborn as sns\n",
+    "\n",
+    "from sklearn.preprocessing import RobustScaler\n",
     "from sklearn.metrics import mean_squared_error\n",
     "\n",
     "import tensorflow as tf\n",
     "from tensorflow import keras\n",
     "from tensorflow.keras.layers import LSTM, Dense, Dropout\n",
     "\n",
-    "import matplotlib.pyplot as plt\n",
-    "register_matplotlib_converters()"
+    "import warnings"
    ]
   },
   {
@@ -98,12 +102,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plt.figure(figsize = (15, 5))\n",
-    "plt.plot(df, label = \"Reports\")\n",
-    "plt.xlabel(\"Time\")\n",
-    "plt.ylabel(\"Daily Crime Reports\")\n",
-    "plt.title(\"Daily Crime Reports from 2001-\")\n",
-    "plt.legend()\n",
+    "plt.figure(figsize=(20, 6))\n",
+    "with warnings.catch_warnings():\n",
+    "    warnings.simplefilter(\"ignore\")\n",
+    "    sns.lineplot(data=df).set_title('Daily Crime Reports')\n",
     "plt.show()"
    ]
   },
@@ -146,7 +148,7 @@
     "\n",
     "def preprocess_data(df, window_size):\n",
     "    # Normalize inputs to improve learning process\n",
-    "    scaler = StandardScaler()\n",
+    "    scaler = RobustScaler()\n",
     "\n",
     "    # Time series: split latest data into test set\n",
     "    train = df.values[:int(TRAINING_SPLIT * len(df)), :]\n",
@@ -338,31 +340,20 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "test_predict_plot = np.empty_like(df)\n",
-    "test_predict_plot[:, :] = 0\n",
-    "test_predict_plot[len(train_predict) + (WINDOW_SIZE * 2):len(df), :] = test_predict\n",
-    "\n",
-    "test_predict_plot = pd.DataFrame(test_predict_plot)\n",
-    "test_predict_plot.index = df.index\n",
-    "test_predict_plot = test_predict_plot[test_predict_plot[0] != 0]\n",
-    "\n",
-    "# Create the plot.\n",
-    "plt.figure(figsize = (15, 5))\n",
-    "plt.plot(df, label = \"True value\")\n",
-    "plt.plot(test_predict_plot, label = \"Test set prediction\")\n",
-    "plt.xlabel(\"Date reported\")\n",
-    "plt.ylabel(\"# of crime reports\")\n",
-    "plt.title(\"Comparison true vs. predicted test\")\n",
-    "plt.legend()\n",
+    "# Build an all-NaN frame sharing df's index and columns to hold the prediction array\n",
+    "df_test_predict = pd.DataFrame().reindex_like(df)\n",
+    "# Fill the tail rows positionally in one .iloc call; chained df['count'][a:b] = ... may write to a temporary copy\n",
+    "df_test_predict.iloc[len(train_predict) + (WINDOW_SIZE * 2):len(df), df_test_predict.columns.get_loc('count')] = np.squeeze(test_predict)\n",
+    "# Join on the shared (unmodified) index, renaming only the column to avoid a collision with 'count'\n",
+    "df_combined = df.join(df_test_predict.rename(columns={'count': 'predicted'}))\n",
+    "\n",
+    "# Plot the predicted vs actual counts\n",
+    "plt.figure(figsize=(20, 6))\n",
+    "with warnings.catch_warnings():\n",
+    "    warnings.simplefilter(\"ignore\")\n",
+    "    sns.lineplot(data=df_combined).set_title('Daily Crime Reports')\n",
     "plt.show()"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

diff --git a/samples/ai-platform/images/chicago_crime_predictions.png b/samples/ai-platform/images/chicago_crime_predictions.png