Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pipeline sample: Updated research notebook to use Seaborn #1546

Merged
3 commits merged on Jun 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions samples/ai-platform/Chicago Crime Pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"%%capture\n",
"\n",
"# Install the SDK (Uncomment the code if the SDK is not installed before)\n",
"KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.21/kfp.tar.gz'\n",
"KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.22/kfp.tar.gz'\n",
"!pip3 install --upgrade pip -q\n",
"!pip3 install $KFP_PACKAGE --upgrade -q\n",
"!pip3 install pandas --upgrade -q"
Expand All @@ -46,6 +46,7 @@
"import json\n",
"\n",
"import kfp\n",
"import kfp.compiler as compiler\n",
"import kfp.components as comp\n",
"import kfp.dsl as dsl\n",
"import kfp.gcp as gcp\n",
Expand Down Expand Up @@ -92,6 +93,7 @@
"])\n",
"EXPERIMENT_NAME = 'Chicago Crime Prediction'\n",
"PIPELINE_NAME = 'Chicago Crime Prediction'\n",
"PIPELINE_FILENAME_PREFIX = 'chicago'\n",
"PIPELINE_DESCRIPTION = ''"
]
},
Expand Down Expand Up @@ -266,8 +268,8 @@
"metadata": {},
"outputs": [],
"source": [
"pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz'\n",
"import kfp.compiler as compiler\n",
"pipeline_filename = PIPELINE_FILENAME_PREFIX + '.pipeline.tar.gz'\n",
"\n",
"compiler.Compiler().compile(pipeline_func, pipeline_filename)"
]
},
Expand Down
59 changes: 25 additions & 34 deletions samples/ai-platform/Chicago Crime Research.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"%%capture\n",
"\n",
"%pip install --upgrade pip\n",
"%pip install --upgrade matplotlib\n",
"%pip install --upgrade seaborn\n",
"%pip install --upgrade numpy\n",
"%pip install --upgrade pandas\n",
"%pip install --upgrade tensorflow"
Expand All @@ -44,15 +44,19 @@
"import pandas as pd\n",
"from pandas.plotting import register_matplotlib_converters\n",
"\n",
"from sklearn.preprocessing import StandardScaler\n",
"import matplotlib.pyplot as plt\n",
"register_matplotlib_converters()\n",
"\n",
"import seaborn as sns\n",
"\n",
"from sklearn.preprocessing import RobustScaler\n",
"from sklearn.metrics import mean_squared_error\n",
"\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"from tensorflow.keras.layers import LSTM, Dense, Dropout\n",
"\n",
"import matplotlib.pyplot as plt\n",
"register_matplotlib_converters()"
"import warnings"
]
},
{
Expand Down Expand Up @@ -98,12 +102,10 @@
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize = (15, 5))\n",
"plt.plot(df, label = \"Reports\")\n",
"plt.xlabel(\"Time\")\n",
"plt.ylabel(\"Daily Crime Reports\")\n",
"plt.title(\"Daily Crime Reports from 2001-\")\n",
"plt.legend()\n",
"plt.figure(figsize=(20, 6))\n",
"with warnings.catch_warnings():\n",
" warnings.simplefilter(\"ignore\")\n",
" sns.lineplot(data=df).set_title('Daily Crime Reports')\n",
"plt.show()"
]
},
Expand Down Expand Up @@ -146,7 +148,7 @@
"\n",
"def preprocess_data(df, window_size):\n",
" # Normalize inputs to improve learning process\n",
" scaler = StandardScaler()\n",
" scaler = RobustScaler()\n",
"\n",
" # Time series: split latest data into test set\n",
" train = df.values[:int(TRAINING_SPLIT * len(df)), :]\n",
Expand Down Expand Up @@ -338,31 +340,20 @@
"metadata": {},
"outputs": [],
"source": [
"test_predict_plot = np.empty_like(df)\n",
"test_predict_plot[:, :] = 0\n",
"test_predict_plot[len(train_predict) + (WINDOW_SIZE * 2):len(df), :] = test_predict\n",
"\n",
"test_predict_plot = pd.DataFrame(test_predict_plot)\n",
"test_predict_plot.index = df.index\n",
"test_predict_plot = test_predict_plot[test_predict_plot[0] != 0]\n",
"\n",
"# Create the plot.\n",
"plt.figure(figsize = (15, 5))\n",
"plt.plot(df, label = \"True value\")\n",
"plt.plot(test_predict_plot, label = \"Test set prediction\")\n",
"plt.xlabel(\"Date reported\")\n",
"plt.ylabel(\"# of crime reports\")\n",
"plt.title(\"Comparison true vs. predicted test\")\n",
"plt.legend()\n",
"# Create new dataframe with similar indexes and columns to store prediction array\n",
"df_test_predict = pd.DataFrame().reindex_like(df)\n",
"# Assign test predictions to end of dataframe\n",
"df_test_predict['count'][len(train_predict) + (WINDOW_SIZE * 2):len(df)] = np.squeeze(test_predict)\n",
"# Append the test predictions to the end of the existing dataframe, while renaming the column to avoid collision\n",
"df_combined = df.join(df_test_predict.rename(index=str, columns={'count':'predicted'}))\n",
"\n",
"# Plot the predicted vs actual counts\n",
"plt.figure(figsize=(20, 6))\n",
"with warnings.catch_warnings():\n",
" warnings.simplefilter(\"ignore\")\n",
" sns.lineplot(data=df_combined).set_title('Daily Crime Reports')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
Binary file modified samples/ai-platform/images/chicago_crime_predictions.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.