From 5a723f05d099b6ad78b1c4c2dfa503c2ca1775b3 Mon Sep 17 00:00:00 2001 From: atqy <95724753+atqy@users.noreply.github.com> Date: Fri, 19 Aug 2022 10:16:34 -0700 Subject: [PATCH] fix batch_transform_pca_dbscan_movie_clusters_notebook.ipynb (#3566) * fix batch_transform_pca_dbscan_movie_clusters.ipynb * lower test sample * cleanup * lower test percentage * lower test percentage * lower test percentage Co-authored-by: EC2 Default User --- .../Dockerfile | 28 +++++++++++++++++-- ..._transform_pca_dbscan_movie_clusters.ipynb | 4 +-- .../introduction_to_batch_transform/dbscan.R | 2 +- .../introduction_to_batch_transform/plumber.R | 2 +- 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/sagemaker_batch_transform/introduction_to_batch_transform/Dockerfile b/sagemaker_batch_transform/introduction_to_batch_transform/Dockerfile index c72b3e416f..9509a739c4 100644 --- a/sagemaker_batch_transform/introduction_to_batch_transform/Dockerfile +++ b/sagemaker_batch_transform/introduction_to_batch_transform/Dockerfile @@ -6,9 +6,33 @@ RUN apt-get -y update && apt-get install -y --no-install-recommends \ wget \ r-base \ r-base-dev \ - ca-certificates + ca-certificates -RUN R -e "install.packages(c('dbscan', 'plumber'), repos='https://cloud.r-project.org')" +RUN R -e "install.packages(c('Rcpp', 'BH', 'R6', 'jsonlite', 'crayon'), repos='https://cloud.r-project.org')" + +RUN wget http://cran.r-project.org/src/contrib/Archive/stringi/stringi_1.2.4.tar.gz +RUN R CMD INSTALL stringi_1.2.4.tar.gz + +RUN wget http://cran.r-project.org/src/contrib/Archive/rlang/rlang_0.2.2.tar.gz +RUN R CMD INSTALL rlang_0.2.2.tar.gz + +RUN wget http://cran.r-project.org/src/contrib/Archive/magrittr/magrittr_1.5.tar.gz +RUN R CMD INSTALL magrittr_1.5.tar.gz + +RUN wget http://cran.r-project.org/src/contrib/Archive/later/later_0.7.5.tar.gz +RUN R CMD INSTALL later_0.7.5.tar.gz + +RUN wget http://cran.r-project.org/src/contrib/Archive/promises/promises_1.0.1.tar.gz +RUN R CMD INSTALL promises_1.0.1.tar.gz + +RUN wget http://cran.r-project.org/src/contrib/Archive/httpuv/httpuv_1.4.4.2.tar.gz +RUN R CMD INSTALL httpuv_1.4.4.2.tar.gz + +RUN wget http://cran.r-project.org/src/contrib/Archive/dbscan/dbscan_1.1-2.tar.gz +RUN R CMD INSTALL dbscan_1.1-2.tar.gz + +RUN wget http://cran.r-project.org/src/contrib/Archive/plumber/plumber_0.4.6.tar.gz +RUN R CMD INSTALL plumber_0.4.6.tar.gz COPY dbscan.R /opt/ml/dbscan.R COPY plumber.R /opt/ml/plumber.R diff --git a/sagemaker_batch_transform/introduction_to_batch_transform/batch_transform_pca_dbscan_movie_clusters.ipynb b/sagemaker_batch_transform/introduction_to_batch_transform/batch_transform_pca_dbscan_movie_clusters.ipynb index dab8931db9..420935fe05 100644 --- a/sagemaker_batch_transform/introduction_to_batch_transform/batch_transform_pca_dbscan_movie_clusters.ipynb +++ b/sagemaker_batch_transform/introduction_to_batch_transform/batch_transform_pca_dbscan_movie_clusters.ipynb @@ -261,7 +261,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now, we'll setup to split our dataset into train and test. Dimensionality reduction and clustering don't always require a holdout set to test accuracy, but it will allow us to illustrate how batch prediction might be used when new data arrives. In this case, our test dataset will be a simple 10% sample of items." + "Now, we'll setup to split our dataset into train and test. Dimensionality reduction and clustering don't always require a holdout set to test accuracy, but it will allow us to illustrate how batch prediction might be used when new data arrives. In this case, our test dataset will be a simple 0.5% sample of items." ] }, { @@ -270,7 +270,7 @@ "metadata": {}, "outputs": [], "source": [ - "test_products = products.sample(frac=0.1)\n", + "test_products = products.sample(frac=0.005)\n", "train_products = products[~(products.index.isin(test_products.index))]" ] }, diff --git a/sagemaker_batch_transform/introduction_to_batch_transform/dbscan.R b/sagemaker_batch_transform/introduction_to_batch_transform/dbscan.R index c19cf3f5fe..7dba606c3f 100644 --- a/sagemaker_batch_transform/introduction_to_batch_transform/dbscan.R +++ b/sagemaker_batch_transform/introduction_to_batch_transform/dbscan.R @@ -69,7 +69,7 @@ parse_file <- function(file) { # Second helper function for apply parse_json <- function(line) { if (validate(line)) { - return(do.call(rbind, fromJSON(line)[['projections']][[1]]))}} + return(do.call(rbind, fromJSON(line)))}} # Setup scoring function diff --git a/sagemaker_batch_transform/introduction_to_batch_transform/plumber.R b/sagemaker_batch_transform/introduction_to_batch_transform/plumber.R index 1a884f083c..6857c2e474 100644 --- a/sagemaker_batch_transform/introduction_to_batch_transform/plumber.R +++ b/sagemaker_batch_transform/introduction_to_batch_transform/plumber.R @@ -47,4 +47,4 @@ parse_file <- function(file) { # Second helper function for apply parse_json <- function(line) { if (validate(line)) { - return(do.call(rbind, fromJSON(line)[['projections']][[1]]))}} + return(do.call(rbind, fromJSON(line)))}}