fix batch_transform_pca_dbscan_movie_clusters_notebook.ipynb (#3566)

* fix batch_transform_pca_dbscan_movie_clusters.ipynb
* lower test sample
* cleanup
* lower test percentage
* lower test percentage
* lower test percentage

Co-authored-by: EC2 Default User <[email protected]>
aws · Aug 19, 2022 · 5a723f0 · 5a723f0
1 parent 92ce0e3
commit 5a723f0
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 6 deletions.
diff --git a/sagemaker_batch_transform/introduction_to_batch_transform/Dockerfile b/sagemaker_batch_transform/introduction_to_batch_transform/Dockerfile
@@ -6,9 +6,33 @@ RUN apt-get -y update && apt-get install -y --no-install-recommends \
     wget \
     r-base \
     r-base-dev \
-    ca-certificates
+    ca-certificates 
 
-RUN R -e "install.packages(c('dbscan', 'plumber'), repos='https://cloud.r-project.org')"
+RUN R -e "install.packages(c('Rcpp', 'BH', 'R6', 'jsonlite', 'crayon'), repos='https://cloud.r-project.org')"
+
+RUN wget http://cran.r-project.org/src/contrib/Archive/stringi/stringi_1.2.4.tar.gz
+RUN R CMD INSTALL stringi_1.2.4.tar.gz
+
+RUN wget http://cran.r-project.org/src/contrib/Archive/rlang/rlang_0.2.2.tar.gz
+RUN R CMD INSTALL rlang_0.2.2.tar.gz
+
+RUN wget http://cran.r-project.org/src/contrib/Archive/magrittr/magrittr_1.5.tar.gz
+RUN R CMD INSTALL magrittr_1.5.tar.gz
+
+RUN wget http://cran.r-project.org/src/contrib/Archive/later/later_0.7.5.tar.gz
+RUN R CMD INSTALL later_0.7.5.tar.gz
+
+RUN wget http://cran.r-project.org/src/contrib/Archive/promises/promises_1.0.1.tar.gz
+RUN R CMD INSTALL promises_1.0.1.tar.gz
+
+RUN wget http://cran.r-project.org/src/contrib/Archive/httpuv/httpuv_1.4.4.2.tar.gz
+RUN R CMD INSTALL httpuv_1.4.4.2.tar.gz	
+
+RUN wget http://cran.r-project.org/src/contrib/Archive/dbscan/dbscan_1.1-2.tar.gz
+RUN R CMD INSTALL dbscan_1.1-2.tar.gz
+
+RUN wget http://cran.r-project.org/src/contrib/Archive/plumber/plumber_0.4.6.tar.gz
+RUN R CMD INSTALL plumber_0.4.6.tar.gz
 
 COPY dbscan.R /opt/ml/dbscan.R
 COPY plumber.R /opt/ml/plumber.R

diff --git a/...transform/introduction_to_batch_transform/batch_transform_pca_dbscan_movie_clusters.ipynb b/...transform/introduction_to_batch_transform/batch_transform_pca_dbscan_movie_clusters.ipynb
@@ -261,7 +261,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Now, we'll setup to split our dataset into train and test.  Dimensionality reduction and clustering don't always require a holdout set to test accuracy, but it will allow us to illustrate how batch prediction might be used when new data arrives.  In this case, our test dataset will be a simple 10% sample of items."
+    "Now, we'll setup to split our dataset into train and test.  Dimensionality reduction and clustering don't always require a holdout set to test accuracy, but it will allow us to illustrate how batch prediction might be used when new data arrives.  In this case, our test dataset will be a simple 0.5% sample of items."
    ]
   },
   {
@@ -270,7 +270,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "test_products = products.sample(frac=0.1)\n",
+    "test_products = products.sample(frac=0.005)\n",
     "train_products = products[~(products.index.isin(test_products.index))]"
    ]
   },

diff --git a/sagemaker_batch_transform/introduction_to_batch_transform/dbscan.R b/sagemaker_batch_transform/introduction_to_batch_transform/dbscan.R
@@ -69,7 +69,7 @@ parse_file <- function(file) {
 # Second helper function for apply
 parse_json <- function(line) {
     if (validate(line)) {
-        return(do.call(rbind, fromJSON(line)[['projections']][[1]]))}}
+        return(do.call(rbind, fromJSON(line)))}}
 
 
 # Setup scoring function

diff --git a/sagemaker_batch_transform/introduction_to_batch_transform/plumber.R b/sagemaker_batch_transform/introduction_to_batch_transform/plumber.R
@@ -47,4 +47,4 @@ parse_file <- function(file) {
 # Second helper function for apply
 parse_json <- function(line) {
     if (validate(line)) {
-        return(do.call(rbind, fromJSON(line)[['projections']][[1]]))}}
+        return(do.call(rbind, fromJSON(line)))}}