From 579b0fb6e45296b6ba9b4184d8704dc93a6e09cf Mon Sep 17 00:00:00 2001 From: Julia Kroll Date: Thu, 18 Aug 2022 10:00:00 -0500 Subject: [PATCH] Open a subset of ~10k S3 files to reduce runtime --- .../fairness_and_explainability_spark.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sagemaker_processing/fairness_and_explainability/fairness_and_explainability_spark.ipynb b/sagemaker_processing/fairness_and_explainability/fairness_and_explainability_spark.ipynb index 4a2e5a2572..1279b4b0d2 100644 --- a/sagemaker_processing/fairness_and_explainability/fairness_and_explainability_spark.ipynb +++ b/sagemaker_processing/fairness_and_explainability/fairness_and_explainability_spark.ipynb @@ -663,8 +663,10 @@ "metadata": {}, "outputs": [], "source": [ + "# For the sake of time, open a subset of the thousands of files\n", + "num_files_to_open = 100 # len(s3_files)\n", "local_shap_values = pd.DataFrame()\n", - "for file in s3_files:\n", + "for file in s3_files[:num_files_to_open]:\n", " output = sagemaker.s3.S3Downloader.read_file(file)\n", " df = pd.read_csv(StringIO(output), sep=\",\")\n", " local_shap_values = local_shap_values.append(df, ignore_index=True)"