diff --git a/sagemaker_processing/fairness_and_explainability/fairness_and_explainability_spark.ipynb b/sagemaker_processing/fairness_and_explainability/fairness_and_explainability_spark.ipynb index 0ec153dda9..1279b4b0d2 100644 --- a/sagemaker_processing/fairness_and_explainability/fairness_and_explainability_spark.ipynb +++ b/sagemaker_processing/fairness_and_explainability/fairness_and_explainability_spark.ipynb @@ -44,7 +44,7 @@ "1. Explaining the importance of the various input features on the model's decision\n", "1. Accessing the reports through SageMaker Studio if you have an instance set up.\n", "\n", - "In doing so, the notebook will first train a [SageMaker XGBoost](https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html) model using training dataset, then use SageMaker Clarify to analyze a testing dataset in CSV format. SageMaker Clarify also supports analyzing dataset in [SageMaker JSONLines dense format](https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-inference.html#common-in-formats), which is illustrated in [another notebook](https://github.com/aws/amazon-sagemaker-examples/blob/master/sagemaker_processing/fairness_and_explainability/fairness_and_explainability_jsonlines_format.ipynb)." + "In doing so, the notebook will first train a [SageMaker XGBoost](https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html) model using training dataset, then use SageMaker Clarify to analyze a testing dataset in CSV format. SageMaker Clarify also supports analyzing dataset in [SageMaker JSON Lines dense format](https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-inference.html#common-in-formats), which is illustrated in [another notebook](https://github.com/aws/amazon-sagemaker-examples/blob/master/sagemaker_processing/fairness_and_explainability/fairness_and_explainability_jsonlines_format.ipynb)." 
] }, { @@ -663,8 +663,10 @@ "metadata": {}, "outputs": [], "source": [ + "# For the sake of time, open a subset of the thousands of files\n", + "num_files_to_open = 100 # len(s3_files)\n", "local_shap_values = pd.DataFrame()\n", - "for file in s3_files:\n", + "for file in s3_files[:num_files_to_open]:\n", " output = sagemaker.s3.S3Downloader.read_file(file)\n", " df = pd.read_csv(StringIO(output), sep=\",\")\n", " local_shap_values = local_shap_values.append(df, ignore_index=True)"