From e5936857e03c08d856d6ae6149e4ee35bbf56552 Mon Sep 17 00:00:00 2001 From: Chencha Jacob Date: Tue, 9 Jul 2024 14:03:17 +0100 Subject: [PATCH 1/8] Added example for downloading a PDF from Google Cloud Storage using Python that uses similar syntax to s3 --- docs/user/streaming-data.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/docs/user/streaming-data.md b/docs/user/streaming-data.md index 3b044ddec..a7eb6690c 100644 --- a/docs/user/streaming-data.md +++ b/docs/user/streaming-data.md @@ -73,4 +73,19 @@ obj = s3.get_object(Body=csv_buffer.getvalue(), Bucket="my-bucket", Key="my/doc. reader = PdfReader(BytesIO(obj["Body"].read())) ``` -It works similarly for Google Cloud Storage ([example](https://stackoverflow.com/a/68403628/562769)). +To use with Google cloud storage + +```python +from io import BytesIO +from google.cloud import storage +import os + +os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "Set this up either as secret or json file downloaded from gcloud" +storage_client = storage.Client() +blob = storage_client.bucket('my-bucket').blob('mydoc.pdf') +file_stream = BytesIO() +blob.download_to_file(file_stream) +reader = PdfReader(file_stream) + +``` + From a72a0eba0bac7770c6c447fa519aa03336fb7586 Mon Sep 17 00:00:00 2001 From: Chencha Jacob Date: Tue, 9 Jul 2024 14:03:17 +0100 Subject: [PATCH 2/8] DOC: Added example for downloading a PDF from Google Cloud Storage using Python that uses similar syntax to s3 --- docs/user/streaming-data.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/user/streaming-data.md b/docs/user/streaming-data.md index 3b044ddec..61021a9fe 100644 --- a/docs/user/streaming-data.md +++ b/docs/user/streaming-data.md @@ -73,4 +73,18 @@ obj = s3.get_object(Body=csv_buffer.getvalue(), Bucket="my-bucket", Key="my/doc. 
reader = PdfReader(BytesIO(obj["Body"].read())) ``` -It works similarly for Google Cloud Storage ([example](https://stackoverflow.com/a/68403628/562769)). +To use with Google cloud storage + +```python +from io import BytesIO +from google.cloud import storage +import os +# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] must be set for the client to work +storage_client = storage.Client() +blob = storage_client.bucket('my-bucket').blob('mydoc.pdf') +file_stream = BytesIO() +blob.download_to_file(file_stream) +reader = PdfReader(file_stream) + +``` + From b0bb8e69f8a694e5a08f94f741e8c0e1baf18a5d Mon Sep 17 00:00:00 2001 From: Chencha Jacob Date: Tue, 9 Jul 2024 14:32:38 +0100 Subject: [PATCH 3/8] DOC: double quotes for file and bucket names --- docs/user/streaming-data.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/user/streaming-data.md b/docs/user/streaming-data.md index a7eb6690c..2d5ac5b48 100644 --- a/docs/user/streaming-data.md +++ b/docs/user/streaming-data.md @@ -79,8 +79,7 @@ To use with Google cloud storage from io import BytesIO from google.cloud import storage import os - -os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "Set this up either as secret or json file downloaded from gcloud" +# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] must be set storage_client = storage.Client() blob = storage_client.bucket('my-bucket').blob('mydoc.pdf') file_stream = BytesIO() From dba9ff268e76c918dca600145caeaaa13c56e64f Mon Sep 17 00:00:00 2001 From: Chencha Jacob Date: Wed, 10 Jul 2024 12:48:12 +0100 Subject: [PATCH 4/8] DOC: added better punctuation for clarity --- docs/user/streaming-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/streaming-data.md b/docs/user/streaming-data.md index 2d5ac5b48..77e60c537 100644 --- a/docs/user/streaming-data.md +++ b/docs/user/streaming-data.md @@ -81,7 +81,7 @@ from google.cloud import storage import os # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] must be set 
storage_client = storage.Client() -blob = storage_client.bucket('my-bucket').blob('mydoc.pdf') +blob = storage_client.bucket("my-bucket").blob("mydoc.pdf") file_stream = BytesIO() blob.download_to_file(file_stream) reader = PdfReader(file_stream) From 4584136abd4a268a3483c785bc83c9345c2ef938 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Wed, 10 Jul 2024 14:25:28 +0200 Subject: [PATCH 5/8] improve formatting --- docs/user/streaming-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/streaming-data.md b/docs/user/streaming-data.md index 77e60c537..5981c8e01 100644 --- a/docs/user/streaming-data.md +++ b/docs/user/streaming-data.md @@ -79,12 +79,12 @@ To use with Google cloud storage from io import BytesIO from google.cloud import storage import os + # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] must be set storage_client = storage.Client() blob = storage_client.bucket("my-bucket").blob("mydoc.pdf") file_stream = BytesIO() blob.download_to_file(file_stream) reader = PdfReader(file_stream) - ``` From 3722c6d5745f481e3b21aac3ca94309a22fe2235 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Wed, 10 Jul 2024 14:25:54 +0200 Subject: [PATCH 6/8] add colon --- docs/user/streaming-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/streaming-data.md b/docs/user/streaming-data.md index 5981c8e01..0421c173e 100644 --- a/docs/user/streaming-data.md +++ b/docs/user/streaming-data.md @@ -73,7 +73,7 @@ obj = s3.get_object(Body=csv_buffer.getvalue(), Bucket="my-bucket", Key="my/doc. 
reader = PdfReader(BytesIO(obj["Body"].read())) ``` -To use with Google cloud storage +To use with Google cloud storage: ```python from io import BytesIO From dc5f2ac7b97bc1c225d33bbe4515be2c0686bfa3 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Wed, 10 Jul 2024 14:26:51 +0200 Subject: [PATCH 7/8] Update docs/user/streaming-data.md --- docs/user/streaming-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/streaming-data.md b/docs/user/streaming-data.md index 0421c173e..7de1598ed 100644 --- a/docs/user/streaming-data.md +++ b/docs/user/streaming-data.md @@ -73,7 +73,7 @@ obj = s3.get_object(Body=csv_buffer.getvalue(), Bucket="my-bucket", Key="my/doc. reader = PdfReader(BytesIO(obj["Body"].read())) ``` -To use with Google cloud storage: +To use with Google Cloud Storage: ```python from io import BytesIO From 05ff6b03e4ce329374dd4ea523ebea4771b12bb2 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Wed, 10 Jul 2024 14:28:48 +0200 Subject: [PATCH 8/8] fix import order --- docs/user/streaming-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/streaming-data.md b/docs/user/streaming-data.md index 7de1598ed..b09ab4382 100644 --- a/docs/user/streaming-data.md +++ b/docs/user/streaming-data.md @@ -77,8 +77,8 @@ To use with Google Cloud Storage: ```python from io import BytesIO + from google.cloud import storage -import os # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] must be set storage_client = storage.Client()